disdrodb 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -3
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/io.py +14 -17
  7. disdrodb/api/path.py +42 -77
  8. disdrodb/api/search.py +89 -23
  9. disdrodb/cli/disdrodb_create_summary.py +11 -1
  10. disdrodb/cli/disdrodb_create_summary_station.py +10 -0
  11. disdrodb/cli/disdrodb_run_l0.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  15. disdrodb/cli/disdrodb_run_l1.py +1 -1
  16. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  17. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  18. disdrodb/configs.py +30 -83
  19. disdrodb/constants.py +4 -3
  20. disdrodb/data_transfer/download_data.py +4 -2
  21. disdrodb/docs.py +2 -2
  22. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  29. disdrodb/etc/products/L1/global.yaml +7 -1
  30. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  31. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +1 -1
  33. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +1 -1
  35. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +8 -0
  38. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  39. disdrodb/etc/products/L2M/global.yaml +11 -3
  40. disdrodb/l0/check_configs.py +49 -16
  41. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  42. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  43. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  44. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  47. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  48. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  49. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  50. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  51. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  52. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  53. disdrodb/l0/l0_reader.py +2 -2
  54. disdrodb/l0/l0b_processing.py +70 -15
  55. disdrodb/l0/l0c_processing.py +7 -3
  56. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -1
  57. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  58. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  59. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  60. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  61. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  62. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  63. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  64. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  65. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  66. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  67. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  68. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  69. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/LPVEX.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  71. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +8 -17
  72. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  73. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  74. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  75. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  76. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  77. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  78. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  79. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  80. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  81. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  83. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  84. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  85. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  86. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  87. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +232 -0
  88. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  89. disdrodb/l0/readers/PARSIVEL2/{NASA/LPVEX.py → SPAIN/GRANADA.py} +46 -35
  90. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  91. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +10 -28
  92. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  93. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  94. disdrodb/l1/beard_model.py +31 -129
  95. disdrodb/l1/fall_velocity.py +136 -83
  96. disdrodb/l1/filters.py +25 -28
  97. disdrodb/l1/processing.py +16 -17
  98. disdrodb/l1/resampling.py +101 -38
  99. disdrodb/l1_env/routines.py +46 -17
  100. disdrodb/l2/empirical_dsd.py +6 -0
  101. disdrodb/l2/processing.py +6 -5
  102. disdrodb/metadata/geolocation.py +0 -2
  103. disdrodb/metadata/search.py +3 -4
  104. disdrodb/psd/fitting.py +16 -13
  105. disdrodb/routines/l0.py +2 -2
  106. disdrodb/routines/l1.py +173 -60
  107. disdrodb/routines/l2.py +148 -284
  108. disdrodb/routines/options.py +345 -0
  109. disdrodb/routines/wrappers.py +14 -1
  110. disdrodb/scattering/axis_ratio.py +90 -84
  111. disdrodb/scattering/permittivity.py +6 -0
  112. disdrodb/summary/routines.py +735 -670
  113. disdrodb/utils/archiving.py +51 -44
  114. disdrodb/utils/attrs.py +3 -1
  115. disdrodb/utils/dask.py +4 -4
  116. disdrodb/utils/dict.py +33 -0
  117. disdrodb/utils/encoding.py +6 -1
  118. disdrodb/utils/routines.py +9 -8
  119. disdrodb/utils/time.py +11 -3
  120. disdrodb/viz/__init__.py +0 -13
  121. disdrodb/viz/plots.py +231 -1
  122. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/METADATA +2 -1
  123. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/RECORD +135 -103
  124. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  125. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  126. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  127. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/IFLOODS.py +0 -0
  128. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +0 -0
  129. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/PIERS.py +0 -0
  130. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/GCPEX.py +0 -0
  131. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  132. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/WHEEL +0 -0
  133. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/entry_points.txt +0 -0
  134. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/licenses/LICENSE +0 -0
  135. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/top_level.txt +0 -0
disdrodb/l0/configs/SWS250/raw_data_format.yml ADDED
@@ -0,0 +1,148 @@
+precipitation_rate:
+  n_digits: 6
+  n_characters: 7
+  n_decimals: 3
+  n_naturals: 3
+  data_range:
+  - 0
+  - 9999.999
+  nan_flags: null
+precipitation_accumulated:
+  n_digits: 6
+  n_characters: 7
+  n_decimals: 2
+  n_naturals: 4
+  data_range:
+  - 0
+  - 9999.0
+  nan_flags: null
+weather_code_synop_4680:
+  n_digits: 2
+  n_characters: 2
+  n_decimals: 0
+  n_naturals: 2
+  data_range:
+  - 0
+  - 89
+  nan_flags: null
+weather_code_metar_4678:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+past_weather1:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+past_weather2:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+mor_visibility_5min:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+mor_visibility:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+number_particles:
+  n_digits: 4
+  n_characters: 4
+  n_decimals: 0
+  n_naturals: 4
+  data_range:
+  - 0
+  - 9999
+  nan_flags: null
+sensor_temperature:
+  n_digits: 4
+  n_characters: 6
+  n_decimals: 1
+  n_naturals: 3
+  data_range:
+  - -99
+  - 100
+  nan_flags: null
+obstruction_status:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+total_extinction_coefficient:
+  n_digits: 5
+  n_characters: 6
+  n_decimals: 2
+  n_naturals: 3
+  data_range:
+  - 0
+  - 999.99
+  nan_flags: null
+transmissometer_extinction_coefficient:
+  n_digits: 5
+  n_characters: 6
+  n_decimals: 2
+  n_naturals: 3
+  data_range:
+  - 0
+  - 999.99
+  nan_flags: null
+back_scatter_extinction_coefficient:
+  n_digits: 5
+  n_characters: 7
+  n_decimals: 2
+  n_naturals: 3
+  data_range:
+  - -999.99
+  - 999.99
+  nan_flags: null
+ambient_light_sensor_signal:
+  n_digits: 5
+  n_characters: 5
+  n_decimals: 0
+  n_naturals: 5
+  data_range:
+  - 0
+  - 99998
+  nan_flags: 99999
+sensor_status:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+ambient_light_sensor_signal_status:
+  n_digits: null
+  n_characters: null
+  n_decimals: null
+  n_naturals: null
+  data_range: null
+  nan_flags: null
+raw_drop_number:
+  n_digits: 0
+  n_characters: 4096
+  n_decimals: 0
+  n_naturals: 0
+  data_range: null
+  nan_flags: null
+  dimension_order:
+  - velocity_bin_center
+  - diameter_bin_center
+  n_values: 336
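
Each raw_data_format entry above describes how a raw field is laid out and which values are valid (string width, decimal layout, data_range, nan_flags). As a minimal standalone sketch (not disdrodb's internal API), applying the data_range and nan_flags of an entry such as sensor_temperature to parsed values could look like:

    import numpy as np

    # Spec taken from the 'sensor_temperature' entry above.
    spec = {"data_range": [-99, 100], "nan_flags": None}

    def sanitize(values, spec):
        """Replace nan_flags values and out-of-range values with NaN."""
        arr = np.asarray(values, dtype=float)
        if spec.get("nan_flags") is not None:
            arr[arr == float(spec["nan_flags"])] = np.nan
        if spec.get("data_range") is not None:
            vmin, vmax = spec["data_range"]
            arr[(arr < vmin) | (arr > vmax)] = np.nan
        return arr

    print(sanitize(["23.5", "-105.0", "99.9"], spec))  # [23.5   nan  99.9]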
disdrodb/l0/l0_reader.py CHANGED
@@ -35,9 +35,9 @@ logger = logging.getLogger(__name__)
 
 def define_readers_directory(sensor_name="") -> str:
     """Returns the path to the ``disdrodb.l0.readers`` directory within the disdrodb package."""
-    from disdrodb import __root_path__
+    from disdrodb import package_dir
 
-    reader_dir = os.path.join(__root_path__, "disdrodb", "l0", "readers", sensor_name)
+    reader_dir = os.path.join(package_dir, "l0", "readers", sensor_name)
     return reader_dir
 
 
disdrodb/l0/l0b_processing.py CHANGED
@@ -80,15 +80,16 @@ def infer_split_str(string: str) -> str:
     return split_str
 
 
-def _replace_empty_strings_with_zeros(values):
+def replace_empty_strings_with_zeros(values):
+    """Replace empty comma-separated strings with '0'."""
     values[np.char.str_len(values) == 0] = "0"
     return values
 
 
-def _format_string_array(string: str, n_values: int) -> np.array:
+def format_string_array(string: str, n_values: int) -> np.array:
     """Split a string with multiple numbers separated by a delimiter into a 1D array.
 
-    e.g. : _format_string_array("2,44,22,33", 4) will return [ 2. 44. 22. 33.]
+    e.g. : format_string_array("2,44,22,33", 4) will return [ 2. 44. 22. 33.]
 
     If empty string ("") --> Return an array of zeros
     If the list length is not n_values -> Return an array of np.nan
@@ -126,7 +127,7 @@ def _format_string_array(string: str, n_values: int) -> np.array:
     # Ensure string type
     values = values.astype("str")
     # Replace '' with 0
-    values = _replace_empty_strings_with_zeros(values)
+    values = replace_empty_strings_with_zeros(values)
     # Replace "-9.999" with 0
     values = np.char.replace(values, "-9.999", "0")
     # Cast values to float type
@@ -135,7 +136,7 @@ def _format_string_array(string: str, n_values: int) -> np.array:
     return values
 
 
-def _reshape_raw_spectrum(
+def reshape_raw_spectrum(
     arr: np.array,
     dims_order: list,
     dims_size_dict: dict,
@@ -243,17 +244,17 @@ def retrieve_l0b_arrays(
         # Ensure is a string, get a numpy array for each row and then stack
         # - Option 1: Clear but lot of copies
         # df_series = df[key].astype(str)
-        # list_arr = df_series.apply(_format_string_array, n_values=n_values)
+        # list_arr = df_series.apply(format_string_array, n_values=n_values)
         # arr = np.stack(list_arr, axis=0)
 
         # - Option 2: still copies
-        # arr = np.vstack(_format_string_array(s, n_values=n_values) for s in df_series.astype(str))
+        # arr = np.vstack(format_string_array(s, n_values=n_values) for s in df_series.astype(str))
 
         # - Option 3: more memory efficient
         n_timesteps = len(df[key])
         arr = np.empty((n_timesteps, n_values), dtype=float)  # preallocates
         for i, s in enumerate(df[key].astype(str)):
-            arr[i, :] = _format_string_array(s, n_values=n_values)
+            arr[i, :] = format_string_array(s, n_values=n_values)
 
         # Retrieve dimensions
         dims_order = dims_order_dict[key]
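
The Option 1/2/3 comments above record the memory trade-off behind the retained preallocation loop. A self-contained toy version of that pattern, with a simplified stand-in for format_string_array (illustrative only, not the package code):

    import numpy as np

    def format_string_array(string, n_values):
        # Simplified stand-in: split on ',', replace empty entries with "0",
        # and return NaNs when the number of values does not match n_values.
        values = np.array(string.split(","))
        if len(values) != n_values:
            return np.full(n_values, np.nan)
        values[np.char.str_len(values) == 0] = "0"
        return values.astype(float)

    series = ["2,44,22,33", "1,,3,4", "bad"]
    arr = np.empty((len(series), 4), dtype=float)  # preallocate once instead of stacking row copies
    for i, s in enumerate(series):
        arr[i, :] = format_string_array(s, n_values=4)
    print(arr)  # the "bad" row is all NaN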
@@ -263,7 +264,7 @@ def retrieve_l0b_arrays(
         # - This applies i.e for PARSIVEL*, LPM, PWS100
         # - This does not apply to RD80
         if key == "raw_drop_number" and len(dims_order) == 2:
-            arr, dims = _reshape_raw_spectrum(
+            arr, dims = reshape_raw_spectrum(
                 arr=arr,
                 dims_order=dims_order,
                 dims_size_dict=dims_size_dict,
@@ -288,7 +289,57 @@
 #### L0B Coords and attributes
 
 
-def _convert_object_variables_to_string(ds: xr.Dataset) -> xr.Dataset:
+def ensure_valid_geolocation(ds: xr.Dataset, coord: str, errors: str = "ignore") -> xr.Dataset:
+    """Ensure valid geolocation coordinates.
+
+    'altitude' must be >= 0, 'latitude' must be within [-90, 90] and
+    'longitude' within [-180, 180].
+
+    It can deal with coordinates varying with time.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        Dataset containing the coordinate.
+    coord : str
+        Name of the coordinate variable to validate.
+    errors : {"ignore", "raise", "coerce"}, default "ignore"
+        - "ignore": nothing is done.
+        - "raise" : raise ValueError if invalid values are found.
+        - "coerce": out-of-range values are replaced with NaN.
+
+    Returns
+    -------
+    xr.Dataset
+        Dataset with validated coordinate values.
+    """
+    # Define coordinate ranges
+    ranges = {
+        "altitude": (0, np.inf),
+        "latitude": (-90, 90),
+        "longitude": (-180, 180),  # used only for "raise"/"coerce"
+    }
+
+    # Check coordinate is available and correctly defined.
+    if coord not in ds:
+        raise ValueError(f"Coordinate '{coord}' not found in dataset.")
+    if coord not in list(ranges):
+        raise ValueError(f"Valid geolocation coordinates are: {list(ranges)}.")
+
+    # Validate coordinate
+    vmin, vmax = ranges[coord]
+    invalid = (ds[coord] < vmin) | (ds[coord] > vmax)
+    invalid = invalid.compute()
+
+    # Deal with invalid values
+    if errors == "raise" and invalid.any():
+        raise ValueError(f"{coord} out of range {vmin}-{vmax}.")
+    if errors == "coerce":
+        ds[coord] = ds[coord].where(~invalid)
+    return ds
+
+
+def convert_object_variables_to_string(ds: xr.Dataset) -> xr.Dataset:
     """Convert variables with ``object`` dtype to ``string``.
 
     Parameters
@@ -307,7 +358,7 @@ def _convert_object_variables_to_string(ds: xr.Dataset) -> xr.Dataset:
     return ds
 
 
-def _set_variable_attributes(ds: xr.Dataset, sensor_name: str) -> xr.Dataset:
+def set_variable_attributes(ds: xr.Dataset, sensor_name: str) -> xr.Dataset:
     """Set attributes to each ``xr.Dataset`` variable.
 
     Parameters
@@ -353,7 +404,7 @@ def add_dataset_crs_coords(ds):
 
 
 def _define_dataset_variables(df, sensor_name, logger=None, verbose=False):
-    """Define DISDRODB L0B netCDF variables."""
+    """Define DISDRODB L0B netCDF array variables."""
     # Preprocess raw_spectrum, diameter and velocity arrays if available
     raw_fields = ["raw_drop_concentration", "raw_drop_average_velocity", "raw_drop_number"]
     if np.any(np.isin(raw_fields, df.columns)):
@@ -436,7 +487,7 @@ def set_geolocation_coordinates(ds, metadata):
         # If coordinate not present, add it from dictionary
         if coord not in ds:
            ds = ds.assign_coords({coord: metadata.pop(coord, np.nan)})
-        # Else if set coordinates the variable in the dataset (present in the raw data)
+        # Else ensure coord is a dataset coordinate
         else:
             ds = ds.set_coords(coord)
             _ = metadata.pop(coord, None)
@@ -445,6 +496,10 @@ def set_geolocation_coordinates(ds, metadata):
     for coord in coords:
         ds[coord] = xr.where(ds[coord] == -9999, np.nan, ds[coord])
 
+    # Ensure valid geolocation coordinates
+    for coord in coords:
+        ds = ensure_valid_geolocation(ds=ds, coord=coord, errors="coerce")
+
     # Set attributes without geolocation coordinates
     ds.attrs = metadata
     return ds
@@ -469,11 +524,11 @@ def finalize_dataset(ds, sensor_name, metadata):
     ds = ds.transpose("time", "diameter_bin_center", ...)
 
     # Ensure variables with dtype object are converted to string
-    ds = _convert_object_variables_to_string(ds)
+    ds = convert_object_variables_to_string(ds)
 
     # Add netCDF variable and coordinate attributes
     # - Add variable attributes: long_name, units, descriptions, valid_min, valid_max
-    ds = _set_variable_attributes(ds=ds, sensor_name=sensor_name)
+    ds = set_variable_attributes(ds=ds, sensor_name=sensor_name)
     # - Add netCDF coordinate attributes
     ds = set_coordinate_attributes(ds=ds)
     # - Set DISDRODB global attributes
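
A short usage sketch of the ensure_valid_geolocation function added above; the toy dataset and the import path are assumptions for illustration:

    import numpy as np
    import xarray as xr

    from disdrodb.l0.l0b_processing import ensure_valid_geolocation  # assumed module path

    # Time-varying latitude with one out-of-range value.
    ds = xr.Dataset(coords={"time": np.arange(3)})
    ds = ds.assign_coords(latitude=("time", [45.2, 95.0, 44.9]))

    ds = ensure_valid_geolocation(ds=ds, coord="latitude", errors="coerce")
    print(ds["latitude"].values)  # [45.2   nan  44.9]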
disdrodb/l0/l0c_processing.py CHANGED
@@ -117,7 +117,12 @@ def split_dataset_by_sampling_intervals(
 
     # If sample_interval is a dataset variable, use it to define dictionary of datasets
     if "sample_interval" in ds:
-        return {int(interval): ds.isel(time=ds["sample_interval"] == interval) for interval in measurement_intervals}
+        dict_ds = {}
+        for interval in measurement_intervals:
+            ds_subset = ds.isel(time=ds["sample_interval"] == interval)
+            if ds_subset.sizes["time"] > 2:
+                dict_ds[int(interval)] = ds_subset
+        return dict_ds
 
     # ---------------------------------------------------------------------------------------.
     # Otherwise exploit difference between timesteps to identify change point
@@ -460,9 +465,8 @@ def regularize_timesteps(ds, sample_interval, robust=False, add_quality_flag=True):
     # if last_time == last_time_expected and qc_flag[-1] == flag_next_missing:
     #     qc_flag[-1] = 0
 
-    # Assign time quality flag coordinate
+    # Add time quality flag variable
     ds["time_qc"] = xr.DataArray(qc_flag, dims="time")
-    ds = ds.set_coords("time_qc")
 
     # Add CF attributes for time_qc
     ds["time_qc"].attrs = {
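
The split_dataset_by_sampling_intervals change above discards subsets spanning two timesteps or fewer instead of returning them. A standalone illustration of the new selection logic on toy data (not calling disdrodb):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"sample_interval": ("time", [60, 60, 60, 30, 30])},
        coords={"time": np.arange(5)},
    )
    measurement_intervals = [30, 60]
    dict_ds = {}
    for interval in measurement_intervals:
        ds_subset = ds.isel(time=ds["sample_interval"] == interval)
        if ds_subset.sizes["time"] > 2:  # subsets with <= 2 timesteps are dropped
            dict_ds[int(interval)] = ds_subset
    print(list(dict_ds))  # [60]: the two-timestep 30 s subset is discarded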
disdrodb/l0/readers/LPM/SLOVENIA/UL.py CHANGED
@@ -69,7 +69,7 @@ def reader(
         "quality_measurement": "quality_index",
         "max_diameter_hail": "max_hail_diameter",
         "laser_status": "laser_status",
-        "static_signal": "static_signal",
+        "static_signal_status": "static_signal_status",
         "interior_temperature": "temperature_interior",
         "laser_temperature": "laser_temperature",
         "laser_temperature_analog_status": "laser_temperature_analog_status",
@@ -137,7 +137,7 @@ def reader(
         "quality_index",
         "max_hail_diameter",
         "laser_status",
-        "static_signal",
+        "static_signal_status",
         "laser_temperature_analog_status",
         "laser_temperature_digital_status",
         "laser_current_analog_status",
@@ -151,7 +151,7 @@ def reader(
         "current_heating_heads_status",
         "current_heating_carriers_status",
         "control_output_laser_power_status",
-        "reserve_status",
+        "reserved_status",
         "temperature_interior",
         "laser_temperature",
         "laser_current_average",
disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py ADDED
@@ -0,0 +1,256 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""DISDRODB reader for ULIEGE LPM stations."""
+
+import numpy as np
+import pandas as pd
+
+from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+from disdrodb.l0.l0a_processing import read_raw_text_file
+from disdrodb.utils.logger import log_error, log_warning
+
+
+def read_txt_file(file, filename, logger):
+    """Parse ULIEGE LPM hourly file."""
+    #### - Define raw data headers
+    column_names = ["TO_PARSE"]
+
+    ##------------------------------------------------------------------------.
+    #### Define reader options
+    # - For more info: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
+    reader_kwargs = {}
+
+    # - Define delimiter
+    reader_kwargs["delimiter"] = "\\n"
+
+    # - Avoid first column to become df index !!!
+    reader_kwargs["index_col"] = False
+
+    # Since column names are expected to be passed explicitly, header is set to None
+    reader_kwargs["header"] = None
+
+    # - Number of rows to be skipped at the beginning of the file
+    reader_kwargs["skiprows"] = None
+
+    # - Define behaviour when encountering bad lines
+    reader_kwargs["on_bad_lines"] = "skip"
+
+    # - Define reader engine
+    #   - C engine is faster
+    #   - Python engine is more feature-complete
+    reader_kwargs["engine"] = "python"
+
+    # - Define on-the-fly decompression of on-disk data
+    #   - Available: gzip, bz2, zip
+    reader_kwargs["compression"] = "infer"
+
+    # - Strings to recognize as NA/NaN and replace with standard NA flags
+    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
+    #     '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
+    #     'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
+    reader_kwargs["na_values"] = ["na", "", "error"]
+
+    ##------------------------------------------------------------------------.
+    #### Read the data
+    df = read_raw_text_file(
+        filepath=file,
+        column_names=column_names,
+        reader_kwargs=reader_kwargs,
+        logger=logger,
+    )
+
+    ##------------------------------------------------------------------------.
+    #### Adapt the dataframe to adhere to DISDRODB L0 standards
+    # Count number of delimiters to identify valid rows
+    df = df[df["TO_PARSE"].str.count(";") == 442]
+
+    # Check there are still valid rows
+    if len(df) == 0:
+        raise ValueError(f"No valid rows in {filename}.")
+
+    # Split by ; delimiter (before raw drop number)
+    df = df["TO_PARSE"].str.split(";", expand=True, n=43)
+
+    # Assign column names
+    column_names = [
+        "id",
+        "sample_interval",
+        "weather_code_synop_4677_5min",  # or "weather_code_synop_4680_5min",
+        "weather_code_metar_4678_5min",
+        "precipitation_rate_5min",
+        "weather_code_synop_4677",  # or "weather_code_synop_4680",
+        "weather_code_metar_4678",
+        "precipitation_rate",
+        "precipitation_accumulated",
+        "sensor_time",
+        # "mor_visibility",
+        # "reflectivity",
+        # "quality_index",
+        # "max_hail_diameter",
+        # "laser_status",
+        "dummy1",
+        "dummy2",
+        # "laser_temperature",
+        "laser_current_average",
+        "control_voltage",
+        "optical_control_voltage_output",
+        # "current_heating_house",
+        # "current_heating_heads",
+        # "current_heating_carriers",
+        "number_particles",
+        "number_particles_internal_data",
+        "number_particles_min_speed",
+        "number_particles_min_speed_internal_data",
+        "number_particles_max_speed",
+        "number_particles_max_speed_internal_data",
+        "number_particles_min_diameter",
+        "number_particles_min_diameter_internal_data",
+        "number_particles_no_hydrometeor",
+        "number_particles_no_hydrometeor_internal_data",
+        # "number_particles_unknown_classification",  # ????
+        # "number_particles_unknown_classification_internal_data",
+        "number_particles_class_1",
+        "number_particles_class_1_internal_data",
+        "number_particles_class_2",
+        "number_particles_class_2_internal_data",
+        "number_particles_class_3",
+        "number_particles_class_3_internal_data",
+        "number_particles_class_4",
+        "number_particles_class_4_internal_data",
+        "number_particles_class_5",
+        "number_particles_class_5_internal_data",
+        "number_particles_class_6",
+        "number_particles_class_6_internal_data",
+        "number_particles_class_7",
+        "number_particles_class_7_internal_data",
+        "number_particles_class_8",
+        "number_particles_class_8_internal_data",
+        "number_particles_class_9",
+        "number_particles_class_9_internal_data",
+        "raw_drop_number",
+    ]
+    df.columns = column_names
+
+    # Deal with the case where there are 61 timesteps
+    # - Occurs sometimes when the previous hourly file misses timesteps
+    if len(df) == 61:
+        log_warning(logger=logger, msg=f"{filename} contains 61 timesteps. Dropping the first.")
+        df = df.iloc[1:]
+
+    # Raise error if more than 60 timesteps/rows
+    n_rows = len(df)
+    if n_rows > 60:
+        raise ValueError(f"The hourly file contains {n_rows} timesteps.")
+
+    # Infer and define "time" column
+    start_time_str = filename.split(".")[0]  # '2024020200.txt'
+    start_time = pd.to_datetime(start_time_str, format="%Y%m%d%H")
+
+    # - Define timedelta based on sensor_time
+    dt = pd.to_timedelta(df["sensor_time"] + ":00").to_numpy().astype("m8[s]")
+    dt = dt - dt[0]
+
+    # - Define approximate time
+    df["time"] = start_time + dt
+
+    # - Keep rows where time increment is between 00 and 59 minutes
+    valid_rows = dt <= np.timedelta64(3540, "s")
+    df = df[valid_rows]
+
+    # Drop rows where sample interval is not 60 seconds
+    df = df[df["sample_interval"] == "000060"]
+
+    # Drop rows with invalid raw_drop_number
+    # --> 440 values  # 22x20
+    # --> 400 here    # 20x20
+    df = df[df["raw_drop_number"].astype(str).str.len() == 1599]
+
+    # Deal with old LPM version 20x20 spectrum
+    # - Add 000 in first two velocity bins
+    df["raw_drop_number"] = df["raw_drop_number"] + ";" + ";".join(["000"] * 40)
+
+    # Drop columns not agreeing with DISDRODB L0 standards
+    columns_to_drop = [
+        "sample_interval",
+        "sensor_time",
+        "dummy1",
+        "dummy2",
+        "id",
+    ]
+    df = df.drop(columns=columns_to_drop)
+    return df
+
+
+@is_documented_by(reader_generic_docstring)
+def reader(
+    filepath,
+    logger=None,
+):
+    """Reader."""
+    import zipfile
+
+    ##------------------------------------------------------------------------.
+    # filename = os.path.basename(filepath)
+    # return read_txt_file(file=filepath, filename=filename, logger=logger)
+
+    # ---------------------------------------------------------------------.
+    #### Iterate over all files (aka timesteps) in the daily zip archive
+    # - Each file contains a single timestep !
+    # list_df = []
+    # with tempfile.TemporaryDirectory() as temp_dir:
+    #     # Extract all files
+    #     unzip_file_on_terminal(filepath, temp_dir)
+
+    #     # Walk through extracted files
+    #     for root, _, files in os.walk(temp_dir):
+    #         for filename in sorted(files):
+    #             if filename.endswith(".txt"):
+    #                 full_path = os.path.join(root, filename)
+    #                 try:
+    #                     df = read_txt_file(file=full_path, filename=filename, logger=logger)
+    #                     if df is not None:
+    #                         list_df.append(df)
+    #                 except Exception as e:
+    #                     msg = f"An error occurred while reading {filename}: {e}"
+    #                     log_error(logger=logger, msg=msg, verbose=True)
+
+    list_df = []
+    with zipfile.ZipFile(filepath, "r") as zip_ref:
+        filenames = sorted(zip_ref.namelist())
+        for filename in filenames:
+            if filename.endswith(".txt"):
+                # Open file
+                with zip_ref.open(filename) as file:
+                    try:
+                        df = read_txt_file(file=file, filename=filename, logger=logger)
+                        if df is not None:
+                            list_df.append(df)
+                    except Exception as e:
+                        msg = f"An error occurred while reading {filename}. The error is: {e}"
+                        log_error(logger=logger, msg=msg, verbose=True)
+
+    # Check the zip file contains at least some non-empty files
+    if len(list_df) == 0:
+        raise ValueError(f"{filepath} contains only empty files!")
+
+    # Concatenate all dataframes into a single one
+    df = pd.concat(list_df)
+
+    # ---------------------------------------------------------------------.
+    return df
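
A hypothetical invocation of the new ULIEGE reader; the archive path is illustrative, and within DISDRODB the reader is normally dispatched by the L0 processing chain rather than called directly:

    from disdrodb.l0.readers.LPM.BELGIUM.ULIEGE import reader  # assumed module path

    # Daily zip archive of hourly LPM text files (e.g. 2024020200.txt, 2024020201.txt, ...)
    df = reader("/data/ULIEGE/20240202.zip")
    print(len(df), df["time"].min(), df["time"].max())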
@@ -96,7 +96,7 @@ def reader(
         "quality_index",
         "max_hail_diameter",
         "laser_status",
-        "static_signal",
+        "static_signal_status",
         "laser_temperature_analog_status",
         "laser_temperature_digital_status",
         "laser_current_analog_status",
@@ -110,7 +110,7 @@ def reader(
         "current_heating_heads_status",
         "current_heating_carriers_status",
         "control_output_laser_power_status",
-        "reserve_status",
+        "reserved_status",
         "temperature_interior",
         "laser_temperature",
         "laser_current_average",
@@ -96,7 +96,7 @@ def reader(
         "quality_index",
         "max_hail_diameter",
         "laser_status",
-        "static_signal",
+        "static_signal_status",
         "laser_temperature_analog_status",
         "laser_temperature_digital_status",
         "laser_current_analog_status",
@@ -110,7 +110,7 @@ def reader(
         "current_heating_heads_status",
         "current_heating_carriers_status",
         "control_output_laser_power_status",
-        "reserve_status",
+        "reserved_status",
         "temperature_interior",
         "laser_temperature",
         "laser_current_average",