PyPI - disdrodb - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl - Mend

disdrodb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (123) hide show

disdrodb/__init__.py +64 -34
disdrodb/_config.py +5 -4
disdrodb/_version.py +16 -3
disdrodb/accessor/__init__.py +20 -0
disdrodb/accessor/methods.py +125 -0
disdrodb/api/checks.py +139 -9
disdrodb/api/configs.py +4 -2
disdrodb/api/info.py +10 -10
disdrodb/api/io.py +237 -18
disdrodb/api/path.py +81 -75
disdrodb/api/search.py +6 -6
disdrodb/cli/disdrodb_create_summary_station.py +91 -0
disdrodb/cli/disdrodb_run_l0.py +1 -1
disdrodb/cli/disdrodb_run_l0_station.py +1 -1
disdrodb/cli/disdrodb_run_l0b.py +1 -1
disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
disdrodb/cli/disdrodb_run_l0c.py +1 -1
disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
disdrodb/configs.py +149 -4
disdrodb/constants.py +61 -0
disdrodb/data_transfer/download_data.py +5 -5
disdrodb/etc/configs/attributes.yaml +339 -0
disdrodb/etc/configs/encodings.yaml +473 -0
disdrodb/etc/products/L1/global.yaml +13 -0
disdrodb/etc/products/L2E/10MIN.yaml +12 -0
disdrodb/etc/products/L2E/1MIN.yaml +1 -0
disdrodb/etc/products/L2E/global.yaml +22 -0
disdrodb/etc/products/L2M/10MIN.yaml +12 -0
disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
disdrodb/etc/products/L2M/global.yaml +26 -0
disdrodb/l0/__init__.py +13 -0
disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
disdrodb/l0/l0a_processing.py +30 -30
disdrodb/l0/l0b_nc_processing.py +108 -2
disdrodb/l0/l0b_processing.py +4 -4
disdrodb/l0/l0c_processing.py +5 -13
disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +2 -0
disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → USA/C3WE.py} +65 -85
disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
disdrodb/l0/routines.py +105 -14
disdrodb/l1/__init__.py +5 -0
disdrodb/l1/filters.py +34 -20
disdrodb/l1/processing.py +45 -44
disdrodb/l1/resampling.py +77 -66
disdrodb/l1/routines.py +35 -43
disdrodb/l1_env/routines.py +18 -3
disdrodb/l2/__init__.py +7 -0
disdrodb/l2/empirical_dsd.py +58 -10
disdrodb/l2/event.py +27 -120
disdrodb/l2/processing.py +267 -116
disdrodb/l2/routines.py +618 -254
disdrodb/metadata/standards.py +3 -1
disdrodb/psd/fitting.py +463 -144
disdrodb/psd/models.py +8 -5
disdrodb/routines.py +3 -3
disdrodb/scattering/__init__.py +16 -4
disdrodb/scattering/axis_ratio.py +56 -36
disdrodb/scattering/permittivity.py +486 -0
disdrodb/scattering/routines.py +701 -159
disdrodb/summary/__init__.py +17 -0
disdrodb/summary/routines.py +4120 -0
disdrodb/utils/attrs.py +68 -125
disdrodb/utils/compression.py +30 -1
disdrodb/utils/dask.py +59 -8
disdrodb/utils/dataframe.py +61 -7
disdrodb/utils/directories.py +35 -15
disdrodb/utils/encoding.py +33 -19
disdrodb/utils/logger.py +13 -6
disdrodb/utils/manipulations.py +71 -0
disdrodb/utils/subsetting.py +214 -0
disdrodb/utils/time.py +165 -19
disdrodb/utils/writer.py +20 -7
disdrodb/utils/xarray.py +2 -4
disdrodb/viz/__init__.py +13 -0
disdrodb/viz/plots.py +327 -0
{disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
{disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/RECORD +121 -88
{disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
disdrodb/l1/encoding_attrs.py +0 -642
disdrodb/l2/processing_options.py +0 -213
/disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
{disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
{disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
{disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0

disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml CHANGED Viewed

@@ -47,7 +47,7 @@ number_particles:
 sensor_temperature:
   description: Temperature in sensor housing
   long_name: Temperature of the sensor
-  units: "C"
+  units: "degC"
 sensor_serial_number:
   description: Sensor serial number
   long_name: Serial number of the sensor
@@ -105,15 +105,15 @@ error_code:
 sensor_temperature_pcb:
   description: Temperature in printed circuit board
   long_name: Sensor PCB temperature
-  units: "C"
+  units: "degC"
 sensor_temperature_receiver:
   description: Temperature in right sensor head
   long_name: Sensor receiver temperature
-  units: "C"
+  units: "degC"
 sensor_temperature_trasmitter:
   description: Temperature in left sensor head
   long_name: Sensor trasmitter temperature
-  units: "C"
+  units: "degC"
 rainfall_rate_16_bit_30:
   description: Rainfall rate
   long_name: Rainfall rate max 30 mm/h 16 bit
@@ -161,7 +161,7 @@ raw_drop_number:
 air_temperature:
   description: "Air temperature in degrees Celsius (C)"
   long_name: Air temperature
-  units: "C"
+  units: "degC"
 relative_humidity:
   description: "Relative humidity in percent (%)"
   long_name: Relative humidity

disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml CHANGED Viewed

@@ -102,7 +102,7 @@ sensor_temperature:
   chunksizes: 5000
   _FillValue: 127
 sensor_serial_number:
-  dtype: object
+  dtype: str
   zlib: false
   complevel: 3
   shuffle: true
@@ -110,7 +110,7 @@ sensor_serial_number:
   contiguous: false
   chunksizes: 5000
 firmware_iop:
-  dtype: object
+  dtype: str
   zlib: false
   complevel: 3
   shuffle: true
@@ -118,7 +118,7 @@ firmware_iop:
   contiguous: false
   chunksizes: 5000
 firmware_dsp:
-  dtype: object
+  dtype: str
   zlib: false
   complevel: 3
   shuffle: true

disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml CHANGED Viewed

@@ -15,7 +15,7 @@ rainfall_accumulated_32bit:
   n_naturals: 4
   data_range:
     - 0
-    - 300.0
+    - 9999.0
   nan_flags: null
   field_number: "02"
 weather_code_synop_4680:

disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml CHANGED Viewed

@@ -25,7 +25,7 @@ sensor_status:
 air_temperature:
   description: "Air temperature in degrees Celsius"
   long_name: Air temperature
-  units: "C"
+  units: "degC"
 relative_humidity:
   description: "Relative humidity in percent (%)"
   long_name: Relative humidity
@@ -33,15 +33,15 @@ relative_humidity:
 wetbulb_temperature:
   description: "Wet bulb temperature in degrees Celsius"
   long_name: Wet bulb temperature
-  units: "C"
+  units: "degC"
 air_temperature_max:
   description: "Maximum air temperature in degrees Celsius"
   long_name: Maximum air temperature
-  units: "C"
+  units: "degC"
 air_temperature_min:
   description: "Minimum air temperature in degrees Celsius"
   long_name: Minimum air temperature
-  units: "C"
+  units: "degC"
 rainfall_rate:
   description: Rainfall rate
   long_name: Rainfall rate

disdrodb/l0/configs/PWS100/raw_data_format.yml CHANGED Viewed

@@ -5,7 +5,7 @@ mor_visibility:
   n_naturals: 4
   data_range:
     - 0
-    - 9999.9
+    - 20000
   nan_flags: null
   field_number: "20"
 weather_code_synop_4680:

disdrodb/l0/l0a_processing.py CHANGED Viewed

@@ -18,13 +18,13 @@
 # -----------------------------------------------------------------------------.
 """Functions to process raw text files into DISDRODB L0A Apache Parquet."""
 import logging
 import os
 from typing import Union
 import numpy as np
 import pandas as pd
+import pyarrow.parquet as pq
 from disdrodb.l0.check_standards import check_l0a_column_names, check_l0a_standards
 from disdrodb.l0.l0b_processing import infer_split_str
@@ -130,11 +130,15 @@ def read_raw_text_file(
     try:
         df = pd.read_csv(filepath, names=column_names, dtype=dtype, **reader_kwargs)
     except pd.errors.EmptyDataError:
+        # if isinstance(filepath, zipfile.ZipExtFile):
+        #     filepath = filepath.name
         msg = f"The following file is empty: {filepath}"
         raise ValueError(msg)
     # Check the dataframe is not empty
     if len(df.index) == 0:
+        # if isinstance(filepath, zipfile.ZipExtFile):
+        #     filepath = filepath.name
         msg = f"The following file is empty: {filepath}"
         raise ValueError(msg)
@@ -413,6 +417,8 @@ def is_raw_array_string_not_corrupted(string):
     """Check that the raw array string is not corrupted."""
     if not isinstance(string, str):
         return False
+    if string in ["", "NAN", "NaN"]:
+        return True
     split_str = infer_split_str(string=string)
     list_values = string.split(split_str)
     values = pd.to_numeric(list_values, errors="coerce")
@@ -625,6 +631,9 @@ def sanitize_df(
     # - Sort by time
     df = df.sort_values("time")
+    # - Drop index
+    df = df.reset_index(drop=True)
     # ------------------------------------------------------.
     # - Check column names agrees to DISDRODB standards
     check_l0a_column_names(df, sensor_name=sensor_name)
@@ -755,24 +764,8 @@ def concatenate_dataframe(list_df: list, logger=None, verbose: bool = False) ->
     return df
-def _read_l0a(filepath: str, verbose: bool = False, logger=None, debugging_mode: bool = False) -> pd.DataFrame:
-    # Log
-    msg = f"Reading L0 Apache Parquet file at {filepath} started."
-    log_info(logger=logger, msg=msg, verbose=verbose)
-    # Open file
-    df = pd.read_parquet(filepath)
-    if debugging_mode:
-        df = df.iloc[0:100]
-    # Log
-    msg = f"Reading L0 Apache Parquet file at {filepath} ended."
-    log_info(logger=logger, msg=msg, verbose=verbose)
-    return df
 def read_l0a_dataframe(
     filepaths: Union[str, list],
-    verbose: bool = False,
-    logger=None,
     debugging_mode: bool = False,
 ) -> pd.DataFrame:
     """Read DISDRODB L0A Apache Parquet file(s).
@@ -781,13 +774,10 @@ def read_l0a_dataframe(
     ----------
     filepaths : str or list
         Either a list or a single filepath.
-    verbose : bool
-        Whether to print detailed processing information into terminal.
-        The default is ``False``.
     debugging_mode : bool
         If ``True``, it reduces the amount of data to process.
         If filepaths is a list, it reads only the first 3 files.
-        For each file it select only the first 100 rows.
+        It selects only 100 rows sampled from the first 3 files.
         The default is ``False``.
     Returns
@@ -796,8 +786,6 @@ def read_l0a_dataframe(
         L0A Dataframe.
     """
-    from disdrodb.l0.l0a_processing import concatenate_dataframe
     # ----------------------------------------
     # Check filepaths validity
     if not isinstance(filepaths, (list, str)):
@@ -814,12 +802,15 @@ def read_l0a_dataframe(
     # ---------------------------------------------------
     # Define the list of dataframe
-    list_df = [
-        _read_l0a(filepath, verbose=verbose, logger=logger, debugging_mode=debugging_mode) for filepath in filepaths
-    ]
+    df = pq.ParquetDataset(filepaths).read().to_pandas()
-    # Concatenate dataframe
-    df = concatenate_dataframe(list_df, logger=logger, verbose=verbose)
+    # Ensure no index
+    df = df.reset_index(drop=True)
+    # Reduce rows
+    if debugging_mode:
+        n_rows = min(100, len(df))
+        df = df.sample(n=n_rows)
     # Ensure time is in nanoseconds
     df["time"] = df["time"].astype("M8[ns]")
@@ -833,14 +824,15 @@ def read_l0a_dataframe(
 #### L0A Utility
-def read_raw_text_files(
+def generate_l0a(
     filepaths: Union[list, str],
     reader,
     sensor_name,
+    issue_dict=None,
     verbose=True,
     logger=None,
 ) -> pd.DataFrame:
-    """Read and parse a list for raw files into a dataframe.
+    """Read and parse a list of raw files and generate a DISDRODB L0A dataframe.
     Parameters
     ----------
@@ -851,6 +843,13 @@ def read_raw_text_files(
         Format: reader(filepath, logger=None)
     sensor_name : str
         Name of the sensor.
+    issue_dict : dict, optional
+        Issue dictionary providing information on timesteps to remove.
+        The default is ``None``.
+        Valid issue_dict keys are ``'timesteps'`` and ``'time_periods'``.
+        Valid issue_dict values are lists of datetime64 values (with second accuracy).
+        To correctly format and check the validity of the ``issue_dict``, use
+        the ``disdrodb.l0.issue.check_issue_dict`` function.
     verbose : bool
         Whether to verbose the processing. The default is ``True``.
@@ -886,6 +885,7 @@ def read_raw_text_files(
             df = sanitize_df(
                 df=df,
                 sensor_name=sensor_name,
+                issue_dict=issue_dict,
                 logger=logger,
                 verbose=verbose,
             )

disdrodb/l0/l0b_nc_processing.py CHANGED Viewed

@@ -19,6 +19,7 @@
 """Functions to process DISDRODB raw netCDF files into DISDRODB L0B netCDF files."""
 import logging
+from typing import Union
 import numpy as np
@@ -33,6 +34,7 @@ from disdrodb.l0.standards import (
     get_valid_variable_names,
 )
 from disdrodb.utils.logger import (
+    log_error,
     # log_warning,
     # log_debug,
     log_info,
@@ -169,6 +171,8 @@ def standardize_raw_dataset(ds, dict_names, sensor_name):
     # If missing variables, infill with NaN array
     missing_vars = _get_missing_variables(ds, dict_names, sensor_name)
+    if "raw_drop_number" in missing_vars:
+        raise ValueError("The raw drop spectrum is not present in the netCDF file!")
     if len(missing_vars) > 0:
         ds = add_dataset_missing_variables(ds=ds, missing_vars=missing_vars, sensor_name=sensor_name)
@@ -454,8 +458,8 @@ def sanitize_ds(
     ----------
     ds : xarray.Dataset
         Raw xarray dataset
-    attrs: dict
-        Global metadata to attach as global attributes to the xr.Dataset.
+    metadata: dict
+        Station metadata to attach as global attributes to the xr.Dataset.
     sensor_name : str
         Name of the sensor.
     verbose : bool
@@ -525,3 +529,105 @@ def open_raw_netcdf_file(
     # Log information
     log_info(logger=logger, msg=f"netCDF file {filepath} has been loaded successively into xarray.", verbose=False)
     return ds
+def generate_l0b_from_nc(
+    filepaths: Union[list, str],
+    reader,
+    sensor_name,
+    metadata,
+    issue_dict=None,
+    verbose=True,
+    logger=None,
+):
+    """Read and parse a list of raw netCDF files and generate a DISDRODB L0B dataset.
+    Parameters
+    ----------
+    filepaths : Union[list,str]
+        File(s) path(s)
+    reader:
+        DISDRODB reader function.
+        Format: reader(filepath, logger=None)
+    sensor_name : str
+        Name of the sensor.
+    metadata: dict
+        Station metadata to attach as global attributes to the xr.Dataset.
+    issue_dict : dict, optional
+        Issue dictionary providing information on timesteps to remove.
+        The default is ``None``.
+        Valid issue_dict keys are ``'timesteps'`` and ``'time_periods'``.
+        Valid issue_dict values are lists of datetime64 values (with second accuracy).
+        To correctly format and check the validity of the ``issue_dict``, use
+        the ``disdrodb.l0.issue.check_issue_dict`` function.
+    verbose : bool
+        Whether to verbose the processing. The default is ``True``.
+    Returns
+    -------
+    xarray.Dataset
+        DISDRODB L0B Dataset.
+    Raises
+    ------
+    ValueError
+        Input parameters can not be used or the raw file can not be processed.
+    """
+    import xarray as xr
+    # Check input list
+    if isinstance(filepaths, str):
+        filepaths = [filepaths]
+    if len(filepaths) == 0:
+        raise ValueError("'filepaths' must contains at least 1 filepath.")
+    # ------------------------------------------------------.
+    # Loop over all raw files
+    n_files = len(filepaths)
+    processed_file_counter = 0
+    list_skipped_files_msg = []
+    list_ds = []
+    for filepath in filepaths:
+        # Try to read the raw netCDF file
+        try:
+            ds = reader(filepath, logger=logger)
+            # Sanitize the dataset
+            ds = sanitize_ds(
+                ds=ds,
+                sensor_name=sensor_name,
+                metadata=metadata,
+                issue_dict=issue_dict,
+                verbose=verbose,
+                logger=logger,
+            )
+            # Append the dataset to the list
+            list_ds.append(ds)
+            # Update the logger
+            processed_file_counter += 1
+            msg = f"Raw file '{filepath}' processed successfully ({processed_file_counter}/{n_files})."
+            log_info(logger=logger, msg=msg, verbose=verbose)
+        # Skip the file if the processing fails
+        except Exception as e:
+            # Update the logger
+            msg = f"{filepath} has been skipped. The error is: {e}."
+            log_error(logger=logger, msg=msg, verbose=verbose)
+            list_skipped_files_msg.append(msg)
+    # Update logger
+    msg = f"{len(list_skipped_files_msg)} of {n_files} have been skipped."
+    log_info(logger=logger, msg=msg, verbose=verbose)
+    # Check if there are files to concatenate
+    if len(list_ds) == 0:
+        raise ValueError("Any raw file could be read!")
+    ##----------------------------------------------------------------.
+    # Concatenate the datasets
+    list_ds = [ds.chunk({"time": -1}) for ds in list_ds]
+    ds = xr.concat(list_ds, dim="time", join="outer", compat="no_conflicts", combine_attrs="override").sortby("time")
+    ds = ds.compute()
+    # Return the dataset
+    return ds

disdrodb/l0/l0b_processing.py CHANGED Viewed

@@ -386,13 +386,13 @@ def _define_dataset_variables(df, sensor_name, logger=None, verbose=False):
     return data_vars
-def create_l0b_from_l0a(
+def generate_l0b(
     df: pd.DataFrame,
     metadata: dict,
     logger=None,
     verbose: bool = False,
 ) -> xr.Dataset:
-    """Transform the L0A dataframe to the L0B xr.Dataset.
+    """Transform the DISDRODB L0A dataframe to the DISDRODB L0B xr.Dataset.
     Parameters
     ----------
@@ -503,8 +503,8 @@ def set_l0b_encodings(ds: xr.Dataset, sensor_name: str):
     xarray.Dataset
         Output xarray dataset.
     """
-    encoding_dict = get_l0b_encodings_dict(sensor_name)
-    ds = set_encodings(ds=ds, encoding_dict=encoding_dict)
+    encodings_dict = get_l0b_encodings_dict(sensor_name)
+    ds = set_encodings(ds=ds, encodings_dict=encodings_dict)
     return ds

disdrodb/l0/l0c_processing.py CHANGED Viewed

@@ -388,11 +388,10 @@ def check_timesteps_regularity(ds, sample_interval, verbose=False, logger=None):
     return ds
-def finalize_l0c_dataset(ds, sample_interval, start_day, end_day, verbose=True, logger=None):
+def finalize_l0c_dataset(ds, sample_interval, verbose=True, logger=None):
     """Finalize a L0C dataset with unique sampling interval.
-    It adds the sampling_interval coordinate and it regularizes
-    the timesteps for trailing seconds.
+    It adds the sampling_interval coordinate and it regularizes the timesteps for trailing seconds.
     """
     # Add sample interval as coordinate
     ds = add_sample_interval(ds, sample_interval=sample_interval)
@@ -409,9 +408,6 @@ def finalize_l0c_dataset(ds, sample_interval, start_day, end_day, verbose=True,
     # Performs checks about timesteps regularity
     ds = check_timesteps_regularity(ds=ds, sample_interval=sample_interval, verbose=verbose, logger=logger)
-    # Slice for requested day
-    ds = ds.sel({"time": slice(start_day, end_day)})
     return ds
@@ -442,7 +438,7 @@ def create_daily_file(day, filepaths, measurement_intervals, ensure_variables_eq
     - The function adds a tolerance for searching timesteps
     before and after 00:00 to account for imprecise logging times.
     - It checks that duplicated timesteps have the same raw drop number values.
-    - The function infers the time integration sample interval and
+    - The function infers the sample interval and
     regularizes timesteps to handle trailing seconds.
     - The data is loaded into memory and connections to source files
     are closed before returning the dataset.
@@ -461,10 +457,8 @@ def create_daily_file(day, filepaths, measurement_intervals, ensure_variables_eq
     # ---------------------------------------------------------------------------------------.
     # Open files with data within the provided day and concatenate them
-    # list_ds = [xr.open_dataset(filepath, chunks={}).sel({"time": slice(start_day_tol, end_day_tol)})
-    # for filepath in filepaths]
     list_ds = [
-        xr.open_dataset(filepath, decode_timedelta=False, chunks={}, cache=False).sortby("time")
+        xr.open_dataset(filepath, decode_timedelta=False, chunks=-1, cache=False).sortby("time")
         for filepath in filepaths
     ]
     list_ds = [ds.sel({"time": slice(start_day_tol, end_day_tol)}) for ds in list_ds]
@@ -533,11 +527,9 @@ def create_daily_file(day, filepaths, measurement_intervals, ensure_variables_eq
         sample_interval: finalize_l0c_dataset(
             ds=ds,
             sample_interval=sample_interval,
-            start_day=start_day,
-            end_day=end_day,
             verbose=verbose,
             logger=logger,
-        )
+        ).sel({"time": slice(start_day, end_day)})
         for sample_interval, ds in dict_ds.items()
     }
     return dict_ds

disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py ADDED Viewed

@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Reader for DELFT Thies LPM sensor in netCDF format."""
+from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
+from disdrodb.l0.l0b_nc_processing import open_raw_netcdf_file, standardize_raw_dataset
+@is_documented_by(reader_generic_docstring)
+def reader(
+    filepath,
+    logger=None,
+):
+    """Reader."""
+    ##------------------------------------------------------------------------.
+    #### Open the netCDF
+    ds = open_raw_netcdf_file(filepath=filepath, logger=logger)
+    ##------------------------------------------------------------------------.
+    #### Adapt the dataset to adhere to DISDRODB L0 standards
+    # Add time coordinate
+    ds["time"] = ds["time_as_string"].astype("M8[s]")
+    ds["time"].attrs.pop("comment", None)
+    ds["time"].attrs.pop("units", None)
+    ds = ds.set_coords("time")
+    # Define dictionary mapping dataset variables to select and rename
+    dict_names = {
+        ### Dimensions
+        "diameter_classes": "diameter_bin_center",
+        "velocity_classes": "velocity_bin_center",
+        ### Variables
+        "liquid_precip_intensity": "rainfall_rate",
+        "solid_precip_intensity": "snowfall_rate",
+        "all_precip_intensity": "precipitation_rate",
+        "weather_code_synop_4680": "weather_code_synop_4680",
+        "weather_code_synop_4677": "weather_code_synop_4677",
+        "reflectivity": "reflectivity",
+        "visibility": "mor_visibility",
+        "total_number_particles": "number_particles",
+        "ambient_temperature": "temperature_ambient",
+        "status_laser": "laser_status",
+        "measurement_quality": "quality_index",
+        "raw_spectrum": "raw_drop_number",
+    }
+    # Rename dataset variables and columns and infill missing variables
+    ds = standardize_raw_dataset(ds=ds, dict_names=dict_names, sensor_name="LPM")
+    # Return the dataset adhering to DISDRODB L0B standards
+    return ds

disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} RENAMED Viewed

@@ -62,6 +62,9 @@ def reader(
     #   - Available: gzip, bz2, zip
     reader_kwargs["compression"] = "infer"
+    # - Replace undecodable bytes with a placeholder character (instead of raising a decoding error)
+    reader_kwargs["encoding_errors"] = "replace"
     # - Strings to recognize as NA/NaN and replace with standard NA flags
     #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
     #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',

disdrodb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

disdrodb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl