disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. disdrodb/__init__.py +68 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +177 -24
  7. disdrodb/api/configs.py +3 -3
  8. disdrodb/api/info.py +13 -13
  9. disdrodb/api/io.py +281 -22
  10. disdrodb/api/path.py +184 -195
  11. disdrodb/api/search.py +18 -9
  12. disdrodb/cli/disdrodb_create_summary.py +103 -0
  13. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  14. disdrodb/cli/disdrodb_run_l0.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
  19. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  20. disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
  21. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  22. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  23. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  24. disdrodb/configs.py +149 -4
  25. disdrodb/constants.py +61 -0
  26. disdrodb/data_transfer/download_data.py +127 -11
  27. disdrodb/etc/configs/attributes.yaml +339 -0
  28. disdrodb/etc/configs/encodings.yaml +473 -0
  29. disdrodb/etc/products/L1/global.yaml +13 -0
  30. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  31. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +22 -0
  33. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  34. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  35. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  38. disdrodb/etc/products/L2M/global.yaml +26 -0
  39. disdrodb/issue/writer.py +2 -0
  40. disdrodb/l0/__init__.py +13 -0
  41. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  42. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  43. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  44. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  45. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  46. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  48. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  49. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  50. disdrodb/l0/l0a_processing.py +37 -32
  51. disdrodb/l0/l0b_nc_processing.py +118 -8
  52. disdrodb/l0/l0b_processing.py +30 -65
  53. disdrodb/l0/l0c_processing.py +369 -259
  54. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
  55. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  56. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  58. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  59. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  63. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  66. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  67. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  69. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  71. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  72. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  73. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
  74. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  75. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  76. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  79. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  80. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  81. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  83. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
  84. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  85. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  86. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  87. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  88. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  89. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  90. disdrodb/l1/__init__.py +5 -0
  91. disdrodb/l1/fall_velocity.py +46 -0
  92. disdrodb/l1/filters.py +34 -20
  93. disdrodb/l1/processing.py +46 -45
  94. disdrodb/l1/resampling.py +77 -66
  95. disdrodb/l1_env/routines.py +18 -3
  96. disdrodb/l2/__init__.py +7 -0
  97. disdrodb/l2/empirical_dsd.py +58 -10
  98. disdrodb/l2/processing.py +268 -117
  99. disdrodb/metadata/checks.py +132 -125
  100. disdrodb/metadata/standards.py +3 -1
  101. disdrodb/psd/fitting.py +631 -345
  102. disdrodb/psd/models.py +9 -6
  103. disdrodb/routines/__init__.py +54 -0
  104. disdrodb/{l0/routines.py → routines/l0.py} +316 -355
  105. disdrodb/{l1/routines.py → routines/l1.py} +76 -116
  106. disdrodb/routines/l2.py +1019 -0
  107. disdrodb/{routines.py → routines/wrappers.py} +98 -10
  108. disdrodb/scattering/__init__.py +16 -4
  109. disdrodb/scattering/axis_ratio.py +61 -37
  110. disdrodb/scattering/permittivity.py +504 -0
  111. disdrodb/scattering/routines.py +746 -184
  112. disdrodb/summary/__init__.py +17 -0
  113. disdrodb/summary/routines.py +4196 -0
  114. disdrodb/utils/archiving.py +434 -0
  115. disdrodb/utils/attrs.py +68 -125
  116. disdrodb/utils/cli.py +5 -5
  117. disdrodb/utils/compression.py +30 -1
  118. disdrodb/utils/dask.py +121 -9
  119. disdrodb/utils/dataframe.py +61 -7
  120. disdrodb/utils/decorators.py +31 -0
  121. disdrodb/utils/directories.py +35 -15
  122. disdrodb/utils/encoding.py +37 -19
  123. disdrodb/{l2 → utils}/event.py +15 -173
  124. disdrodb/utils/logger.py +14 -7
  125. disdrodb/utils/manipulations.py +81 -0
  126. disdrodb/utils/routines.py +166 -0
  127. disdrodb/utils/subsetting.py +214 -0
  128. disdrodb/utils/time.py +35 -177
  129. disdrodb/utils/writer.py +20 -7
  130. disdrodb/utils/xarray.py +5 -4
  131. disdrodb/viz/__init__.py +13 -0
  132. disdrodb/viz/plots.py +398 -0
  133. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
  134. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
  135. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
  136. disdrodb/l1/encoding_attrs.py +0 -642
  137. disdrodb/l2/processing_options.py +0 -213
  138. disdrodb/l2/routines.py +0 -868
  139. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  140. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
  141. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
  142. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,525 @@
1
+ #!/usr/bin/env python3
2
+ # -----------------------------------------------------------------------------.
3
+ # Copyright (c) 2021-2023 DISDRODB developers
4
+ #
5
+ # This program is free software: you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation, either version 3 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # This program is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ # -----------------------------------------------------------------------------.
18
+ import os
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+
23
+ from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
24
+ from disdrodb.l0.l0a_processing import read_raw_text_file
25
+ from disdrodb.utils.logger import log_error
26
+
27
+
28
def reader_parsivel(filepath, logger):
    """Reader for Parsivel CR1000 Data Logger file.

    The raw file layout varies across stations and campaigns. The header row
    (containing "TIMESTAMP") is searched within the first 3 rows, and the
    number of header columns selects the appropriate column-name mapping.

    Parameters
    ----------
    filepath : str
        Path to the raw CR1000 'parsivel' text file.
    logger
        Logger passed through to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        Dataframe with the Parsivel integral variables and a datetime
        'time' column, adhering to DISDRODB L0 standards.

    Raises
    ------
    ValueError
        If 'TIMESTAMP' is not found in the first 3 rows of the file, or if
        the number of columns does not match any known logger table layout.
    """
    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter
    reader_kwargs["delimiter"] = "\\n"
    # - Skip first row as columns names
    reader_kwargs["header"] = None
    # - Do not skip any row here: the header row is detected and removed below
    reader_kwargs["skiprows"] = 0
    # - Define encoding
    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    # - Each raw text line is read into a single 'TO_PARSE' column
    df_raw = read_raw_text_file(
        filepath=filepath,
        column_names=["TO_PARSE"],
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    # Retrieve header, number of columns and starting rows
    # - Search in the first 3 rows where "TIMESTAMP" occurs
    # - Once identified the row, strip away everything before TIMESTAMP
    # - Then identify start_row_idx as the header row index + 3
    #   (skipping the rows that follow the header before the actual data)
    for i in range(3):
        line = df_raw.iloc[i]["TO_PARSE"]
        if "TIMESTAMP" in line:
            # Remove double and single quotes
            line = line.replace('""', '"').replace('"', "")
            # Define header
            timestamp_idx = line.find("TIMESTAMP")
            header_str = line[timestamp_idx:]
            header = header_str.split(",")
            # Define number of columns
            n_columns = len(header)
            # Define start row with data
            start_row_idx = i + 3
            break
    else:
        raise ValueError("Could not find 'TIMESTAMP' in the first 3 rows of the file.")

    # Retrieve rows with actual data
    df = df_raw.iloc[start_row_idx:]

    # Expand dataframe
    # - Limit to n_columns - 1 splits so any extra trailing values remain in the last column
    df = df["TO_PARSE"].str.split(",", expand=True, n=n_columns - 1)

    #### Define column names
    # - The number of header columns identifies the logger program / station layout
    # NOTE(review): the 'trasmitter' / 'serial_numer' spellings are kept as-is;
    # they are used consistently in this reader — confirm against DISDRODB standards.
    if n_columns == 15:
        # 05_VILLENEUVE_DE_BERG_1 (2011)
        # 90_GALABRE (2020)
        column_names = [
            "time",
            "RECORD",
            "rainfall_rate_32bit",
            "rainfall_accumulated_32bit",
            "weather_code_synop_4680",
            "weather_code_synop_4677",
            "reflectivity_32bit",
            "mor_visibility",
            "laser_amplitude",
            "number_particles",
            "sensor_temperature",
            "sensor_heating_current",
            "sensor_battery_voltage",
            "sensor_status",
            "rain_kinetic_energy",
        ]
    elif n_columns == 16:
        # 33_PRADEL_VIGNES (2011-2015)
        # - The two 16-column layouts are disambiguated via the 'Panel_Temp' header field
        if "Panel_Temp" in header:
            column_names = [
                "time",
                "RECORD",
                "V_Batt_Min",
                "Panel_Temp",
                "rainfall_rate_32bit",
                "rainfall_accumulated_32bit",
                "weather_code_synop_4680",
                "weather_code_synop_4677",
                "reflectivity_32bit",
                "mor_visibility",
                "laser_amplitude",
                "number_particles",
                "sensor_temperature",
                "sensor_heating_current",
                "sensor_battery_voltage",
                "sensor_status",
            ]
        else:
            # 33_PRADEL_VIGNES (2020)
            column_names = [
                "time",
                "RECORD",
                "rainfall_rate_32bit",
                "rainfall_accumulated_32bit",
                "weather_code_synop_4680",
                "weather_code_synop_4677",
                "reflectivity_32bit",
                "mor_visibility",
                "laser_amplitude",
                "number_particles",
                "sensor_temperature",
                "sensor_heating_current",
                "sensor_battery_voltage",
                "sensor_status",
                "rain_kinetic_energy",
                "V_Batt_Min",
            ]
    elif n_columns == 19:
        column_names = [
            "time",
            "RECORD",
            "rainfall_rate_32bit",
            "rainfall_accumulated_32bit",
            "weather_code_synop_4680",
            "weather_code_synop_4677",
            "reflectivity_32bit",
            "mor_visibility",
            "laser_amplitude",
            "number_particles",
            "sensor_temperature",
            "sensor_heating_current",
            "sensor_battery_voltage",
            "sample_interval",
            "sensor_status",
            "rain_kinetic_energy",
            "sensor_temperature_receiver",
            "sensor_temperature_trasmitter",
            "V_Batt_Min",
        ]
    elif n_columns == 20:
        column_names = [
            "time",
            "RECORD",
            "rainfall_rate_32bit",
            "rainfall_accumulated_32bit",
            "weather_code_synop_4680",
            "weather_code_synop_4677",
            "reflectivity_32bit",
            "mor_visibility",
            "laser_amplitude",
            "number_particles",
            "sensor_temperature",
            "sensor_heating_current",
            "sensor_battery_voltage",
            "sensor_status",
            "sensor_temperature_receiver",
            "sensor_temperature_trasmitter",
            "rain_kinetic_energy",
            "V_Batt_Min",
            "sample_interval",
            "Temps_present",
        ]
    elif n_columns == 24:
        # ALES (2021)
        column_names = [
            "time",  # 0
            "RECORD",  # 1
            "rainfall_rate_32bit",  # 2
            "rainfall_accumulated_32bit",  # 3
            "weather_code_synop_4680",  # 4
            "weather_code_synop_4677",  # 5
            "reflectivity_32bit",  # 6
            "mor_visibility",  # 7
            "laser_amplitude",  # 8
            "number_particles",  # 9
            "sensor_temperature",  # 10
            "sensor_heating_current",  # 11
            "sensor_battery_voltage",  # 12
            "sensor_status",  # 13
            "rain_kinetic_energy",  # 14
            "AccuH_parsivel",  # 15
            "AccuD_parsivel",  # 16
            "AccuM_parsivel",  # 17
            "AccuY_parsivel",  # 18
            "air_temperature",  # 19
            "relative_humidity",  # 20
            "wind_speed",  # 21
            "wind_direction",  # 22
            "V_Batt_Min",  # 23
        ]
    elif n_columns == 25:
        # AINAC (2024)
        # - Same as the 24-column layout plus one trailing unidentified field
        column_names = [
            "time",  # 0
            "RECORD",  # 1
            "rainfall_rate_32bit",  # 2
            "rainfall_accumulated_32bit",  # 3
            "weather_code_synop_4680",  # 4
            "weather_code_synop_4677",  # 5
            "reflectivity_32bit",  # 6
            "mor_visibility",  # 7
            "laser_amplitude",  # 8
            "number_particles",  # 9
            "sensor_temperature",  # 10
            "sensor_heating_current",  # 11
            "sensor_battery_voltage",  # 12
            "sensor_status",  # 13
            "rain_kinetic_energy",  # 14
            "AccuH_parsivel",  # 15
            "AccuD_parsivel",  # 16
            "AccuM_parsivel",  # 17
            "AccuY_parsivel",  # 18
            "air_temperature",  # 19
            "relative_humidity",  # 20
            "wind_speed",  # 21
            "wind_direction",  # 22
            "V_Batt_Min",  # 23
            "unknown",
        ]
    elif n_columns == 41:
        # Keep only the first 15 integral variables; the remaining fields are discarded
        df = df.iloc[:, :15]
        column_names = [
            "time",  # 0
            "RECORD",  # 1
            "rainfall_rate_32bit",  # 2
            "rainfall_accumulated_32bit",  # 3
            "weather_code_synop_4680",  # 4
            "weather_code_synop_4677",  # 5
            "reflectivity_32bit",  # 6
            "mor_visibility",  # 7
            "laser_amplitude",  # 8
            "number_particles",  # 9
            "sensor_temperature",  # 10
            "sensor_heating_current",  # 11
            "sensor_battery_voltage",  # 12
            "sensor_status",  # 13
            "rain_kinetic_energy",  # 14
        ]
    elif n_columns == 76:
        # ALES (2009)
        # - Columns 14:46 hold the 32 drop-concentration classes ("-10" is a no-data flag)
        # - Columns 46: hold 30 velocity classes; the two smallest classes are
        #   prepended as zeros to obtain the standard 32 classes
        raw_drop_concentration = df.iloc[:, 14:46].agg(",".join, axis=1).str.replace("-10", "0")
        raw_drop_average_velocity = "0,0," + df.iloc[:, 46:].agg(",".join, axis=1)
        df = df.iloc[:, 0:14]
        df["raw_drop_concentration"] = raw_drop_concentration
        df["raw_drop_average_velocity"] = raw_drop_average_velocity

        column_names = [
            "time",
            "RECORD",
            "V_Batt_Min",
            "rainfall_rate_32bit",
            "rainfall_accumulated_32bit",
            "weather_code_synop_4680",
            "weather_code_synop_4677",
            "reflectivity_32bit",
            "mor_visibility",
            "laser_amplitude",
            "number_particles",
            "sensor_heating_current",
            "sensor_serial_numer",
            "error_code",
            "raw_drop_concentration",
            "raw_drop_average_velocity",
        ]
    else:
        raise ValueError(f"{filepath} has {n_columns} columns. Undefined reader.")

    ##------------------------------------------------------------------------.
    #### Assign column names
    df.columns = column_names

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Define time as datetime column (unparsable timestamps become NaT)
    df["time"] = pd.to_datetime(df["time"].str.strip('"'), format="%Y-%m-%d %H:%M:%S", errors="coerce")

    # Set missing columns as NaN so all layouts expose the same variables
    potential_missing_columns = [
        "sensor_temperature_receiver",
        "sensor_temperature_trasmitter",
        "rain_kinetic_energy",
    ]
    for column in potential_missing_columns:
        if column not in df.columns:
            df[column] = np.nan

    # Drop columns not agreeing with DISDRODB L0 standards
    # - errors="ignore" because most layouts contain only a subset of these
    columns_to_drop = [
        "RECORD",
        "V_Batt_Min",
        "Panel_Temp",
        "Temps_present",
        "sample_interval",
        "sensor_serial_numer",
        "AccuH_parsivel",
        "AccuD_parsivel",
        "AccuM_parsivel",
        "AccuY_parsivel",
        "unknown",
    ]
    df = df.drop(columns=columns_to_drop, errors="ignore")
    return df
345
+
346
+
347
def select_only_valid_rows(df, expected_n_values, logger, filepath):
    """Keep only rows whose comma-separated value count matches the chosen expected count.

    Among the expected counts actually observed in the file, the most frequent
    one is selected; rows with any other count (expected or not) are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with 'TO_PARSE' (raw comma-separated string) and 'time' columns.
    expected_n_values : int, float or list
        Accepted number(s) of comma-separated values per row.
    logger
        Logger used to report rows with unexpected value counts.
    filepath : str
        File path, used only in error/log messages.

    Returns
    -------
    tuple
        ``(df, n_values, invalid_timesteps)``: the filtered dataframe, the
        selected value count, and the 'time' values of the dropped rows.

    Raises
    ------
    ValueError
        If no row matches any of the expected value counts.
    """
    # Normalize a scalar input into a list of accepted counts
    if isinstance(expected_n_values, (int, float)):
        expected_n_values = [expected_n_values]

    # Count the comma-separated values of each row
    counts_per_row = df["TO_PARSE"].apply(lambda s: len(s.split(",")))

    # Build the frequency table of the observed counts
    observed, frequencies = np.unique(counts_per_row, return_counts=True)

    # Retain only the observed counts that are expected
    candidates = []
    for value, frequency in zip(observed, frequencies):
        if value in expected_n_values:
            candidates.append((value, frequency))
    if not candidates:
        raise ValueError(
            f"{filepath} has no rows with expected number of values: {expected_n_values}."
            f"Found rows with the following number of values: {observed}.",
        )

    # Select the most frequent expected count
    n_values = max(candidates, key=lambda item: item[1])[0]

    # Flag the rows deviating from the selected count
    is_invalid = counts_per_row != n_values
    invalid_timesteps = df["time"][is_invalid]
    invalid_timesteps_str = list(invalid_timesteps.astype(str))

    # Report when the file mixes several row formats
    if len(observed) != 1:
        msg = f"{filepath} has an unexpected number of values at following timesteps: {invalid_timesteps_str}."
        log_error(msg=msg, logger=logger)

    # Discard the flagged rows
    df = df[~is_invalid]

    return df, n_values, invalid_timesteps
383
+
384
+
385
def add_nan_at_invalid_timesteps(df, invalid_timesteps):
    """Reinsert rows at invalid timesteps, with all data columns set to the string "NaN".

    Returns a new dataframe sorted by time; the input dataframe is not
    modified in place (callers must use the return value). The "NaN" string
    placeholder is parsed as missing data downstream.
    """
    # Nothing to infill: hand back the dataframe untouched
    n_invalid = len(invalid_timesteps)
    if n_invalid == 0:
        return df

    # Build placeholder rows ("NaN" in every column except 'time')
    filler = {}
    for column in df.columns:
        if column != "time":
            filler[column] = ["NaN"] * n_invalid
    nan_rows = pd.DataFrame(filler)
    nan_rows["time"] = invalid_timesteps.to_numpy()

    # Append the placeholder rows and restore chronological order
    combined = pd.concat([df, nan_rows], ignore_index=True)
    combined = combined.sort_values("time")
    return combined.reset_index(drop=True)
398
+
399
+
400
def reader_spectrum(filepath, logger):
    """Reader for Spectrum CR1000 Data Logger file.

    Parameters
    ----------
    filepath : str
        Path to the raw CR1000 'spectre' text file.
    logger
        Logger passed through to ``read_raw_text_file`` and error logging.

    Returns
    -------
    pandas.DataFrame
        Dataframe with 'time' and the raw drop array columns
        ('raw_drop_number' and, depending on the file layout,
        'raw_drop_concentration' and 'raw_drop_average_velocity').

    Raises
    ------
    ValueError
        If no row matches the expected number of spectrum values.
    """
    ##------------------------------------------------------------------------.
    #### Define column names
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Define delimiter
    reader_kwargs["delimiter"] = "\\n"
    # - Skip first row as columns names
    reader_kwargs["header"] = None
    # - Skip the 4 header rows preceding the data
    reader_kwargs["skiprows"] = 4
    # - Define encoding
    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Split off the leading time and record-counter fields
    df = df["TO_PARSE"].str.split(",", n=2, expand=True)
    df.columns = ["time", "RECORD", "TO_PARSE"]

    # Define time in datetime format (unparsable timestamps become NaT)
    df["time"] = pd.to_datetime(df["time"].str.strip('"'), format="%Y-%m-%d %H:%M:%S", errors="coerce")

    # Keep only rows with valid number of values
    # - 1024 = 32x32 raw spectrum only
    # - 1054 = 30 velocity classes + 32x32 raw spectrum
    # - 1086 = 32 concentration + 30 velocity classes + 32x32 raw spectrum
    df, n_values, invalid_timesteps = select_only_valid_rows(
        df=df,
        expected_n_values=[1024, 1054, 1086],
        logger=logger,
        filepath=filepath,
    )

    # Derive raw drop arrays
    if n_values == 1024:
        df["raw_drop_number"] = df["TO_PARSE"]
    elif n_values == 1054:
        # VALESCURE (2014 03-09)
        # - Two zero classes are prepended to the 30 velocity classes to obtain 32
        df_split = df["TO_PARSE"].str.split(",", expand=True)
        raw_drop_average_velocity = "0,0," + df_split.iloc[:, :30].agg(",".join, axis=1)
        raw_drop_number = df_split.iloc[:, 30:].agg(",".join, axis=1)
        df["raw_drop_average_velocity"] = raw_drop_average_velocity
        df["raw_drop_number"] = raw_drop_number
        df["raw_drop_concentration"] = "NaN"
    elif n_values == 1086:
        # - "-10" is a no-data flag in the concentration classes
        df_split = df["TO_PARSE"].str.split(",", expand=True)
        raw_drop_concentration = df_split.iloc[:, :32].agg(",".join, axis=1).str.replace("-10", "0")
        raw_drop_average_velocity = "0,0," + df_split.iloc[:, 32:62].agg(",".join, axis=1)
        raw_drop_number = df_split.iloc[:, 62:].agg(",".join, axis=1)
        df["raw_drop_concentration"] = raw_drop_concentration
        df["raw_drop_average_velocity"] = raw_drop_average_velocity
        df["raw_drop_number"] = raw_drop_number
    else:
        # Defensive: unreachable because select_only_valid_rows restricts n_values
        raise ValueError(f"{filepath} has {n_values} spectrum values. Undefined reader.")

    # Drop columns not agreeing with DISDRODB L0 standards
    df = df.drop(columns=["TO_PARSE", "RECORD"])

    # Infill with NaN at invalid timesteps
    # - BUGFIX: the helper returns a new dataframe (it does not mutate in place),
    #   so its result must be assigned back; previously the call was discarded
    #   and the NaN infill never took effect.
    df = add_nan_at_invalid_timesteps(df, invalid_timesteps)
    return df
488
+
489
+
490
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    # The raw spectrum is stored in a companion "spectre" file next to the "parsivel" file
    spectrum_filepath = filepath.replace("parsivel", "spectre")

    # Read the integral variables
    df = reader_parsivel(filepath, logger=logger)

    # Drop duplicated timesteps
    df = df.drop_duplicates(subset="time", keep="first")

    # Ensure the raw array columns exist
    # --> An empty string produces a zero-values array in L0B processing
    for column in ("raw_drop_concentration", "raw_drop_average_velocity", "raw_drop_number"):
        if column not in df:
            df[column] = ""

    # Merge the raw spectrum when the companion file is available
    if os.path.exists(spectrum_filepath):
        # Read the raw spectrum and drop duplicated timesteps
        df_spectrum = reader_spectrum(spectrum_filepath, logger=logger)
        df_spectrum = df_spectrum.drop_duplicates(subset="time", keep="first")
        # Align on time and overwrite the placeholder array columns
        df = df.set_index("time")
        df_spectrum = df_spectrum.set_index("time")
        df.update(df_spectrum)
        # Set back time as column
        df = df.reset_index()

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
@@ -107,7 +107,7 @@ def reader(
107
107
  # - The '<SPECTRUM>ZERO</SPECTRUM>' indicates no drops detected
108
108
  # --> "" generates an array of zeros in L0B processing
109
109
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
110
- # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
110
+ # Remove <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
111
111
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
112
112
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")
113
113
 
@@ -15,6 +15,7 @@
15
15
  # You should have received a copy of the GNU General Public License
16
16
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
17
  # -----------------------------------------------------------------------------.
18
+ """This reader allows to read raw data from NASA GCPEX, OLYMPEX and IPHEX campaigns."""
18
19
  import pandas as pd
19
20
 
20
21
  from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
@@ -34,12 +35,13 @@ def reader(
34
35
  ##------------------------------------------------------------------------.
35
36
  #### Define reader options
36
37
  reader_kwargs = {}
37
- # Skip first row as columns names
38
- reader_kwargs["header"] = None
39
- # Skip file with encoding errors
40
- reader_kwargs["encoding_errors"] = "ignore"
41
38
  # - Define delimiter
42
39
  reader_kwargs["delimiter"] = ";"
40
+ # - Skip first row as columns names
41
+ reader_kwargs["header"] = None
42
+ reader_kwargs["skiprows"] = 0
43
+ # - Skip file with encoding errors
44
+ reader_kwargs["encoding_errors"] = "ignore"
43
45
  # - Avoid first column to become df index !!!
44
46
  reader_kwargs["index_col"] = False
45
47
  # - Define behaviour when encountering bad lines
@@ -68,14 +70,14 @@ def reader(
68
70
 
69
71
  ##------------------------------------------------------------------------.
70
72
  #### Adapt the dataframe to adhere to DISDRODB L0 standards
71
- # Define 'time' datetime
73
+ # Convert time column to datetime
72
74
  df_time = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
73
75
 
74
76
  # Split the 'TO_BE_SPLITTED' column
75
77
  df = df["TO_BE_SPLITTED"].str.split(",", n=9, expand=True)
76
78
 
77
79
  # Assign column names
78
- column_names = [
80
+ names = [
79
81
  "station_name",
80
82
  "sensor_status",
81
83
  "sensor_temperature",
@@ -87,7 +89,7 @@ def reader(
87
89
  "weather_code_synop_4677",
88
90
  "raw_drop_number",
89
91
  ]
90
- df.columns = column_names
92
+ df.columns = names
91
93
 
92
94
  # Add the time column
93
95
  df["time"] = df_time
@@ -115,7 +115,7 @@ def reader(
115
115
  # --> "" generates an array of zeros in L0B processing
116
116
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>ZERO</SPECTRUM>", "")
117
117
 
118
- # Remove <SPECTRUM> and </SPECTRUM>" acronyms from the raw_drop_number field
118
+ # Remove <SPECTRUM> and </SPECTRUM> prefix and suffix from the raw_drop_number field
119
119
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("<SPECTRUM>", "")
120
120
  df["raw_drop_number"] = df["raw_drop_number"].str.replace("</SPECTRUM>", "")
121
121