disdrodb 0.0.21__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +132 -15
- disdrodb/_config.py +4 -2
- disdrodb/_version.py +9 -4
- disdrodb/api/checks.py +264 -237
- disdrodb/api/configs.py +4 -8
- disdrodb/api/create_directories.py +235 -290
- disdrodb/api/info.py +217 -26
- disdrodb/api/io.py +295 -269
- disdrodb/api/path.py +597 -173
- disdrodb/api/search.py +486 -0
- disdrodb/{metadata/scripts → cli}/disdrodb_check_metadata_archive.py +12 -7
- disdrodb/{utils/pandas.py → cli/disdrodb_data_archive_directory.py} +9 -18
- disdrodb/cli/disdrodb_download_archive.py +86 -0
- disdrodb/cli/disdrodb_download_metadata_archive.py +53 -0
- disdrodb/cli/disdrodb_download_station.py +84 -0
- disdrodb/{api/scripts → cli}/disdrodb_initialize_station.py +22 -10
- disdrodb/cli/disdrodb_metadata_archive_directory.py +32 -0
- disdrodb/{data_transfer/scripts/disdrodb_download_station.py → cli/disdrodb_open_data_archive.py} +22 -22
- disdrodb/cli/disdrodb_open_logs_directory.py +69 -0
- disdrodb/{data_transfer/scripts/disdrodb_upload_station.py → cli/disdrodb_open_metadata_archive.py} +22 -24
- disdrodb/cli/disdrodb_open_metadata_directory.py +71 -0
- disdrodb/cli/disdrodb_open_product_directory.py +74 -0
- disdrodb/cli/disdrodb_open_readers_directory.py +32 -0
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0.py +38 -31
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0_station.py +32 -30
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0a.py +30 -21
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0a_station.py +24 -33
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0b.py +30 -21
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0b_station.py +25 -34
- disdrodb/cli/disdrodb_run_l0c.py +130 -0
- disdrodb/cli/disdrodb_run_l0c_station.py +129 -0
- disdrodb/cli/disdrodb_run_l1.py +122 -0
- disdrodb/cli/disdrodb_run_l1_station.py +121 -0
- disdrodb/cli/disdrodb_run_l2e.py +122 -0
- disdrodb/cli/disdrodb_run_l2e_station.py +122 -0
- disdrodb/cli/disdrodb_run_l2m.py +122 -0
- disdrodb/cli/disdrodb_run_l2m_station.py +122 -0
- disdrodb/cli/disdrodb_upload_archive.py +105 -0
- disdrodb/cli/disdrodb_upload_station.py +98 -0
- disdrodb/configs.py +90 -25
- disdrodb/data_transfer/__init__.py +22 -0
- disdrodb/data_transfer/download_data.py +87 -90
- disdrodb/data_transfer/upload_data.py +64 -37
- disdrodb/data_transfer/zenodo.py +15 -18
- disdrodb/docs.py +1 -1
- disdrodb/issue/__init__.py +17 -4
- disdrodb/issue/checks.py +10 -23
- disdrodb/issue/reader.py +9 -12
- disdrodb/issue/writer.py +14 -17
- disdrodb/l0/__init__.py +17 -26
- disdrodb/l0/check_configs.py +35 -23
- disdrodb/l0/check_standards.py +32 -42
- disdrodb/l0/configs/{Thies_LPM → LPM}/bins_diameter.yml +44 -44
- disdrodb/l0/configs/{Thies_LPM → LPM}/bins_velocity.yml +40 -40
- disdrodb/l0/configs/LPM/l0a_encodings.yml +80 -0
- disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_cf_attrs.yml +62 -59
- disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_encodings.yml +9 -9
- disdrodb/l0/configs/{Thies_LPM → LPM}/raw_data_format.yml +245 -245
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_diameter.yml +66 -66
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_velocity.yml +64 -64
- disdrodb/l0/configs/PARSIVEL/l0a_encodings.yml +32 -0
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_cf_attrs.yml +22 -20
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_encodings.yml +17 -17
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/raw_data_format.yml +77 -77
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_diameter.yml +64 -64
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_velocity.yml +64 -64
- disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +39 -0
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_cf_attrs.yml +24 -22
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_encodings.yml +20 -20
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/raw_data_format.yml +98 -98
- disdrodb/l0/configs/{RD_80 → RD80}/bins_diameter.yml +40 -40
- disdrodb/l0/configs/RD80/l0a_encodings.yml +16 -0
- disdrodb/l0/configs/{RD_80 → RD80}/l0b_cf_attrs.yml +3 -3
- disdrodb/l0/configs/RD80/l0b_encodings.yml +135 -0
- disdrodb/l0/configs/{RD_80 → RD80}/raw_data_format.yml +48 -48
- disdrodb/l0/l0_reader.py +216 -340
- disdrodb/l0/l0a_processing.py +237 -208
- disdrodb/l0/l0b_nc_processing.py +227 -80
- disdrodb/l0/l0b_processing.py +93 -173
- disdrodb/l0/l0c_processing.py +627 -0
- disdrodb/l0/readers/{ARM → LPM/ARM}/ARM_LPM.py +36 -58
- disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +226 -0
- disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +185 -0
- disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +183 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +179 -0
- disdrodb/l0/readers/{UK → LPM/UK}/DIVEN.py +14 -35
- disdrodb/l0/readers/PARSIVEL/AUSTRALIA/MELBOURNE_2007_PARSIVEL.py +157 -0
- disdrodb/l0/readers/PARSIVEL/CHINA/CHONGQING.py +113 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/ARCTIC_2021.py +40 -57
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/COMMON_2011.py +37 -54
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/DAVOS_2009_2011.py +34 -51
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_2009.py +34 -51
- disdrodb/l0/readers/{EPFL/PARADISO_2014.py → PARSIVEL/EPFL/EPFL_ROOF_2008.py} +38 -50
- disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2010.py +105 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2011.py +34 -51
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2012.py +33 -51
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GENEPI_2007.py +25 -44
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007.py +25 -44
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007_2.py +25 -44
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HPICONET_2010.py +34 -51
- disdrodb/l0/readers/{EPFL/EPFL_ROOF_2010.py → PARSIVEL/EPFL/HYMEX_LTE_SOP2.py} +37 -50
- disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP3.py +111 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HYMEX_LTE_SOP4.py +36 -54
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2018.py +34 -52
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2019.py +38 -56
- disdrodb/l0/readers/PARSIVEL/EPFL/PARADISO_2014.py +105 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PARSIVEL_2007.py +27 -45
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PLATO_2019.py +24 -44
- disdrodb/l0/readers/PARSIVEL/EPFL/RACLETS_2019.py +140 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RACLETS_2019_WJF.py +41 -59
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RIETHOLZBACH_2011.py +34 -51
- disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2017.py +117 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2019.py +137 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/UNIL_2022.py +42 -55
- disdrodb/l0/readers/PARSIVEL/GPM/IFLOODS.py +104 -0
- disdrodb/l0/readers/{GPM → PARSIVEL/GPM}/LPVEX.py +29 -48
- disdrodb/l0/readers/PARSIVEL/GPM/MC3E.py +184 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/CCOPE_2015.py +113 -0
- disdrodb/l0/readers/{NCAR/VORTEX_SE_2016_P1.py → PARSIVEL/NCAR/OWLES_MIPS.py} +46 -72
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +125 -0
- disdrodb/l0/readers/{NCAR/OWLES_MIPS.py → PARSIVEL/NCAR/PLOWS_MIPS.py} +45 -64
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +114 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +176 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +183 -0
- disdrodb/l0/readers/{ARM/ARM_LD.py → PARSIVEL2/ARM/ARM_PARSIVEL2.py} +27 -50
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +163 -0
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +163 -0
- disdrodb/l0/readers/{DENMARK → PARSIVEL2/DENMARK}/EROSION_nc.py +14 -35
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +119 -0
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +104 -0
- disdrodb/l0/readers/PARSIVEL2/GPM/NSSTC.py +176 -0
- disdrodb/l0/readers/PARSIVEL2/ITALY/GID_PARSIVEL2.py +32 -0
- disdrodb/l0/readers/PARSIVEL2/MEXICO/OH_IIUNAM_nc.py +56 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +120 -0
- disdrodb/l0/readers/{NCAR → PARSIVEL2/NCAR}/PECAN_MIPS.py +45 -64
- disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +181 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +160 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +160 -0
- disdrodb/l0/readers/{NCAR/PLOWS_MIPS.py → PARSIVEL2/NCAR/VORTEX_SE_2016_P1.py} +49 -66
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +118 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +152 -0
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT.py +166 -0
- disdrodb/l0/readers/{NCAR/RELAMPAGO_RD80.py → RD80/BRAZIL/CHUVA_RD80.py} +36 -60
- disdrodb/l0/readers/{BRAZIL → RD80/BRAZIL}/GOAMAZON_RD80.py +36 -55
- disdrodb/l0/readers/{NCAR → RD80/NCAR}/CINDY_2011_RD80.py +35 -54
- disdrodb/l0/readers/{BRAZIL/CHUVA_RD80.py → RD80/NCAR/RELAMPAGO_RD80.py} +40 -54
- disdrodb/l0/readers/template_reader_raw_netcdf_data.py +62 -0
- disdrodb/l0/readers/{reader_template.py → template_reader_raw_text_data.py} +20 -44
- disdrodb/l0/routines.py +885 -581
- disdrodb/l0/standards.py +72 -236
- disdrodb/l0/template_tools.py +104 -109
- disdrodb/l1/__init__.py +17 -0
- disdrodb/l1/beard_model.py +716 -0
- disdrodb/l1/encoding_attrs.py +620 -0
- disdrodb/l1/fall_velocity.py +260 -0
- disdrodb/l1/filters.py +192 -0
- disdrodb/l1/processing.py +200 -0
- disdrodb/l1/resampling.py +236 -0
- disdrodb/l1/routines.py +357 -0
- disdrodb/l1_env/__init__.py +17 -0
- disdrodb/l1_env/routines.py +38 -0
- disdrodb/l2/__init__.py +17 -0
- disdrodb/l2/empirical_dsd.py +1735 -0
- disdrodb/l2/event.py +388 -0
- disdrodb/l2/processing.py +519 -0
- disdrodb/l2/processing_options.py +213 -0
- disdrodb/l2/routines.py +868 -0
- disdrodb/metadata/__init__.py +9 -2
- disdrodb/metadata/checks.py +165 -118
- disdrodb/metadata/download.py +81 -0
- disdrodb/metadata/geolocation.py +146 -0
- disdrodb/metadata/info.py +20 -13
- disdrodb/metadata/manipulation.py +1 -1
- disdrodb/metadata/reader.py +59 -8
- disdrodb/metadata/search.py +77 -144
- disdrodb/metadata/standards.py +7 -8
- disdrodb/metadata/writer.py +8 -14
- disdrodb/psd/__init__.py +38 -0
- disdrodb/psd/fitting.py +2146 -0
- disdrodb/psd/models.py +774 -0
- disdrodb/routines.py +1176 -0
- disdrodb/scattering/__init__.py +28 -0
- disdrodb/scattering/axis_ratio.py +344 -0
- disdrodb/scattering/routines.py +456 -0
- disdrodb/utils/__init__.py +17 -0
- disdrodb/utils/attrs.py +208 -0
- disdrodb/utils/cli.py +269 -0
- disdrodb/utils/compression.py +60 -42
- disdrodb/utils/dask.py +62 -0
- disdrodb/utils/decorators.py +110 -0
- disdrodb/utils/directories.py +107 -46
- disdrodb/utils/encoding.py +127 -0
- disdrodb/utils/list.py +29 -0
- disdrodb/utils/logger.py +168 -46
- disdrodb/utils/time.py +657 -0
- disdrodb/utils/warnings.py +30 -0
- disdrodb/utils/writer.py +57 -0
- disdrodb/utils/xarray.py +138 -47
- disdrodb/utils/yaml.py +0 -1
- disdrodb/viz/__init__.py +17 -0
- disdrodb/viz/plots.py +17 -0
- disdrodb-0.1.0.dist-info/METADATA +321 -0
- disdrodb-0.1.0.dist-info/RECORD +216 -0
- {disdrodb-0.0.21.dist-info → disdrodb-0.1.0.dist-info}/WHEEL +1 -1
- disdrodb-0.1.0.dist-info/entry_points.txt +30 -0
- disdrodb/data_transfer/scripts/disdrodb_download_archive.py +0 -53
- disdrodb/data_transfer/scripts/disdrodb_upload_archive.py +0 -57
- disdrodb/l0/configs/OTT_Parsivel/l0a_encodings.yml +0 -32
- disdrodb/l0/configs/OTT_Parsivel2/l0a_encodings.yml +0 -39
- disdrodb/l0/configs/RD_80/l0a_encodings.yml +0 -16
- disdrodb/l0/configs/RD_80/l0b_encodings.yml +0 -135
- disdrodb/l0/configs/Thies_LPM/l0a_encodings.yml +0 -80
- disdrodb/l0/io.py +0 -257
- disdrodb/l0/l0_processing.py +0 -1091
- disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_OTT.py +0 -178
- disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_THIES.py +0 -247
- disdrodb/l0/readers/BRAZIL/CHUVA_LPM.py +0 -204
- disdrodb/l0/readers/BRAZIL/CHUVA_OTT.py +0 -183
- disdrodb/l0/readers/BRAZIL/GOAMAZON_LPM.py +0 -204
- disdrodb/l0/readers/BRAZIL/GOAMAZON_OTT.py +0 -183
- disdrodb/l0/readers/CHINA/CHONGQING.py +0 -131
- disdrodb/l0/readers/EPFL/EPFL_ROOF_2008.py +0 -128
- disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP2.py +0 -127
- disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP3.py +0 -129
- disdrodb/l0/readers/EPFL/RACLETS_2019.py +0 -158
- disdrodb/l0/readers/EPFL/SAMOYLOV_2017.py +0 -136
- disdrodb/l0/readers/EPFL/SAMOYLOV_2019.py +0 -158
- disdrodb/l0/readers/FRANCE/SIRTA_OTT2.py +0 -138
- disdrodb/l0/readers/GPM/GCPEX.py +0 -123
- disdrodb/l0/readers/GPM/IFLOODS.py +0 -123
- disdrodb/l0/readers/GPM/MC3E.py +0 -123
- disdrodb/l0/readers/GPM/NSSTC.py +0 -164
- disdrodb/l0/readers/ITALY/GID.py +0 -199
- disdrodb/l0/readers/MEXICO/OH_IIUNAM_nc.py +0 -92
- disdrodb/l0/readers/NCAR/CCOPE_2015.py +0 -133
- disdrodb/l0/readers/NCAR/PECAN_FP3.py +0 -137
- disdrodb/l0/readers/NCAR/PECAN_MOBILE.py +0 -144
- disdrodb/l0/readers/NCAR/RELAMPAGO_OTT.py +0 -195
- disdrodb/l0/readers/NCAR/SNOWIE_PJ.py +0 -172
- disdrodb/l0/readers/NCAR/SNOWIE_SB.py +0 -179
- disdrodb/l0/readers/NCAR/VORTEX2_2009.py +0 -133
- disdrodb/l0/readers/NCAR/VORTEX2_2010.py +0 -188
- disdrodb/l0/readers/NCAR/VORTEX2_2010_UF.py +0 -191
- disdrodb/l0/readers/NCAR/VORTEX_SE_2016_P2.py +0 -135
- disdrodb/l0/readers/NCAR/VORTEX_SE_2016_PIPS.py +0 -170
- disdrodb/l0/readers/NETHERLANDS/DELFT.py +0 -187
- disdrodb/l0/readers/SPAIN/SBEGUERIA.py +0 -179
- disdrodb/l0/scripts/disdrodb_run_l0b_concat.py +0 -93
- disdrodb/l0/scripts/disdrodb_run_l0b_concat_station.py +0 -85
- disdrodb/utils/netcdf.py +0 -452
- disdrodb/utils/scripts.py +0 -102
- disdrodb-0.0.21.dist-info/AUTHORS.md +0 -18
- disdrodb-0.0.21.dist-info/METADATA +0 -186
- disdrodb-0.0.21.dist-info/RECORD +0 -168
- disdrodb-0.0.21.dist-info/entry_points.txt +0 -15
- /disdrodb/l0/configs/{RD_80 → RD80}/bins_velocity.yml +0 -0
- /disdrodb/l0/manuals/{Thies_LPM.pdf → LPM.pdf} +0 -0
- /disdrodb/l0/manuals/{ODM_470.pdf → ODM470.pdf} +0 -0
- /disdrodb/l0/manuals/{OTT_Parsivel.pdf → PARSIVEL.pdf} +0 -0
- /disdrodb/l0/manuals/{OTT_Parsivel2.pdf → PARSIVEL2.pdf} +0 -0
- /disdrodb/l0/manuals/{PWS_100.pdf → PWS100.pdf} +0 -0
- /disdrodb/l0/manuals/{RD_80.pdf → RD80.pdf} +0 -0
- {disdrodb-0.0.21.dist-info → disdrodb-0.1.0.dist-info/licenses}/LICENSE +0 -0
- {disdrodb-0.0.21.dist-info → disdrodb-0.1.0.dist-info}/top_level.txt +0 -0
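The listing above shows two sweeping renames: sensor config directories lose their vendor prefixes (Thies_LPM → LPM, OTT_Parsivel → PARSIVEL, OTT_Parsivel2 → PARSIVEL2, RD_80 → RD80), and every reader moves under a sensor-named directory (readers/&lt;SENSOR&gt;/&lt;DATA_SOURCE&gt;/&lt;READER&gt;.py). For code that imported reader modules by path, this is a breaking change. A minimal sketch of resolving a reader module under the 0.1.0 layout — get_reader_module is a hypothetical helper written for illustration, not part of the disdrodb API:

import importlib

def get_reader_module(sensor_name: str, data_source: str, reader_name: str):
    # 0.0.21 layout: disdrodb.l0.readers.<DATA_SOURCE>.<READER>
    # 0.1.0 layout:  disdrodb.l0.readers.<SENSOR>.<DATA_SOURCE>.<READER>
    module_path = f"disdrodb.l0.readers.{sensor_name}.{data_source}.{reader_name}"
    return importlib.import_module(module_path)

# Example: the EPFL ARCTIC_2021 reader, now filed under PARSIVEL
# reader_module = get_reader_module("PARSIVEL", "EPFL", "ARCTIC_2021")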
disdrodb/l0/l0a_processing.py
CHANGED
@@ -19,7 +19,6 @@
 """Functions to process raw text files into DISDRODB L0A Apache Parquet."""
 
 
-import inspect
 import logging
 import os
 from typing import Union
@@ -39,7 +38,6 @@ from disdrodb.utils.directories import create_directory, remove_if_exists
 
 # Logger
 from disdrodb.utils.logger import (
-    log_debug,
     log_error,
     log_info,
     log_warning,
@@ -55,7 +53,7 @@ pd.set_option("mode.chained_assignment", None)  # Avoid SettingWithCopyWarning
 #### Raw file readers
 
 
-def _preprocess_reader_kwargs(reader_kwargs: dict) -> dict:
+def preprocess_reader_kwargs(reader_kwargs: dict) -> dict:
     """Preprocess arguments required to read raw text file into Pandas.
 
     Parameters
@@ -86,10 +84,20 @@ def _preprocess_reader_kwargs(reader_kwargs: dict) -> dict:
     return reader_kwargs
 
 
-def read_raw_file(
+def check_matching_column_number(df, column_names):
+    """Check the number of columns in the dataframe matches the length of column names."""
+    n_columns = len(df.columns)
+    n_expected_columns = len(column_names)
+    if n_columns != n_expected_columns:
+        msg = f"The dataframe has {n_columns} columns, while {n_expected_columns} are expected !."
+        raise ValueError(msg)
+
+
+def read_raw_text_file(
     filepath: str,
     column_names: list,
     reader_kwargs: dict,
+    logger=None,  # noqa
 ) -> pd.DataFrame:
     """Read a raw file into a dataframe.
 
@@ -100,7 +108,12 @@ def read_raw_file(
     column_names : list
         Column names.
     reader_kwargs : dict
-        Pandas pd.read_csv arguments.
+        Pandas ``pd.read_csv`` arguments.
+    logger : logging.Logger
+        Logger object.
+        The default is ``None``.
+        If ``None``, the logger is created using the module name.
+        If ``logger`` is passed, it will be used to log messages.
 
     Returns
     -------
@@ -108,7 +121,7 @@ def read_raw_file(
         Pandas dataframe.
     """
     # Preprocess reader_kwargs
-    reader_kwargs = _preprocess_reader_kwargs(reader_kwargs)
+    reader_kwargs = preprocess_reader_kwargs(reader_kwargs)
 
     # Enforce all raw files columns with dtype = 'object'
     dtype = "object"
@@ -117,8 +130,17 @@ def read_raw_file(
     try:
         df = pd.read_csv(filepath, names=column_names, dtype=dtype, **reader_kwargs)
     except pd.errors.EmptyDataError:
-        msg = f"
-
+        msg = f"The following file is empty: {filepath}"
+        raise ValueError(msg)
+
+    # Check the dataframe is not empty
+    if len(df.index) == 0:
+        msg = f"The following file is empty: {filepath}"
+        raise ValueError(msg)
+
+    # Check dataframe column number matches columns_names
+    if column_names is not None:
+        check_matching_column_number(df, column_names)
 
     # Return dataframe
     return df
@@ -128,45 +150,19 @@ def read_raw_file(
 #### L0A checks and homogenization
 
 
-def 
-    """
-    if df_sanitizer_fun is None:
-        return None
-    if not callable(df_sanitizer_fun):
-        raise ValueError("'df_sanitizer_fun' must be a function.")
-    if not np.all(np.isin(inspect.getfullargspec(df_sanitizer_fun).args, ["df"])):
-        raise ValueError("The `df_sanitizer_fun` must have only `df` as input argument!")
-
-
-def _check_not_empty_dataframe(df, verbose=False):
-    if len(df.index) == 0:
-        msg = " - The file is empty and has been skipped."
-        log_error(logger=logger, msg=msg, verbose=False)
-        raise ValueError(msg)
-
-
-def _check_matching_column_number(df, column_names, verbose=False):
-    n_columns = len(df.columns)
-    n_expected_columns = len(column_names)
-    if n_columns != n_expected_columns:
-        msg = f" - The dataframe has {n_columns} columns, while {n_expected_columns} are expected !."
-        log_error(logger, msg, verbose)
-        raise ValueError(msg)
-
-
-def remove_rows_with_missing_time(df: pd.DataFrame, verbose: bool = False):
-    """Remove dataframe rows where the "time" is NaT.
+def remove_rows_with_missing_time(df: pd.DataFrame, logger=logger, verbose: bool = False):
+    """Remove dataframe rows where the ``"time"`` is ``NaT``.
 
     Parameters
     ----------
-    df : 
+    df : pandas.DataFrame
         Input dataframe.
     verbose : bool
-        Whether to verbose the processing.
+        Whether to verbose the processing. The default is ``False``.
 
     Returns
     -------
-
+    pandas.DataFrame
         Dataframe with valid timesteps.
     """
     # Get the number of rows of the dataframe
@@ -175,32 +171,31 @@ def remove_rows_with_missing_time(df: pd.DataFrame, verbose: bool = False):
     df = df.dropna(subset="time", axis=0)
     # If no valid timesteps, raise error
     if len(df.index) == 0:
-        msg = "
-        log_error(logger=logger, msg=msg, verbose=False)
+        msg = "There are not valid timestep."
         raise ValueError(msg)
     # Otherwise, report the number of invalid timesteps
     n_invalid_timesteps = n_rows - len(df)
     if n_invalid_timesteps > 0:
-        msg = f"
+        msg = f"{n_invalid_timesteps} rows had invalid timesteps and were discarded."
         log_warning(logger=logger, msg=msg, verbose=verbose)
     return df
 
 
-def remove_duplicated_timesteps(df: pd.DataFrame, verbose: bool = False):
+def remove_duplicated_timesteps(df: pd.DataFrame, logger=None, verbose: bool = False):
     """Remove duplicated timesteps.
 
     It keep only the first timestep occurrence !
 
     Parameters
     ----------
-    df : 
+    df : pandas.DataFrame
         Input dataframe.
     verbose : bool
-        Whether to verbose the processing.
+        Whether to verbose the processing. The default is ``False``.
 
     Returns
     -------
-
+    pandas.DataFrame
         Dataframe with valid unique timesteps.
     """
     values, counts = np.unique(df["time"], return_counts=True)
@@ -208,11 +203,13 @@ def remove_duplicated_timesteps(df: pd.DataFrame, verbose: bool = False):
     values_duplicates = values[idx_duplicates].astype("M8[s]")
     # If there are duplicated timesteps
     if len(values_duplicates) > 0:
+        # TODO: raise error if duplicated timesteps have different values !
+
         # Drop duplicated timesteps (keeping the first occurrence)
         df = df.drop_duplicates(subset="time", keep="first")
         # Report the values of duplicated timesteps
         msg = (
-            f"
+            f"The following timesteps occurred more than once: {values_duplicates}. Only the first occurrence"
             " selected."
         )
         log_warning(logger=logger, msg=msg, verbose=verbose)
@@ -225,13 +222,12 @@ def drop_timesteps(df, timesteps):
     # Check there are row left
     if len(df) == 0:
         msg = "No rows left after removing problematic timesteps. Maybe you need to adjust the issue YAML file."
-        log_warning(logger=logger, msg=msg, verbose=False)
         raise ValueError(msg)
     return df
 
 
 def drop_time_periods(df, time_periods):
-    """Drop problematic 
+    """Drop problematic time periods."""
     for time_period in time_periods:
         if len(df) > 0:
             start_time = time_period[0]
@@ -240,25 +236,26 @@ def drop_time_periods(df, time_periods):
     # Check there are row left
     if len(df) == 0:
         msg = "No rows left after removing problematic time_periods. Maybe you need to adjust the issue YAML file."
-        log_warning(logger=logger, msg=msg, verbose=False)
         raise ValueError(msg)
 
     return df
 
 
-def remove_issue_timesteps(df, issue_dict, verbose=False):
+def remove_issue_timesteps(df, issue_dict, logger=None, verbose=False):
     """Drop dataframe rows with timesteps listed in the issue dictionary.
 
     Parameters
     ----------
-    df : 
+    df : pandas.DataFrame
         Input dataframe.
     issue_dict : dict
-        Issue dictionary
+        Issue dictionary.
+    verbose : bool
+        Whether to verbose the processing. The default is ``False``.
 
     Returns
     -------
-
+    pandas.DataFrame
         Dataframe with problematic timesteps removed.
 
     """
@@ -286,24 +283,21 @@ def remove_issue_timesteps(df, issue_dict, verbose=False):
     return df
 
 
-def cast_column_dtypes(df: pd.DataFrame, sensor_name: str, verbose: bool = False) -> pd.DataFrame:
-    """Convert 'object' dataframe columns into DISDRODB L0A dtype standards.
+def cast_column_dtypes(df: pd.DataFrame, sensor_name: str) -> pd.DataFrame:
+    """Convert ``'object'`` dataframe columns into DISDRODB L0A dtype standards.
 
     Parameters
     ----------
-    df : 
+    df : pandas.DataFrame
         Input dataframe.
     sensor_name : str
         Name of the sensor.
-    verbose : bool
-        Whether to verbose the processing.
 
     Returns
     -------
-
+    pandas.DataFrame
         Dataframe with corrected columns types.
     """
-
     # Cast dataframe to dtypes
     dtype_dict = get_l0a_dtype(sensor_name)
     # Ensure time column is saved with seconds resolution
@@ -321,26 +315,23 @@ def cast_column_dtypes(df: pd.DataFrame, sensor_name: str, verbose: bool = False
             df[column] = df[column].astype(dtype_dict[column])
         except ValueError as e:
             msg = f"ValueError: The column {column} has {e}"
-            log_error(logger=logger, msg=msg, verbose=False)
             raise ValueError(msg)
     return df
 
 
-def coerce_corrupted_values_to_nan(df: pd.DataFrame, sensor_name: str, verbose: bool = False) -> pd.DataFrame:
-    """Coerce corrupted values in dataframe numeric columns to np.nan
+def coerce_corrupted_values_to_nan(df: pd.DataFrame, sensor_name: str) -> pd.DataFrame:
+    """Coerce corrupted values in dataframe numeric columns to ``np.nan``.
 
     Parameters
     ----------
-    df : 
+    df : pandas.DataFrame
         Input dataframe.
     sensor_name : str
         Name of the sensor.
-    verbose : bool
-        Whether to verbose the processing.
 
     Returns
     -------
-
+    pandas.DataFrame
         Dataframe with string columns without corrupted values.
     """
     # Cast dataframe to dtypes
@@ -359,21 +350,19 @@ def coerce_corrupted_values_to_nan(df: pd.DataFrame, sensor_name: str, verbose:
     return df
 
 
-def strip_string_spaces(df: pd.DataFrame, sensor_name: str, verbose: bool = False) -> pd.DataFrame:
+def strip_string_spaces(df: pd.DataFrame, sensor_name: str) -> pd.DataFrame:
     """Strip leading/trailing spaces from dataframe string columns.
 
     Parameters
     ----------
-    df : 
+    df : pandas.DataFrame
         Input dataframe.
     sensor_name : str
         Name of the sensor.
-    verbose : bool
-        Whether to verbose the processing.
 
     Returns
     -------
-
+    pandas.DataFrame
         Dataframe with string columns without leading/trailing spaces.
     """
     # Cast dataframe to dtypes
@@ -390,13 +379,13 @@ def strip_string_spaces(df: pd.DataFrame, sensor_name: str, verbose: bool = Fals
         try:
             df[column] = df[column].str.strip()
         except AttributeError:
-            msg = f"
-            log_error(logger=logger, msg=msg, verbose=False)
+            msg = f"The column {column} is not a string/object dtype."
             raise AttributeError(msg)
     return df
 
 
-def 
+def strip_delimiter(string):
+    """Remove the first and last delimiter occurrence from a string."""
     if not isinstance(string, str):
         return string
     split_str = infer_split_str(string=string)
@@ -415,12 +404,12 @@ def strip_delimiter_from_raw_arrays(df):
     available_fields = list(df.columns[np.isin(df.columns, possible_fields)])
     # Loop over the fields and strip away the delimiter
     for field in available_fields:
-        df[field] = df[field].apply(
+        df[field] = df[field].apply(strip_delimiter)
     # Return the dataframe
     return df
 
 
-def 
+def is_raw_array_string_not_corrupted(string):
     """Check if the raw array is corrupted."""
     if not isinstance(string, str):
         return False
@@ -445,32 +434,32 @@ def remove_corrupted_rows(df):
     # Loop over the fields and remove corrupted ones
     for field in available_fields:
         if len(df) != 0:
-            df = df[df[field].apply(
+            df = df[df[field].apply(is_raw_array_string_not_corrupted)]
     # Check if there are rows left
     if len(df) == 0:
         raise ValueError("No remaining rows after data corruption checks.")
     # If only one row available, raise also error
     if len(df) == 1:
-        raise ValueError("Only 1 row remains after data corruption checks. Check the file.")
+        raise ValueError("Only 1 row remains after data corruption checks. Check the raw file and maybe delete it.")
     # Return the dataframe
     return df
 
 
-def replace_nan_flags(df, sensor_name, verbose):
-    """Set values corresponding to nan_flags to np.nan
+def replace_nan_flags(df, sensor_name, logger=None, verbose=False):
+    """Set values corresponding to ``nan_flags`` to ``np.nan``.
 
     Parameters
     ----------
-    df : 
+    df : pandas.DataFrame
         Input dataframe.
     sensor_name : str
         Name of the sensor.
     verbose : bool
-        Whether to verbose the processing.
+        Whether to verbose the processing. The default is ``False``.
 
     Returns
     -------
-
+    pandas.DataFrame
         Dataframe without nan_flags values.
     """
     # Get dictionary of nan flags
@@ -486,26 +475,26 @@ def replace_nan_flags(df, sensor_name, verbose):
         if n_nan_flags_values > 0:
             msg = f"In variable {var}, {n_nan_flags_values} values were nan_flags and were replaced to np.nan."
             log_info(logger=logger, msg=msg, verbose=verbose)
-            df[var]
+            df.loc[is_a_nan_flag, var] = np.nan
     # Return dataframe
     return df
 
 
-def set_nan_outside_data_range(df, sensor_name, verbose):
-    """Set values outside the data range as np.nan
+def set_nan_outside_data_range(df, sensor_name, logger=None, verbose=False):
+    """Set values outside the data range as ``np.nan``.
 
     Parameters
     ----------
-    df : 
+    df : pandas.DataFrame
         Input dataframe.
     sensor_name : str
         Name of the sensor.
     verbose : bool
-        Whether to verbose the processing.
+        Whether to verbose the processing. The default is ``False``.
 
     Returns
     -------
-
+    pandas.DataFrame
         Dataframe without values outside the expected data range.
     """
     # Get dictionary of data_range
@@ -530,21 +519,21 @@ def set_nan_outside_data_range(df, sensor_name, verbose):
     return df
 
 
-def set_nan_invalid_values(df, sensor_name, verbose):
-    """Set invalid (class) values to np.nan
+def set_nan_invalid_values(df, sensor_name, logger=None, verbose=False):
+    """Set invalid (class) values to ``np.nan``.
 
     Parameters
    ----------
-    df : 
+    df : pandas.DataFrame
         Input dataframe.
     sensor_name : str
         Name of the sensor.
     verbose : bool
-        Whether to verbose the processing.
+        Whether to verbose the processing. The default is ``False``.
 
     Returns
     -------
-
+    pandas.DataFrame
         Dataframe without invalid values.
     """
     # Get dictionary of valid values
@@ -566,14 +555,12 @@ def set_nan_invalid_values(df, sensor_name, verbose):
     return df
 
 
-def process_raw_file(
-    filepath,
-    column_names,
-    reader_kwargs,
-    df_sanitizer_fun,
+def sanitize_df(
+    df,
     sensor_name,
     verbose=True,
-    issue_dict={},
+    issue_dict=None,
+    logger=None,
 ):
     """Read and parse a raw text files into a L0A dataframe.
 
@@ -581,63 +568,41 @@ def process_raw_file(
     ----------
     filepath : str
         File path
-    column_names : list
-        Columns names.
-    reader_kwargs : dict
-        Pandas `read_csv` arguments.
-    df_sanitizer_fun : object, optional
-        Sanitizer function to format the datafame.
     sensor_name : str
         Name of the sensor.
     verbose : bool
-        Whether to verbose the processing.
-        The default is True
+        Whether to verbose the processing. The default is ``True``.
     issue_dict : dict
         Issue dictionary providing information on timesteps to remove.
-        The default is an empty dictionary {}
-        Valid issue_dict key are 'timesteps' and 'time_periods'
+        The default is an empty dictionary ``{}``.
+        Valid issue_dict key are ``'timesteps'`` and ``'time_periods'``.
         Valid issue_dict values are list of datetime64 values (with second accuracy).
-        To correctly format and check the validity of the issue_dict
-        the disdrodb.l0.issue.check_issue_dict function.
+        To correctly format and check the validity of the ``issue_dict``, use
+        the ``disdrodb.l0.issue.check_issue_dict`` function.
 
     Returns
     -------
-
+    pandas.DataFrame
         Dataframe
     """
-
-
-
-    df = read_raw_file(
-        filepath=filepath,
-        column_names=column_names,
-        reader_kwargs=reader_kwargs,
-    )
-
-    # - Check if file empty
-    _check_not_empty_dataframe(df=df, verbose=verbose)
-
-    # - Check dataframe column number matches columns_names
-    _check_matching_column_number(df, column_names, verbose=False)
-
-    # - Sanitize the dataframe with a custom function
-    if df_sanitizer_fun is not None:
-        df = df_sanitizer_fun(df)
+    # Define the issue dictionary
+    # - If None, set to empty dictionary
+    issue_dict = {} if issue_dict is None else issue_dict
 
     # - Remove rows with time NaT
-    df = remove_rows_with_missing_time(df, verbose=verbose)
+    df = remove_rows_with_missing_time(df, logger=logger, verbose=verbose)
 
     # - Remove duplicated timesteps
-    df = remove_duplicated_timesteps(df, verbose=verbose)
+    df = remove_duplicated_timesteps(df, logger=logger, verbose=verbose)
 
     # - Filter out problematic tiemsteps reported in the issue YAML file
-    df = remove_issue_timesteps(df, issue_dict=issue_dict, verbose=verbose)
+    df = remove_issue_timesteps(df, issue_dict=issue_dict, logger=logger, verbose=verbose)
 
     # - Coerce numeric columns corrupted values to np.nan
-    df = coerce_corrupted_values_to_nan(df, sensor_name=sensor_name, verbose=verbose)
+    df = coerce_corrupted_values_to_nan(df, sensor_name=sensor_name)
 
     # - Strip trailing/leading space from string columns
-    df = strip_string_spaces(df, sensor_name=sensor_name, verbose=verbose)
+    df = strip_string_spaces(df, sensor_name=sensor_name)
 
     # - Strip first and last delimiter from the raw arrays
     df = strip_delimiter_from_raw_arrays(df)
@@ -646,16 +611,19 @@ def process_raw_file(
     df = remove_corrupted_rows(df)
 
     # - Cast dataframe to dtypes
-    df = cast_column_dtypes(df, sensor_name=sensor_name, verbose=verbose)
+    df = cast_column_dtypes(df, sensor_name=sensor_name)
 
     # - Replace nan flags values with np.nans
-    df = replace_nan_flags(df, sensor_name=sensor_name, verbose=verbose)
+    df = replace_nan_flags(df, sensor_name=sensor_name, logger=logger, verbose=verbose)
 
     # - Set values outside the data range to np.nan
-    df = set_nan_outside_data_range(df, sensor_name=sensor_name, verbose=verbose)
+    df = set_nan_outside_data_range(df, sensor_name=sensor_name, logger=logger, verbose=verbose)
 
     # - Replace invalid values with np.nan
-    df = set_nan_invalid_values(df, sensor_name=sensor_name, verbose=verbose)
+    df = set_nan_invalid_values(df, sensor_name=sensor_name, logger=logger, verbose=verbose)
+
+    # - Sort by time
+    df = df.sort_values("time")
 
     # ------------------------------------------------------.
     # - Check column names agrees to DISDRODB standards
@@ -677,23 +645,23 @@ def write_l0a(
     df: pd.DataFrame,
     filepath: str,
     force: bool = False,
+    logger=None,
     verbose: bool = False,
 ):
     """Save the dataframe into an Apache Parquet file.
 
     Parameters
     ----------
-    df : 
+    df : pandas.DataFrame
         Input dataframe.
     filepath : str
         Output file path.
     force : bool, optional
         Whether to overwrite existing data.
-        If True
-        If False
+        If ``True``, overwrite existing data into destination directories.
+        If ``False``, raise an error if there are already data into destination directories. This is the default.
     verbose : bool, optional
-        Whether to verbose the processing.
-        The default is False.
+        Whether to verbose the processing. The default is ``False``.
 
     Raises
     ------
@@ -702,7 +670,6 @@ def write_l0a(
     NotImplementedError
         The input dataframe can not be processed.
     """
-
     # -------------------------------------------------------------------------.
     # Create station directory if does not exist
     create_directory(os.path.dirname(filepath))
@@ -710,7 +677,7 @@ def write_l0a(
     # Check if the file already exists
     # - If force=True --> Remove it
     # - If force=False --> Raise error
-    remove_if_exists(filepath, force=force)
+    remove_if_exists(filepath, force=force, logger=logger)
 
     # -------------------------------------------------------------------------.
     # Define writing options
@@ -727,20 +694,18 @@ def write_l0a(
             row_group_size=row_group_size,
         )
         msg = f"The Pandas Dataframe has been written as an Apache Parquet file to {filepath}."
-        log_info(logger=logger, msg=msg, verbose=
+        log_info(logger=logger, msg=msg, verbose=verbose)
     except Exception as e:
-        msg = f"
-        log_error(logger=logger, msg=msg, verbose=False)
+        msg = f"The Pandas DataFrame cannot be written as an Apache Parquet file. The error is: \n {e}."
         raise ValueError(msg)
     # -------------------------------------------------------------------------.
-    return None
 
 
-
-#### L0A
+####--------------------------------------------------------------------------.
+#### DISDRODB L0A product reader
 
 
-def concatenate_dataframe(list_df: list, verbose: bool = False) -> pd.DataFrame:
+def concatenate_dataframe(list_df: list, logger=None, verbose: bool = False) -> pd.DataFrame:
     """Concatenate a list of dataframes.
 
     Parameters
@@ -748,12 +713,12 @@ def concatenate_dataframe(list_df: list, verbose: bool = False) -> pd.DataFrame:
     list_df : list
         List of dataframes.
     verbose : bool, optional
-        If True
-        If False
+        If ``True``, print messages.
+        If ``False``, no print.
 
     Returns
     -------
-
+    pandas.DataFrame
         Concatenated dataframe.
 
     Raises
@@ -769,39 +734,111 @@ def concatenate_dataframe(list_df: list, verbose: bool = False) -> pd.DataFrame:
         return df
 
     # Log
-    msg = "
-    log_info(logger, msg, verbose)
+    msg = "Concatenation of dataframes started."
+    log_info(logger=logger, msg=msg, verbose=verbose)
 
     # Concatenate the dataframe
     try:
         df = pd.concat(list_df, axis=0, ignore_index=True)
-
-        # Drop duplicated values
-        df = df.drop_duplicates(subset="time")
-
         # Sort by increasing time
         df = df.sort_values(by="time")
 
     except (AttributeError, TypeError) as e:
-        msg = f"
-        log_error(logger=logger, msg=msg, verbose=False)
+        msg = f"Can not concatenate the files. \n Error: {e}"
        raise ValueError(msg)
 
     # Log
-    msg = "
-    log_info(logger, msg, verbose)
+    msg = "Concatenation of dataframes has finished."
+    log_info(logger=logger, msg=msg, verbose=verbose)
 
     # Return dataframe
     return df
 
 
-def read_raw_files(
+def _read_l0a(filepath: str, verbose: bool = False, logger=None, debugging_mode: bool = False) -> pd.DataFrame:
+    # Log
+    msg = f"Reading L0 Apache Parquet file at {filepath} started."
+    log_info(logger=logger, msg=msg, verbose=verbose)
+    # Open file
+    df = pd.read_parquet(filepath)
+    if debugging_mode:
+        df = df.iloc[0:100]
+    # Log
+    msg = f"Reading L0 Apache Parquet file at {filepath} ended."
+    log_info(logger=logger, msg=msg, verbose=verbose)
+    return df
+
+
+def read_l0a_dataframe(
+    filepaths: Union[str, list],
+    verbose: bool = False,
+    logger=None,
+    debugging_mode: bool = False,
+) -> pd.DataFrame:
+    """Read DISDRODB L0A Apache Parquet file(s).
+
+    Parameters
+    ----------
+    filepaths : str or list
+        Either a list or a single filepath.
+    verbose : bool
+        Whether to print detailed processing information into terminal.
+        The default is ``False``.
+    debugging_mode : bool
+        If ``True``, it reduces the amount of data to process.
+        If filepaths is a list, it reads only the first 3 files.
+        For each file it select only the first 100 rows.
+        The default is ``False``.
+
+    Returns
+    -------
+    pandas.DataFrame
+        L0A Dataframe.
+
+    """
+    from disdrodb.l0.l0a_processing import concatenate_dataframe
+
+    # ----------------------------------------
+    # Check filepaths validity
+    if not isinstance(filepaths, (list, str)):
+        raise TypeError("Expecting filepaths to be a string or a list of strings.")
+
+    # ----------------------------------------
+    # If filepath is a string, convert to list
+    if isinstance(filepaths, str):
+        filepaths = [filepaths]
+    # ---------------------------------------------------
+    # If debugging_mode=True, it reads only the first 3 filepaths
+    if debugging_mode:
+        filepaths = filepaths[0:3]  # select first 3 filepaths
+
+    # ---------------------------------------------------
+    # Define the list of dataframe
+    list_df = [
+        _read_l0a(filepath, verbose=verbose, logger=logger, debugging_mode=debugging_mode) for filepath in filepaths
+    ]
+
+    # Concatenate dataframe
+    df = concatenate_dataframe(list_df, logger=logger, verbose=verbose)
+
+    # Ensure time is in nanoseconds
+    df["time"] = df["time"].astype("M8[ns]")
+
+    # ---------------------------------------------------
+    # Return dataframe
+    return df
+
+
+####---------------------------------------------------------------------------.
+#### L0A Utility
+
+
+def read_raw_text_files(
     filepaths: Union[list, str],
-
-
-
-
-    df_sanitizer_fun: object = None,
+    reader,
+    sensor_name,
+    verbose=True,
+    logger=None,
 ) -> pd.DataFrame:
     """Read and parse a list for raw files into a dataframe.
 
@@ -809,20 +846,17 @@ def read_raw_files(
     ----------
     filepaths : Union[list,str]
         File(s) path(s)
-
-
-
-        Pandas `read_csv` arguments.
+    reader:
+        DISDRODB reader function.
+        Format: reader(filepath, logger=None)
     sensor_name : str
         Name of the sensor.
     verbose : bool
-        Whether to verbose the processing.
-    df_sanitizer_fun : object, optional
-        Sanitizer function to format the datafame.
+        Whether to verbose the processing. The default is ``True``.
 
     Returns
     -------
-
+    pandas.DataFrame
         Dataframe
 
     Raises
@@ -831,7 +865,6 @@ def read_raw_files(
         Input parameters can not be used or the raw file can not be processed.
 
     """
-
     # ------------------------------------------------------.
     # Check input list
     if isinstance(filepaths, str):
@@ -840,54 +873,50 @@ def read_raw_files(
         raise ValueError("'filepaths' must contains at least 1 filepath.")
 
     # ------------------------------------------------------.
-
+    # Loop over all raw files
     n_files = len(filepaths)
     processed_file_counter = 0
     list_skipped_files_msg = []
    list_df = []
     for filepath in filepaths:
+        # Try read the raw text file
         try:
-
-
-
-
-                reader_kwargs=reader_kwargs,
-                df_sanitizer_fun=df_sanitizer_fun,
+            df = reader(filepath, logger=logger)
+            # Sanitize the dataframe
+            df = sanitize_df(
+                df=df,
                 sensor_name=sensor_name,
+                logger=logger,
                 verbose=verbose,
             )
-
             # Append dataframe to the list
             list_df.append(df)
-
             # Update the logger
             processed_file_counter += 1
-            msg = f"
-
+            msg = f"Raw file '{filepath}' processed successfully ({processed_file_counter}/{n_files})."
+            log_info(logger=logger, msg=msg, verbose=verbose)
 
-        #
+        # Skip the file if the processing fails
         except Exception as e:
             # Update the logger
-            msg = f"
-
+            msg = f"{filepath} has been skipped. The error is: {e}."
+            log_error(logger=logger, msg=msg, verbose=verbose)
             list_skipped_files_msg.append(msg)
 
     # Update logger
-    msg = f"
+    msg = f"{len(list_skipped_files_msg)} of {n_files} have been skipped."
     log_info(logger=logger, msg=msg, verbose=verbose)
-    logger.info("---")
-    logger.info(msg)
-    logger.info("---")
 
     ##----------------------------------------------------------------.
-
+    # Concatenate the dataframe
    if len(list_df) == 0:
-        raise ValueError(
-    df = concatenate_dataframe(list_df, verbose=verbose)
-
-    # - Remove rows with duplicate timestep (keep the first)
-    df = df.drop_duplicates(subset=["time"], keep="first")
+        raise ValueError("Any raw file could be read!")
+    df = concatenate_dataframe(list_df, verbose=verbose, logger=logger)
 
     # ------------------------------------------------------.
+    # Enforce output time to be [ns]
+    # --> For compatibility with xarray
+    df["time"] = df["time"].astype("M8[ns]")
+
     # Return the dataframe
     return df
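Taken together, this diff replaces the 0.0.21 pipeline — read_raw_files/process_raw_file driven by column_names, reader_kwargs and a df_sanitizer_fun — with a reader-callable flow: a reader with signature reader(filepath, logger=None) returns the raw dataframe, and sanitize_df applies the L0A checks. A sketch of how the new public functions compose, using only the signatures visible in this diff; the reader body, column names and file paths are illustrative (real columns must match DISDRODB L0A standards for the checks to pass):

import pandas as pd

from disdrodb.l0.l0a_processing import (
    read_l0a_dataframe,
    read_raw_text_file,
    read_raw_text_files,
    write_l0a,
)


def my_reader(filepath, logger=None):
    # Illustrative column names and pd.read_csv kwargs for a ';'-delimited file
    column_names = ["time", "raw_drop_number"]
    reader_kwargs = {"delimiter": ";", "header": None}
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )
    # sanitize_df expects a 'time' column; parse it into datetimes
    df["time"] = pd.to_datetime(df["time"], errors="coerce")
    return df


# Reads each file with the reader, sanitizes it, concatenates the results,
# and enforces nanosecond time resolution for xarray compatibility.
df = read_raw_text_files(
    filepaths=["station1_2020.txt", "station1_2021.txt"],  # illustrative paths
    reader=my_reader,
    sensor_name="PARSIVEL2",
    verbose=True,
)

# Write the L0A Apache Parquet file (force=True overwrites existing data)
write_l0a(df, filepath="L0A.station1.parquet", force=True)

# Read L0A Parquet files back; debugging_mode limits to 3 files x 100 rows
df_l0a = read_l0a_dataframe("L0A.station1.parquet", debugging_mode=True)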