disdrodb: 0.0.21-py3-none-any.whl → 0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (264)
  1. disdrodb/__init__.py +132 -15
  2. disdrodb/_config.py +4 -2
  3. disdrodb/_version.py +9 -4
  4. disdrodb/api/checks.py +264 -237
  5. disdrodb/api/configs.py +4 -8
  6. disdrodb/api/create_directories.py +235 -290
  7. disdrodb/api/info.py +217 -26
  8. disdrodb/api/io.py +295 -269
  9. disdrodb/api/path.py +597 -173
  10. disdrodb/api/search.py +486 -0
  11. disdrodb/{metadata/scripts → cli}/disdrodb_check_metadata_archive.py +12 -7
  12. disdrodb/{utils/pandas.py → cli/disdrodb_data_archive_directory.py} +9 -18
  13. disdrodb/cli/disdrodb_download_archive.py +86 -0
  14. disdrodb/cli/disdrodb_download_metadata_archive.py +53 -0
  15. disdrodb/cli/disdrodb_download_station.py +84 -0
  16. disdrodb/{api/scripts → cli}/disdrodb_initialize_station.py +22 -10
  17. disdrodb/cli/disdrodb_metadata_archive_directory.py +32 -0
  18. disdrodb/{data_transfer/scripts/disdrodb_download_station.py → cli/disdrodb_open_data_archive.py} +22 -22
  19. disdrodb/cli/disdrodb_open_logs_directory.py +69 -0
  20. disdrodb/{data_transfer/scripts/disdrodb_upload_station.py → cli/disdrodb_open_metadata_archive.py} +22 -24
  21. disdrodb/cli/disdrodb_open_metadata_directory.py +71 -0
  22. disdrodb/cli/disdrodb_open_product_directory.py +74 -0
  23. disdrodb/cli/disdrodb_open_readers_directory.py +32 -0
  24. disdrodb/{l0/scripts → cli}/disdrodb_run_l0.py +38 -31
  25. disdrodb/{l0/scripts → cli}/disdrodb_run_l0_station.py +32 -30
  26. disdrodb/{l0/scripts → cli}/disdrodb_run_l0a.py +30 -21
  27. disdrodb/{l0/scripts → cli}/disdrodb_run_l0a_station.py +24 -33
  28. disdrodb/{l0/scripts → cli}/disdrodb_run_l0b.py +30 -21
  29. disdrodb/{l0/scripts → cli}/disdrodb_run_l0b_station.py +25 -34
  30. disdrodb/cli/disdrodb_run_l0c.py +130 -0
  31. disdrodb/cli/disdrodb_run_l0c_station.py +129 -0
  32. disdrodb/cli/disdrodb_run_l1.py +122 -0
  33. disdrodb/cli/disdrodb_run_l1_station.py +121 -0
  34. disdrodb/cli/disdrodb_run_l2e.py +122 -0
  35. disdrodb/cli/disdrodb_run_l2e_station.py +122 -0
  36. disdrodb/cli/disdrodb_run_l2m.py +122 -0
  37. disdrodb/cli/disdrodb_run_l2m_station.py +122 -0
  38. disdrodb/cli/disdrodb_upload_archive.py +105 -0
  39. disdrodb/cli/disdrodb_upload_station.py +98 -0
  40. disdrodb/configs.py +90 -25
  41. disdrodb/data_transfer/__init__.py +22 -0
  42. disdrodb/data_transfer/download_data.py +87 -90
  43. disdrodb/data_transfer/upload_data.py +64 -37
  44. disdrodb/data_transfer/zenodo.py +15 -18
  45. disdrodb/docs.py +1 -1
  46. disdrodb/issue/__init__.py +17 -4
  47. disdrodb/issue/checks.py +10 -23
  48. disdrodb/issue/reader.py +9 -12
  49. disdrodb/issue/writer.py +14 -17
  50. disdrodb/l0/__init__.py +17 -26
  51. disdrodb/l0/check_configs.py +35 -23
  52. disdrodb/l0/check_standards.py +32 -42
  53. disdrodb/l0/configs/{Thies_LPM → LPM}/bins_diameter.yml +44 -44
  54. disdrodb/l0/configs/{Thies_LPM → LPM}/bins_velocity.yml +40 -40
  55. disdrodb/l0/configs/LPM/l0a_encodings.yml +80 -0
  56. disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_cf_attrs.yml +62 -59
  57. disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_encodings.yml +9 -9
  58. disdrodb/l0/configs/{Thies_LPM → LPM}/raw_data_format.yml +245 -245
  59. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_diameter.yml +66 -66
  60. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_velocity.yml +64 -64
  61. disdrodb/l0/configs/PARSIVEL/l0a_encodings.yml +32 -0
  62. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_cf_attrs.yml +22 -20
  63. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_encodings.yml +17 -17
  64. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/raw_data_format.yml +77 -77
  65. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_diameter.yml +64 -64
  66. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_velocity.yml +64 -64
  67. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +39 -0
  68. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_cf_attrs.yml +24 -22
  69. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_encodings.yml +20 -20
  70. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/raw_data_format.yml +98 -98
  71. disdrodb/l0/configs/{RD_80 → RD80}/bins_diameter.yml +40 -40
  72. disdrodb/l0/configs/RD80/l0a_encodings.yml +16 -0
  73. disdrodb/l0/configs/{RD_80 → RD80}/l0b_cf_attrs.yml +3 -3
  74. disdrodb/l0/configs/RD80/l0b_encodings.yml +135 -0
  75. disdrodb/l0/configs/{RD_80 → RD80}/raw_data_format.yml +48 -48
  76. disdrodb/l0/l0_reader.py +216 -340
  77. disdrodb/l0/l0a_processing.py +237 -208
  78. disdrodb/l0/l0b_nc_processing.py +227 -80
  79. disdrodb/l0/l0b_processing.py +93 -173
  80. disdrodb/l0/l0c_processing.py +627 -0
  81. disdrodb/l0/readers/{ARM → LPM/ARM}/ARM_LPM.py +36 -58
  82. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +226 -0
  83. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +185 -0
  84. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +183 -0
  85. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +179 -0
  86. disdrodb/l0/readers/{UK → LPM/UK}/DIVEN.py +14 -35
  87. disdrodb/l0/readers/PARSIVEL/AUSTRALIA/MELBOURNE_2007_PARSIVEL.py +157 -0
  88. disdrodb/l0/readers/PARSIVEL/CHINA/CHONGQING.py +113 -0
  89. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/ARCTIC_2021.py +40 -57
  90. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/COMMON_2011.py +37 -54
  91. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/DAVOS_2009_2011.py +34 -51
  92. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_2009.py +34 -51
  93. disdrodb/l0/readers/{EPFL/PARADISO_2014.py → PARSIVEL/EPFL/EPFL_ROOF_2008.py} +38 -50
  94. disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2010.py +105 -0
  95. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2011.py +34 -51
  96. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2012.py +33 -51
  97. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GENEPI_2007.py +25 -44
  98. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007.py +25 -44
  99. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007_2.py +25 -44
  100. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HPICONET_2010.py +34 -51
  101. disdrodb/l0/readers/{EPFL/EPFL_ROOF_2010.py → PARSIVEL/EPFL/HYMEX_LTE_SOP2.py} +37 -50
  102. disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP3.py +111 -0
  103. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HYMEX_LTE_SOP4.py +36 -54
  104. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2018.py +34 -52
  105. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2019.py +38 -56
  106. disdrodb/l0/readers/PARSIVEL/EPFL/PARADISO_2014.py +105 -0
  107. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PARSIVEL_2007.py +27 -45
  108. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PLATO_2019.py +24 -44
  109. disdrodb/l0/readers/PARSIVEL/EPFL/RACLETS_2019.py +140 -0
  110. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RACLETS_2019_WJF.py +41 -59
  111. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RIETHOLZBACH_2011.py +34 -51
  112. disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2017.py +117 -0
  113. disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2019.py +137 -0
  114. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/UNIL_2022.py +42 -55
  115. disdrodb/l0/readers/PARSIVEL/GPM/IFLOODS.py +104 -0
  116. disdrodb/l0/readers/{GPM → PARSIVEL/GPM}/LPVEX.py +29 -48
  117. disdrodb/l0/readers/PARSIVEL/GPM/MC3E.py +184 -0
  118. disdrodb/l0/readers/PARSIVEL/NCAR/CCOPE_2015.py +113 -0
  119. disdrodb/l0/readers/{NCAR/VORTEX_SE_2016_P1.py → PARSIVEL/NCAR/OWLES_MIPS.py} +46 -72
  120. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +125 -0
  121. disdrodb/l0/readers/{NCAR/OWLES_MIPS.py → PARSIVEL/NCAR/PLOWS_MIPS.py} +45 -64
  122. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +114 -0
  123. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +176 -0
  124. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +183 -0
  125. disdrodb/l0/readers/{ARM/ARM_LD.py → PARSIVEL2/ARM/ARM_PARSIVEL2.py} +27 -50
  126. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +163 -0
  127. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +163 -0
  128. disdrodb/l0/readers/{DENMARK → PARSIVEL2/DENMARK}/EROSION_nc.py +14 -35
  129. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +119 -0
  130. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +104 -0
  131. disdrodb/l0/readers/PARSIVEL2/GPM/NSSTC.py +176 -0
  132. disdrodb/l0/readers/PARSIVEL2/ITALY/GID_PARSIVEL2.py +32 -0
  133. disdrodb/l0/readers/PARSIVEL2/MEXICO/OH_IIUNAM_nc.py +56 -0
  134. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +120 -0
  135. disdrodb/l0/readers/{NCAR → PARSIVEL2/NCAR}/PECAN_MIPS.py +45 -64
  136. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +181 -0
  137. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +160 -0
  138. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +160 -0
  139. disdrodb/l0/readers/{NCAR/PLOWS_MIPS.py → PARSIVEL2/NCAR/VORTEX_SE_2016_P1.py} +49 -66
  140. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +118 -0
  141. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +152 -0
  142. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT.py +166 -0
  143. disdrodb/l0/readers/{NCAR/RELAMPAGO_RD80.py → RD80/BRAZIL/CHUVA_RD80.py} +36 -60
  144. disdrodb/l0/readers/{BRAZIL → RD80/BRAZIL}/GOAMAZON_RD80.py +36 -55
  145. disdrodb/l0/readers/{NCAR → RD80/NCAR}/CINDY_2011_RD80.py +35 -54
  146. disdrodb/l0/readers/{BRAZIL/CHUVA_RD80.py → RD80/NCAR/RELAMPAGO_RD80.py} +40 -54
  147. disdrodb/l0/readers/template_reader_raw_netcdf_data.py +62 -0
  148. disdrodb/l0/readers/{reader_template.py → template_reader_raw_text_data.py} +20 -44
  149. disdrodb/l0/routines.py +885 -581
  150. disdrodb/l0/standards.py +72 -236
  151. disdrodb/l0/template_tools.py +104 -109
  152. disdrodb/l1/__init__.py +17 -0
  153. disdrodb/l1/beard_model.py +716 -0
  154. disdrodb/l1/encoding_attrs.py +620 -0
  155. disdrodb/l1/fall_velocity.py +260 -0
  156. disdrodb/l1/filters.py +192 -0
  157. disdrodb/l1/processing.py +200 -0
  158. disdrodb/l1/resampling.py +236 -0
  159. disdrodb/l1/routines.py +357 -0
  160. disdrodb/l1_env/__init__.py +17 -0
  161. disdrodb/l1_env/routines.py +38 -0
  162. disdrodb/l2/__init__.py +17 -0
  163. disdrodb/l2/empirical_dsd.py +1735 -0
  164. disdrodb/l2/event.py +388 -0
  165. disdrodb/l2/processing.py +519 -0
  166. disdrodb/l2/processing_options.py +213 -0
  167. disdrodb/l2/routines.py +868 -0
  168. disdrodb/metadata/__init__.py +9 -2
  169. disdrodb/metadata/checks.py +165 -118
  170. disdrodb/metadata/download.py +81 -0
  171. disdrodb/metadata/geolocation.py +146 -0
  172. disdrodb/metadata/info.py +20 -13
  173. disdrodb/metadata/manipulation.py +1 -1
  174. disdrodb/metadata/reader.py +59 -8
  175. disdrodb/metadata/search.py +77 -144
  176. disdrodb/metadata/standards.py +7 -8
  177. disdrodb/metadata/writer.py +8 -14
  178. disdrodb/psd/__init__.py +38 -0
  179. disdrodb/psd/fitting.py +2146 -0
  180. disdrodb/psd/models.py +774 -0
  181. disdrodb/routines.py +1176 -0
  182. disdrodb/scattering/__init__.py +28 -0
  183. disdrodb/scattering/axis_ratio.py +344 -0
  184. disdrodb/scattering/routines.py +456 -0
  185. disdrodb/utils/__init__.py +17 -0
  186. disdrodb/utils/attrs.py +208 -0
  187. disdrodb/utils/cli.py +269 -0
  188. disdrodb/utils/compression.py +60 -42
  189. disdrodb/utils/dask.py +62 -0
  190. disdrodb/utils/decorators.py +110 -0
  191. disdrodb/utils/directories.py +107 -46
  192. disdrodb/utils/encoding.py +127 -0
  193. disdrodb/utils/list.py +29 -0
  194. disdrodb/utils/logger.py +168 -46
  195. disdrodb/utils/time.py +657 -0
  196. disdrodb/utils/warnings.py +30 -0
  197. disdrodb/utils/writer.py +57 -0
  198. disdrodb/utils/xarray.py +138 -47
  199. disdrodb/utils/yaml.py +0 -1
  200. disdrodb/viz/__init__.py +17 -0
  201. disdrodb/viz/plots.py +17 -0
  202. disdrodb-0.1.0.dist-info/METADATA +321 -0
  203. disdrodb-0.1.0.dist-info/RECORD +216 -0
  204. {disdrodb-0.0.21.dist-info → disdrodb-0.1.0.dist-info}/WHEEL +1 -1
  205. disdrodb-0.1.0.dist-info/entry_points.txt +30 -0
  206. disdrodb/data_transfer/scripts/disdrodb_download_archive.py +0 -53
  207. disdrodb/data_transfer/scripts/disdrodb_upload_archive.py +0 -57
  208. disdrodb/l0/configs/OTT_Parsivel/l0a_encodings.yml +0 -32
  209. disdrodb/l0/configs/OTT_Parsivel2/l0a_encodings.yml +0 -39
  210. disdrodb/l0/configs/RD_80/l0a_encodings.yml +0 -16
  211. disdrodb/l0/configs/RD_80/l0b_encodings.yml +0 -135
  212. disdrodb/l0/configs/Thies_LPM/l0a_encodings.yml +0 -80
  213. disdrodb/l0/io.py +0 -257
  214. disdrodb/l0/l0_processing.py +0 -1091
  215. disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_OTT.py +0 -178
  216. disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_THIES.py +0 -247
  217. disdrodb/l0/readers/BRAZIL/CHUVA_LPM.py +0 -204
  218. disdrodb/l0/readers/BRAZIL/CHUVA_OTT.py +0 -183
  219. disdrodb/l0/readers/BRAZIL/GOAMAZON_LPM.py +0 -204
  220. disdrodb/l0/readers/BRAZIL/GOAMAZON_OTT.py +0 -183
  221. disdrodb/l0/readers/CHINA/CHONGQING.py +0 -131
  222. disdrodb/l0/readers/EPFL/EPFL_ROOF_2008.py +0 -128
  223. disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP2.py +0 -127
  224. disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP3.py +0 -129
  225. disdrodb/l0/readers/EPFL/RACLETS_2019.py +0 -158
  226. disdrodb/l0/readers/EPFL/SAMOYLOV_2017.py +0 -136
  227. disdrodb/l0/readers/EPFL/SAMOYLOV_2019.py +0 -158
  228. disdrodb/l0/readers/FRANCE/SIRTA_OTT2.py +0 -138
  229. disdrodb/l0/readers/GPM/GCPEX.py +0 -123
  230. disdrodb/l0/readers/GPM/IFLOODS.py +0 -123
  231. disdrodb/l0/readers/GPM/MC3E.py +0 -123
  232. disdrodb/l0/readers/GPM/NSSTC.py +0 -164
  233. disdrodb/l0/readers/ITALY/GID.py +0 -199
  234. disdrodb/l0/readers/MEXICO/OH_IIUNAM_nc.py +0 -92
  235. disdrodb/l0/readers/NCAR/CCOPE_2015.py +0 -133
  236. disdrodb/l0/readers/NCAR/PECAN_FP3.py +0 -137
  237. disdrodb/l0/readers/NCAR/PECAN_MOBILE.py +0 -144
  238. disdrodb/l0/readers/NCAR/RELAMPAGO_OTT.py +0 -195
  239. disdrodb/l0/readers/NCAR/SNOWIE_PJ.py +0 -172
  240. disdrodb/l0/readers/NCAR/SNOWIE_SB.py +0 -179
  241. disdrodb/l0/readers/NCAR/VORTEX2_2009.py +0 -133
  242. disdrodb/l0/readers/NCAR/VORTEX2_2010.py +0 -188
  243. disdrodb/l0/readers/NCAR/VORTEX2_2010_UF.py +0 -191
  244. disdrodb/l0/readers/NCAR/VORTEX_SE_2016_P2.py +0 -135
  245. disdrodb/l0/readers/NCAR/VORTEX_SE_2016_PIPS.py +0 -170
  246. disdrodb/l0/readers/NETHERLANDS/DELFT.py +0 -187
  247. disdrodb/l0/readers/SPAIN/SBEGUERIA.py +0 -179
  248. disdrodb/l0/scripts/disdrodb_run_l0b_concat.py +0 -93
  249. disdrodb/l0/scripts/disdrodb_run_l0b_concat_station.py +0 -85
  250. disdrodb/utils/netcdf.py +0 -452
  251. disdrodb/utils/scripts.py +0 -102
  252. disdrodb-0.0.21.dist-info/AUTHORS.md +0 -18
  253. disdrodb-0.0.21.dist-info/METADATA +0 -186
  254. disdrodb-0.0.21.dist-info/RECORD +0 -168
  255. disdrodb-0.0.21.dist-info/entry_points.txt +0 -15
  256. /disdrodb/l0/configs/{RD_80 → RD80}/bins_velocity.yml +0 -0
  257. /disdrodb/l0/manuals/{Thies_LPM.pdf → LPM.pdf} +0 -0
  258. /disdrodb/l0/manuals/{ODM_470.pdf → ODM470.pdf} +0 -0
  259. /disdrodb/l0/manuals/{OTT_Parsivel.pdf → PARSIVEL.pdf} +0 -0
  260. /disdrodb/l0/manuals/{OTT_Parsivel2.pdf → PARSIVEL2.pdf} +0 -0
  261. /disdrodb/l0/manuals/{PWS_100.pdf → PWS100.pdf} +0 -0
  262. /disdrodb/l0/manuals/{RD_80.pdf → RD80.pdf} +0 -0
  263. {disdrodb-0.0.21.dist-info → disdrodb-0.1.0.dist-info/licenses}/LICENSE +0 -0
  264. {disdrodb-0.0.21.dist-info → disdrodb-0.1.0.dist-info}/top_level.txt +0 -0
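
What changed, at a glance: the L0 readers were reorganized into per-sensor directories (disdrodb/l0/readers/{SENSOR}/{INSTITUTION}/), sensor configuration folders were renamed (e.g. OTT_Parsivel2 → PARSIVEL2), the command-line scripts moved under disdrodb/cli/, and new L1/L2, PSD-fitting and scattering subpackages were added. The diff below shows entry #77, disdrodb/l0/l0a_processing.py, where the old df_sanitizer_fun-based reader interface was replaced by plain reader functions. As a rough orientation, here is a hypothetical reader skeleton (not code from the package) following the reader(filepath, logger=None) contract and the read_raw_text_file helper documented in the diff; the column names, read_csv kwargs and time format are illustrative only:

    import pandas as pd

    from disdrodb.l0.l0a_processing import read_raw_text_file


    def reader(filepath, logger=None):
        """Illustrative reader returning a dataframe ready for sanitize_df()."""
        # Illustrative per-campaign parsing settings (real readers define their own)
        column_names = ["time", "raw_drop_number"]
        reader_kwargs = {"delimiter": ";", "header": None}
        df = read_raw_text_file(
            filepath=filepath,
            column_names=column_names,
            reader_kwargs=reader_kwargs,
            logger=logger,
        )
        # Campaign-specific reshaping goes here (what df_sanitizer_fun did in 0.0.21);
        # errors="coerce" yields NaT values that remove_rows_with_missing_time() drops.
        df["time"] = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
        return df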
disdrodb/l0/l0a_processing.py

@@ -19,7 +19,6 @@
 """Functions to process raw text files into DISDRODB L0A Apache Parquet."""


-import inspect
 import logging
 import os
 from typing import Union
@@ -39,7 +38,6 @@ from disdrodb.utils.directories import create_directory, remove_if_exists

 # Logger
 from disdrodb.utils.logger import (
-    log_debug,
     log_error,
     log_info,
     log_warning,
@@ -55,7 +53,7 @@ pd.set_option("mode.chained_assignment", None)  # Avoid SettingWithCopyWarning
 #### Raw file readers


-def _preprocess_reader_kwargs(reader_kwargs: dict) -> dict:
+def preprocess_reader_kwargs(reader_kwargs: dict) -> dict:
     """Preprocess arguments required to read raw text file into Pandas.

     Parameters
@@ -86,10 +84,20 @@ def _preprocess_reader_kwargs(reader_kwargs: dict) -> dict:
     return reader_kwargs


-def read_raw_file(
+def check_matching_column_number(df, column_names):
+    """Check the number of columns in the dataframe matches the length of column names."""
+    n_columns = len(df.columns)
+    n_expected_columns = len(column_names)
+    if n_columns != n_expected_columns:
+        msg = f"The dataframe has {n_columns} columns, while {n_expected_columns} are expected !."
+        raise ValueError(msg)
+
+
+def read_raw_text_file(
     filepath: str,
     column_names: list,
     reader_kwargs: dict,
+    logger=None,  # noqa
 ) -> pd.DataFrame:
     """Read a raw file into a dataframe.

@@ -100,7 +108,12 @@
     column_names : list
         Column names.
     reader_kwargs : dict
-        Pandas pd.read_csv arguments.
+        Pandas ``pd.read_csv`` arguments.
+    logger : logging.Logger
+        Logger object.
+        The default is ``None``.
+        If ``None``, the logger is created using the module name.
+        If ``logger`` is passed, it will be used to log messages.

     Returns
     -------
@@ -108,7 +121,7 @@
         Pandas dataframe.
     """
     # Preprocess reader_kwargs
-    reader_kwargs = _preprocess_reader_kwargs(reader_kwargs)
+    reader_kwargs = preprocess_reader_kwargs(reader_kwargs)

     # Enforce all raw files columns with dtype = 'object'
     dtype = "object"
@@ -117,8 +130,17 @@
     try:
         df = pd.read_csv(filepath, names=column_names, dtype=dtype, **reader_kwargs)
     except pd.errors.EmptyDataError:
-        msg = f" - Is empty, skip file: {filepath}"
-        log_warning(logger=logger, msg=msg, verbose=False)
+        msg = f"The following file is empty: {filepath}"
+        raise ValueError(msg)
+
+    # Check the dataframe is not empty
+    if len(df.index) == 0:
+        msg = f"The following file is empty: {filepath}"
+        raise ValueError(msg)
+
+    # Check dataframe column number matches columns_names
+    if column_names is not None:
+        check_matching_column_number(df, column_names)

     # Return dataframe
     return df
@@ -128,45 +150,19 @@ def read_raw_file(
 #### L0A checks and homogenization


-def _check_df_sanitizer_fun(df_sanitizer_fun):
-    """Check the argument of df_sanitizer_fun is only df."""
-    if df_sanitizer_fun is None:
-        return None
-    if not callable(df_sanitizer_fun):
-        raise ValueError("'df_sanitizer_fun' must be a function.")
-    if not np.all(np.isin(inspect.getfullargspec(df_sanitizer_fun).args, ["df"])):
-        raise ValueError("The `df_sanitizer_fun` must have only `df` as input argument!")
-
-
-def _check_not_empty_dataframe(df, verbose=False):
-    if len(df.index) == 0:
-        msg = " - The file is empty and has been skipped."
-        log_error(logger=logger, msg=msg, verbose=False)
-        raise ValueError(msg)
-
-
-def _check_matching_column_number(df, column_names, verbose=False):
-    n_columns = len(df.columns)
-    n_expected_columns = len(column_names)
-    if n_columns != n_expected_columns:
-        msg = f" - The dataframe has {n_columns} columns, while {n_expected_columns} are expected !."
-        log_error(logger, msg, verbose)
-        raise ValueError(msg)
-
-
-def remove_rows_with_missing_time(df: pd.DataFrame, verbose: bool = False):
-    """Remove dataframe rows where the "time" is NaT.
+def remove_rows_with_missing_time(df: pd.DataFrame, logger=logger, verbose: bool = False):
+    """Remove dataframe rows where the ``"time"`` is ``NaT``.

     Parameters
     ----------
-    df : pd.DataFrame
+    df : pandas.DataFrame
         Input dataframe.
     verbose : bool
-        Whether to verbose the processing.
+        Whether to verbose the processing. The default is ``False``.

     Returns
     -------
-    pd.DataFrame
+    pandas.DataFrame
         Dataframe with valid timesteps.
     """
     # Get the number of rows of the dataframe
@@ -175,32 +171,31 @@ def remove_rows_with_missing_time(df: pd.DataFrame, verbose: bool = False):
     df = df.dropna(subset="time", axis=0)
     # If no valid timesteps, raise error
     if len(df.index) == 0:
-        msg = " - There are not valid timestep."
-        log_error(logger=logger, msg=msg, verbose=False)
+        msg = "There are not valid timestep."
         raise ValueError(msg)
     # Otherwise, report the number of invalid timesteps
     n_invalid_timesteps = n_rows - len(df)
     if n_invalid_timesteps > 0:
-        msg = f" - {n_invalid_timesteps} rows had invalid timesteps and were discarded."
+        msg = f"{n_invalid_timesteps} rows had invalid timesteps and were discarded."
         log_warning(logger=logger, msg=msg, verbose=verbose)
     return df


-def remove_duplicated_timesteps(df: pd.DataFrame, verbose: bool = False):
+def remove_duplicated_timesteps(df: pd.DataFrame, logger=None, verbose: bool = False):
     """Remove duplicated timesteps.

     It keep only the first timestep occurrence !

     Parameters
     ----------
-    df : pd.DataFrame
+    df : pandas.DataFrame
         Input dataframe.
     verbose : bool
-        Whether to verbose the processing.
+        Whether to verbose the processing. The default is ``False``.

     Returns
     -------
-    pd.DataFrame
+    pandas.DataFrame
         Dataframe with valid unique timesteps.
     """
     values, counts = np.unique(df["time"], return_counts=True)
@@ -208,11 +203,13 @@
     values_duplicates = values[idx_duplicates].astype("M8[s]")
     # If there are duplicated timesteps
     if len(values_duplicates) > 0:
+        # TODO: raise error if duplicated timesteps have different values !
+
         # Drop duplicated timesteps (keeping the first occurrence)
         df = df.drop_duplicates(subset="time", keep="first")
         # Report the values of duplicated timesteps
         msg = (
-            f" - The following timesteps occurred more than once: {values_duplicates}. Only the first occurrence"
+            f"The following timesteps occurred more than once: {values_duplicates}. Only the first occurrence"
             " selected."
         )
         log_warning(logger=logger, msg=msg, verbose=verbose)
@@ -225,13 +222,12 @@ def drop_timesteps(df, timesteps):
     # Check there are row left
     if len(df) == 0:
         msg = "No rows left after removing problematic timesteps. Maybe you need to adjust the issue YAML file."
-        log_warning(logger=logger, msg=msg, verbose=False)
         raise ValueError(msg)
     return df


 def drop_time_periods(df, time_periods):
-    """Drop problematic time_period."""
+    """Drop problematic time periods."""
     for time_period in time_periods:
         if len(df) > 0:
             start_time = time_period[0]
@@ -240,25 +236,26 @@
     # Check there are row left
     if len(df) == 0:
         msg = "No rows left after removing problematic time_periods. Maybe you need to adjust the issue YAML file."
-        log_warning(logger=logger, msg=msg, verbose=False)
         raise ValueError(msg)

     return df


-def remove_issue_timesteps(df, issue_dict, verbose=False):
+def remove_issue_timesteps(df, issue_dict, logger=None, verbose=False):
     """Drop dataframe rows with timesteps listed in the issue dictionary.

     Parameters
     ----------
-    df : pd.DataFrame
+    df : pandas.DataFrame
         Input dataframe.
     issue_dict : dict
-        Issue dictionary
+        Issue dictionary.
+    verbose : bool
+        Whether to verbose the processing. The default is ``False``.

     Returns
     -------
-    pd.DataFrame
+    pandas.DataFrame
         Dataframe with problematic timesteps removed.

     """
@@ -286,24 +283,21 @@ def remove_issue_timesteps(df, issue_dict, verbose=False):
     return df


-def cast_column_dtypes(df: pd.DataFrame, sensor_name: str, verbose: bool = False) -> pd.DataFrame:
-    """Convert 'object' dataframe columns into DISDRODB L0A dtype standards.
+def cast_column_dtypes(df: pd.DataFrame, sensor_name: str) -> pd.DataFrame:
+    """Convert ``'object'`` dataframe columns into DISDRODB L0A dtype standards.

     Parameters
     ----------
-    df : pd.DataFrame
+    df : pandas.DataFrame
         Input dataframe.
     sensor_name : str
         Name of the sensor.
-    verbose : bool
-        Whether to verbose the processing.

     Returns
     -------
-    pd.DataFrame
+    pandas.DataFrame
         Dataframe with corrected columns types.
     """
-
     # Cast dataframe to dtypes
     dtype_dict = get_l0a_dtype(sensor_name)
     # Ensure time column is saved with seconds resolution
@@ -321,26 +315,23 @@ def cast_column_dtypes(df: pd.DataFrame, sensor_name: str, verbose: bool = False
             df[column] = df[column].astype(dtype_dict[column])
         except ValueError as e:
             msg = f"ValueError: The column {column} has {e}"
-            log_error(logger=logger, msg=msg, verbose=False)
             raise ValueError(msg)
     return df


-def coerce_corrupted_values_to_nan(df: pd.DataFrame, sensor_name: str, verbose: bool = False) -> pd.DataFrame:
-    """Coerce corrupted values in dataframe numeric columns to np.nan.
+def coerce_corrupted_values_to_nan(df: pd.DataFrame, sensor_name: str) -> pd.DataFrame:
+    """Coerce corrupted values in dataframe numeric columns to ``np.nan``.

     Parameters
     ----------
-    df : pd.DataFrame
+    df : pandas.DataFrame
         Input dataframe.
     sensor_name : str
         Name of the sensor.
-    verbose : bool
-        Whether to verbose the processing.

     Returns
     -------
-    pd.DataFrame
+    pandas.DataFrame
         Dataframe with string columns without corrupted values.
     """
     # Cast dataframe to dtypes
@@ -359,21 +350,19 @@
     return df


-def strip_string_spaces(df: pd.DataFrame, sensor_name: str, verbose: bool = False) -> pd.DataFrame:
+def strip_string_spaces(df: pd.DataFrame, sensor_name: str) -> pd.DataFrame:
     """Strip leading/trailing spaces from dataframe string columns.

     Parameters
     ----------
-    df : pd.DataFrame
+    df : pandas.DataFrame
         Input dataframe.
     sensor_name : str
         Name of the sensor.
-    verbose : bool
-        Whether to verbose the processing.

     Returns
     -------
-    pd.DataFrame
+    pandas.DataFrame
         Dataframe with string columns without leading/trailing spaces.
     """
     # Cast dataframe to dtypes
@@ -390,13 +379,13 @@
         try:
             df[column] = df[column].str.strip()
         except AttributeError:
-            msg = f"AttributeError: The column {column} is not a string/object dtype."
-            log_error(logger=logger, msg=msg, verbose=False)
+            msg = f"The column {column} is not a string/object dtype."
             raise AttributeError(msg)
     return df


-def _strip_delimiter(string):
+def strip_delimiter(string):
+    """Remove the first and last delimiter occurrence from a string."""
     if not isinstance(string, str):
         return string
     split_str = infer_split_str(string=string)
@@ -415,12 +404,12 @@ def strip_delimiter_from_raw_arrays(df):
     available_fields = list(df.columns[np.isin(df.columns, possible_fields)])
     # Loop over the fields and strip away the delimiter
     for field in available_fields:
-        df[field] = df[field].apply(_strip_delimiter)
+        df[field] = df[field].apply(strip_delimiter)
     # Return the dataframe
     return df


-def _is_not_corrupted(string):
+def is_raw_array_string_not_corrupted(string):
     """Check if the raw array is corrupted."""
     if not isinstance(string, str):
         return False
@@ -445,32 +434,32 @@ def remove_corrupted_rows(df):
     # Loop over the fields and remove corrupted ones
     for field in available_fields:
         if len(df) != 0:
-            df = df[df[field].apply(_is_not_corrupted)]
+            df = df[df[field].apply(is_raw_array_string_not_corrupted)]
     # Check if there are rows left
     if len(df) == 0:
         raise ValueError("No remaining rows after data corruption checks.")
     # If only one row available, raise also error
     if len(df) == 1:
-        raise ValueError("Only 1 row remains after data corruption checks. Check the file.")
+        raise ValueError("Only 1 row remains after data corruption checks. Check the raw file and maybe delete it.")
     # Return the dataframe
     return df


-def replace_nan_flags(df, sensor_name, verbose):
-    """Set values corresponding to nan_flags to np.nan.
+def replace_nan_flags(df, sensor_name, logger=None, verbose=False):
+    """Set values corresponding to ``nan_flags`` to ``np.nan``.

     Parameters
     ----------
-    df : pd.DataFrame
+    df : pandas.DataFrame
         Input dataframe.
     sensor_name : str
         Name of the sensor.
     verbose : bool
-        Whether to verbose the processing.
+        Whether to verbose the processing. The default is ``False``.

     Returns
     -------
-    pd.DataFrame
+    pandas.DataFrame
         Dataframe without nan_flags values.
     """
     # Get dictionary of nan flags
@@ -486,26 +475,26 @@ def replace_nan_flags(df, sensor_name, verbose):
         if n_nan_flags_values > 0:
             msg = f"In variable {var}, {n_nan_flags_values} values were nan_flags and were replaced to np.nan."
             log_info(logger=logger, msg=msg, verbose=verbose)
-            df[var][is_a_nan_flag] = np.nan
+            df.loc[is_a_nan_flag, var] = np.nan
     # Return dataframe
     return df


-def set_nan_outside_data_range(df, sensor_name, verbose):
-    """Set values outside the data range as np.nan.
+def set_nan_outside_data_range(df, sensor_name, logger=None, verbose=False):
+    """Set values outside the data range as ``np.nan``.

     Parameters
     ----------
-    df : pd.DataFrame
+    df : pandas.DataFrame
         Input dataframe.
     sensor_name : str
         Name of the sensor.
     verbose : bool
-        Whether to verbose the processing.
+        Whether to verbose the processing. The default is ``False``.

     Returns
     -------
-    pd.DataFrame
+    pandas.DataFrame
         Dataframe without values outside the expected data range.
     """
     # Get dictionary of data_range
@@ -530,21 +519,21 @@ def set_nan_outside_data_range(df, sensor_name, verbose):
     return df


-def set_nan_invalid_values(df, sensor_name, verbose):
-    """Set invalid (class) values to np.nan.
+def set_nan_invalid_values(df, sensor_name, logger=None, verbose=False):
+    """Set invalid (class) values to ``np.nan``.

     Parameters
     ----------
-    df : pd.DataFrame
+    df : pandas.DataFrame
         Input dataframe.
     sensor_name : str
         Name of the sensor.
     verbose : bool
-        Whether to verbose the processing.
+        Whether to verbose the processing. The default is ``False``.

     Returns
     -------
-    pd.DataFrame
+    pandas.DataFrame
         Dataframe without invalid values.
     """
     # Get dictionary of valid values
@@ -566,14 +555,12 @@ def set_nan_invalid_values(df, sensor_name, verbose):
     return df


-def process_raw_file(
-    filepath,
-    column_names,
-    reader_kwargs,
-    df_sanitizer_fun,
+def sanitize_df(
+    df,
     sensor_name,
     verbose=True,
-    issue_dict={},
+    issue_dict=None,
+    logger=None,
 ):
     """Read and parse a raw text files into a L0A dataframe.

@@ -581,63 +568,41 @@
     ----------
     filepath : str
         File path
-    column_names : list
-        Columns names.
-    reader_kwargs : dict
-        Pandas `read_csv` arguments.
-    df_sanitizer_fun : object, optional
-        Sanitizer function to format the datafame.
     sensor_name : str
         Name of the sensor.
     verbose : bool
-        Whether to verbose the processing.
-        The default is True
+        Whether to verbose the processing. The default is ``True``.
     issue_dict : dict
         Issue dictionary providing information on timesteps to remove.
-        The default is an empty dictionary {}.
-        Valid issue_dict key are 'timesteps' and 'time_periods'.
+        The default is an empty dictionary ``{}``.
+        Valid issue_dict key are ``'timesteps'`` and ``'time_periods'``.
         Valid issue_dict values are list of datetime64 values (with second accuracy).
-        To correctly format and check the validity of the issue_dict, use
-        the disdrodb.l0.issue.check_issue_dict function.
+        To correctly format and check the validity of the ``issue_dict``, use
+        the ``disdrodb.l0.issue.check_issue_dict`` function.

     Returns
     -------
-    pd.DataFrame
+    pandas.DataFrame
         Dataframe
     """
-    _check_df_sanitizer_fun(df_sanitizer_fun)
-
-    # Read the data
-    df = read_raw_file(
-        filepath=filepath,
-        column_names=column_names,
-        reader_kwargs=reader_kwargs,
-    )
-
-    # - Check if file empty
-    _check_not_empty_dataframe(df=df, verbose=verbose)
-
-    # - Check dataframe column number matches columns_names
-    _check_matching_column_number(df, column_names, verbose=False)
-
-    # - Sanitize the dataframe with a custom function
-    if df_sanitizer_fun is not None:
-        df = df_sanitizer_fun(df)
+    # Define the issue dictionary
+    # - If None, set to empty dictionary
+    issue_dict = {} if issue_dict is None else issue_dict

     # - Remove rows with time NaT
-    df = remove_rows_with_missing_time(df, verbose=verbose)
+    df = remove_rows_with_missing_time(df, logger=logger, verbose=verbose)

     # - Remove duplicated timesteps
-    df = remove_duplicated_timesteps(df, verbose=verbose)
+    df = remove_duplicated_timesteps(df, logger=logger, verbose=verbose)

     # - Filter out problematic tiemsteps reported in the issue YAML file
-    df = remove_issue_timesteps(df, issue_dict=issue_dict, verbose=verbose)
+    df = remove_issue_timesteps(df, issue_dict=issue_dict, logger=logger, verbose=verbose)

     # - Coerce numeric columns corrupted values to np.nan
-    df = coerce_corrupted_values_to_nan(df, sensor_name=sensor_name, verbose=verbose)
+    df = coerce_corrupted_values_to_nan(df, sensor_name=sensor_name)

     # - Strip trailing/leading space from string columns
-    df = strip_string_spaces(df, sensor_name=sensor_name, verbose=verbose)
+    df = strip_string_spaces(df, sensor_name=sensor_name)

     # - Strip first and last delimiter from the raw arrays
     df = strip_delimiter_from_raw_arrays(df)
@@ -646,16 +611,19 @@
     df = remove_corrupted_rows(df)

     # - Cast dataframe to dtypes
-    df = cast_column_dtypes(df, sensor_name=sensor_name, verbose=verbose)
+    df = cast_column_dtypes(df, sensor_name=sensor_name)

     # - Replace nan flags values with np.nans
-    df = replace_nan_flags(df, sensor_name=sensor_name, verbose=verbose)
+    df = replace_nan_flags(df, sensor_name=sensor_name, logger=logger, verbose=verbose)

     # - Set values outside the data range to np.nan
-    df = set_nan_outside_data_range(df, sensor_name=sensor_name, verbose=verbose)
+    df = set_nan_outside_data_range(df, sensor_name=sensor_name, logger=logger, verbose=verbose)

     # - Replace invalid values with np.nan
-    df = set_nan_invalid_values(df, sensor_name=sensor_name, verbose=verbose)
+    df = set_nan_invalid_values(df, sensor_name=sensor_name, logger=logger, verbose=verbose)
+
+    # - Sort by time
+    df = df.sort_values("time")

     # ------------------------------------------------------.
     # - Check column names agrees to DISDRODB standards
@@ -677,23 +645,23 @@ def write_l0a(
     df: pd.DataFrame,
     filepath: str,
     force: bool = False,
+    logger=None,
     verbose: bool = False,
 ):
     """Save the dataframe into an Apache Parquet file.

     Parameters
     ----------
-    df : pd.DataFrame
+    df : pandas.DataFrame
         Input dataframe.
     filepath : str
         Output file path.
     force : bool, optional
         Whether to overwrite existing data.
-        If True, overwrite existing data into destination directories.
-        If False, raise an error if there are already data into destination directories. This is the default.
+        If ``True``, overwrite existing data into destination directories.
+        If ``False``, raise an error if there are already data into destination directories. This is the default.
     verbose : bool, optional
-        Whether to verbose the processing.
-        The default is False.
+        Whether to verbose the processing. The default is ``False``.

     Raises
     ------
@@ -702,7 +670,6 @@ def write_l0a(
     NotImplementedError
         The input dataframe can not be processed.
     """
-
     # -------------------------------------------------------------------------.
     # Create station directory if does not exist
     create_directory(os.path.dirname(filepath))
@@ -710,7 +677,7 @@
     # Check if the file already exists
     # - If force=True --> Remove it
     # - If force=False --> Raise error
-    remove_if_exists(filepath, force=force)
+    remove_if_exists(filepath, force=force, logger=logger)

     # -------------------------------------------------------------------------.
     # Define writing options
@@ -727,20 +694,18 @@
             row_group_size=row_group_size,
         )
         msg = f"The Pandas Dataframe has been written as an Apache Parquet file to {filepath}."
-        log_info(logger=logger, msg=msg, verbose=False)
+        log_info(logger=logger, msg=msg, verbose=verbose)
     except Exception as e:
-        msg = f" - The Pandas DataFrame cannot be written as an Apache Parquet file. The error is: \n {e}."
-        log_error(logger=logger, msg=msg, verbose=False)
+        msg = f"The Pandas DataFrame cannot be written as an Apache Parquet file. The error is: \n {e}."
         raise ValueError(msg)
     # -------------------------------------------------------------------------.
-    return None


-####---------------------------------------------------------------------------.
-#### L0A Utility
+####--------------------------------------------------------------------------.
+#### DISDRODB L0A product reader


-def concatenate_dataframe(list_df: list, verbose: bool = False) -> pd.DataFrame:
+def concatenate_dataframe(list_df: list, logger=None, verbose: bool = False) -> pd.DataFrame:
     """Concatenate a list of dataframes.

     Parameters
@@ -748,12 +713,12 @@ def concatenate_dataframe(list_df: list, verbose: bool = False) -> pd.DataFrame:
     list_df : list
         List of dataframes.
     verbose : bool, optional
-        If True, print messages.
-        If False, no print.
+        If ``True``, print messages.
+        If ``False``, no print.

     Returns
     -------
-    pd.DataFrame
+    pandas.DataFrame
         Concatenated dataframe.

     Raises
@@ -769,39 +734,111 @@ def concatenate_dataframe(list_df: list, verbose: bool = False) -> pd.DataFrame:
         return df

     # Log
-    msg = " - Concatenation of dataframes started."
-    log_info(logger, msg, verbose)
+    msg = "Concatenation of dataframes started."
+    log_info(logger=logger, msg=msg, verbose=verbose)

     # Concatenate the dataframe
     try:
         df = pd.concat(list_df, axis=0, ignore_index=True)
-
-        # Drop duplicated values
-        df = df.drop_duplicates(subset="time")
-
         # Sort by increasing time
         df = df.sort_values(by="time")

     except (AttributeError, TypeError) as e:
-        msg = f" - Can not concatenate the files. \n Error: {e}"
-        log_error(logger=logger, msg=msg, verbose=False)
+        msg = f"Can not concatenate the files. \n Error: {e}"
         raise ValueError(msg)

     # Log
-    msg = " - Concatenation of dataframes has finished."
-    log_info(logger, msg, verbose)
+    msg = "Concatenation of dataframes has finished."
+    log_info(logger=logger, msg=msg, verbose=verbose)

     # Return dataframe
     return df


-def read_raw_files(
+def _read_l0a(filepath: str, verbose: bool = False, logger=None, debugging_mode: bool = False) -> pd.DataFrame:
+    # Log
+    msg = f"Reading L0 Apache Parquet file at {filepath} started."
+    log_info(logger=logger, msg=msg, verbose=verbose)
+    # Open file
+    df = pd.read_parquet(filepath)
+    if debugging_mode:
+        df = df.iloc[0:100]
+    # Log
+    msg = f"Reading L0 Apache Parquet file at {filepath} ended."
+    log_info(logger=logger, msg=msg, verbose=verbose)
+    return df
+
+
+def read_l0a_dataframe(
+    filepaths: Union[str, list],
+    verbose: bool = False,
+    logger=None,
+    debugging_mode: bool = False,
+) -> pd.DataFrame:
+    """Read DISDRODB L0A Apache Parquet file(s).
+
+    Parameters
+    ----------
+    filepaths : str or list
+        Either a list or a single filepath.
+    verbose : bool
+        Whether to print detailed processing information into terminal.
+        The default is ``False``.
+    debugging_mode : bool
+        If ``True``, it reduces the amount of data to process.
+        If filepaths is a list, it reads only the first 3 files.
+        For each file it select only the first 100 rows.
+        The default is ``False``.
+
+    Returns
+    -------
+    pandas.DataFrame
+        L0A Dataframe.
+
+    """
+    from disdrodb.l0.l0a_processing import concatenate_dataframe
+
+    # ----------------------------------------
+    # Check filepaths validity
+    if not isinstance(filepaths, (list, str)):
+        raise TypeError("Expecting filepaths to be a string or a list of strings.")
+
+    # ----------------------------------------
+    # If filepath is a string, convert to list
+    if isinstance(filepaths, str):
+        filepaths = [filepaths]
+    # ---------------------------------------------------
+    # If debugging_mode=True, it reads only the first 3 filepaths
+    if debugging_mode:
+        filepaths = filepaths[0:3]  # select first 3 filepaths
+
+    # ---------------------------------------------------
+    # Define the list of dataframe
+    list_df = [
+        _read_l0a(filepath, verbose=verbose, logger=logger, debugging_mode=debugging_mode) for filepath in filepaths
+    ]
+
+    # Concatenate dataframe
+    df = concatenate_dataframe(list_df, logger=logger, verbose=verbose)
+
+    # Ensure time is in nanoseconds
+    df["time"] = df["time"].astype("M8[ns]")
+
+    # ---------------------------------------------------
+    # Return dataframe
+    return df
+
+
+####---------------------------------------------------------------------------.
+#### L0A Utility
+
+
+def read_raw_text_files(
     filepaths: Union[list, str],
-    column_names: list,
-    reader_kwargs: dict,
-    sensor_name: str,
-    verbose: bool,
-    df_sanitizer_fun: object = None,
+    reader,
+    sensor_name,
+    verbose=True,
+    logger=None,
 ) -> pd.DataFrame:
     """Read and parse a list for raw files into a dataframe.

@@ -809,20 +846,17 @@ def read_raw_files(
     ----------
     filepaths : Union[list,str]
         File(s) path(s)
-    column_names : list
-        Columns names.
-    reader_kwargs : dict
-        Pandas `read_csv` arguments.
+    reader:
+        DISDRODB reader function.
+        Format: reader(filepath, logger=None)
     sensor_name : str
         Name of the sensor.
     verbose : bool
-        Whether to verbose the processing.
-    df_sanitizer_fun : object, optional
-        Sanitizer function to format the datafame.
+        Whether to verbose the processing. The default is ``True``.

     Returns
     -------
-    pd.DataFrame
+    pandas.DataFrame
         Dataframe

     Raises
@@ -831,7 +865,6 @@
         Input parameters can not be used or the raw file can not be processed.

     """
-
     # ------------------------------------------------------.
     # Check input list
     if isinstance(filepaths, str):
@@ -840,54 +873,50 @@
         raise ValueError("'filepaths' must contains at least 1 filepath.")

     # ------------------------------------------------------.
-    ### - Loop over all raw files
+    # Loop over all raw files
     n_files = len(filepaths)
     processed_file_counter = 0
     list_skipped_files_msg = []
     list_df = []
     for filepath in filepaths:
+        # Try read the raw text file
         try:
-            # Try to process a raw file
-            df = process_raw_file(
-                filepath=filepath,
-                column_names=column_names,
-                reader_kwargs=reader_kwargs,
-                df_sanitizer_fun=df_sanitizer_fun,
+            df = reader(filepath, logger=logger)
+            # Sanitize the dataframe
+            df = sanitize_df(
+                df=df,
                 sensor_name=sensor_name,
+                logger=logger,
                 verbose=verbose,
             )
-
             # Append dataframe to the list
             list_df.append(df)
-
             # Update the logger
            processed_file_counter += 1
-            msg = f"{processed_file_counter} / {n_files} processed successfully. File name: {filepath}"
-            log_debug(logger=logger, msg=msg, verbose=verbose)
+            msg = f"Raw file '{filepath}' processed successfully ({processed_file_counter}/{n_files})."
+            log_info(logger=logger, msg=msg, verbose=verbose)

-        # If processing of raw file fails
+        # Skip the file if the processing fails
         except Exception as e:
             # Update the logger
-            msg = f" - {filepath} has been skipped. \n -- The error is: {e}."
-            log_warning(logger=logger, msg=msg, verbose=verbose)
+            msg = f"{filepath} has been skipped. The error is: {e}."
+            log_error(logger=logger, msg=msg, verbose=verbose)
             list_skipped_files_msg.append(msg)

     # Update logger
-    msg = f" - {len(list_skipped_files_msg)} of {n_files} have been skipped."
+    msg = f"{len(list_skipped_files_msg)} of {n_files} have been skipped."
     log_info(logger=logger, msg=msg, verbose=verbose)
-    logger.info("---")
-    logger.info(msg)
-    logger.info("---")

     ##----------------------------------------------------------------.
-    #### - Concatenate the dataframe
+    # Concatenate the dataframe
     if len(list_df) == 0:
-        raise ValueError(f"No dataframe to return. Impossible to parse {filepaths}.")
-    df = concatenate_dataframe(list_df, verbose=verbose)
-
-    # - Remove rows with duplicate timestep (keep the first)
-    df = df.drop_duplicates(subset=["time"], keep="first")
+        raise ValueError("Any raw file could be read!")
+    df = concatenate_dataframe(list_df, verbose=verbose, logger=logger)

     # ------------------------------------------------------.
+    # Enforce output time to be [ns]
+    # --> For compatibility with xarray
+    df["time"] = df["time"].astype("M8[ns]")
+
     # Return the dataframe
     return df
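
Read end to end, the hunks above replace the 0.0.21 functions read_raw_file(), process_raw_file() and read_raw_files() with read_raw_text_file(), sanitize_df() and read_raw_text_files(), thread an explicit logger through the helpers, add a dedicated L0A product reader (read_l0a_dataframe), and drop the post-concatenation deduplication in favour of per-file remove_duplicated_timesteps() plus a final cast of time to nanosecond resolution for xarray compatibility. Below is a minimal usage sketch of the new flow, using only signatures visible in this diff; the paths and sensor name are placeholders, and reader() is the hypothetical skeleton sketched after the file list:

    from disdrodb.l0.l0a_processing import (
        read_l0a_dataframe,
        read_raw_text_files,
        write_l0a,
    )

    # Placeholder raw-file paths; files that fail to parse are logged and skipped.
    filepaths = ["/data/raw/station_1/20200101.txt", "/data/raw/station_1/20200102.txt"]

    # Each file is parsed by reader() and cleaned by sanitize_df() internally,
    # then the per-file dataframes are concatenated and sorted by time.
    df = read_raw_text_files(filepaths, reader=reader, sensor_name="PARSIVEL2", verbose=True)

    # Persist as an Apache Parquet file (force=True overwrites existing output) ...
    write_l0a(df, filepath="/data/l0a/station_1.parquet", force=True)

    # ... and read it back; debugging_mode reads at most 3 files / 100 rows each.
    df = read_l0a_dataframe("/data/l0a/station_1.parquet", debugging_mode=True)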