disdrodb 0.0.20__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. disdrodb/__init__.py +132 -15
  2. disdrodb/_config.py +4 -2
  3. disdrodb/_version.py +9 -4
  4. disdrodb/api/checks.py +264 -237
  5. disdrodb/api/configs.py +4 -8
  6. disdrodb/api/create_directories.py +235 -290
  7. disdrodb/api/info.py +217 -26
  8. disdrodb/api/io.py +295 -269
  9. disdrodb/api/path.py +597 -173
  10. disdrodb/api/search.py +486 -0
  11. disdrodb/{metadata/scripts → cli}/disdrodb_check_metadata_archive.py +12 -7
  12. disdrodb/{utils/pandas.py → cli/disdrodb_data_archive_directory.py} +9 -18
  13. disdrodb/cli/disdrodb_download_archive.py +86 -0
  14. disdrodb/cli/disdrodb_download_metadata_archive.py +53 -0
  15. disdrodb/cli/disdrodb_download_station.py +84 -0
  16. disdrodb/{api/scripts → cli}/disdrodb_initialize_station.py +22 -10
  17. disdrodb/cli/disdrodb_metadata_archive_directory.py +32 -0
  18. disdrodb/{data_transfer/scripts/disdrodb_download_station.py → cli/disdrodb_open_data_archive.py} +22 -22
  19. disdrodb/cli/disdrodb_open_logs_directory.py +69 -0
  20. disdrodb/{data_transfer/scripts/disdrodb_upload_station.py → cli/disdrodb_open_metadata_archive.py} +22 -24
  21. disdrodb/cli/disdrodb_open_metadata_directory.py +71 -0
  22. disdrodb/cli/disdrodb_open_product_directory.py +74 -0
  23. disdrodb/cli/disdrodb_open_readers_directory.py +32 -0
  24. disdrodb/{l0/scripts → cli}/disdrodb_run_l0.py +38 -31
  25. disdrodb/{l0/scripts → cli}/disdrodb_run_l0_station.py +32 -30
  26. disdrodb/{l0/scripts → cli}/disdrodb_run_l0a.py +30 -21
  27. disdrodb/{l0/scripts → cli}/disdrodb_run_l0a_station.py +24 -33
  28. disdrodb/{l0/scripts → cli}/disdrodb_run_l0b.py +30 -21
  29. disdrodb/{l0/scripts → cli}/disdrodb_run_l0b_station.py +25 -34
  30. disdrodb/cli/disdrodb_run_l0c.py +130 -0
  31. disdrodb/cli/disdrodb_run_l0c_station.py +129 -0
  32. disdrodb/cli/disdrodb_run_l1.py +122 -0
  33. disdrodb/cli/disdrodb_run_l1_station.py +121 -0
  34. disdrodb/cli/disdrodb_run_l2e.py +122 -0
  35. disdrodb/cli/disdrodb_run_l2e_station.py +122 -0
  36. disdrodb/cli/disdrodb_run_l2m.py +122 -0
  37. disdrodb/cli/disdrodb_run_l2m_station.py +122 -0
  38. disdrodb/cli/disdrodb_upload_archive.py +105 -0
  39. disdrodb/cli/disdrodb_upload_station.py +98 -0
  40. disdrodb/configs.py +90 -25
  41. disdrodb/data_transfer/__init__.py +22 -0
  42. disdrodb/data_transfer/download_data.py +87 -90
  43. disdrodb/data_transfer/upload_data.py +64 -37
  44. disdrodb/data_transfer/zenodo.py +15 -18
  45. disdrodb/docs.py +1 -1
  46. disdrodb/issue/__init__.py +17 -4
  47. disdrodb/issue/checks.py +10 -23
  48. disdrodb/issue/reader.py +9 -12
  49. disdrodb/issue/writer.py +14 -17
  50. disdrodb/l0/__init__.py +17 -26
  51. disdrodb/l0/check_configs.py +35 -23
  52. disdrodb/l0/check_standards.py +32 -42
  53. disdrodb/l0/configs/{Thies_LPM → LPM}/bins_diameter.yml +44 -44
  54. disdrodb/l0/configs/{Thies_LPM → LPM}/bins_velocity.yml +40 -40
  55. disdrodb/l0/configs/LPM/l0a_encodings.yml +80 -0
  56. disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_cf_attrs.yml +62 -59
  57. disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_encodings.yml +9 -9
  58. disdrodb/l0/configs/{Thies_LPM → LPM}/raw_data_format.yml +245 -245
  59. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_diameter.yml +66 -66
  60. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_velocity.yml +64 -64
  61. disdrodb/l0/configs/PARSIVEL/l0a_encodings.yml +32 -0
  62. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_cf_attrs.yml +22 -20
  63. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_encodings.yml +17 -17
  64. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/raw_data_format.yml +77 -77
  65. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_diameter.yml +64 -64
  66. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_velocity.yml +64 -64
  67. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +39 -0
  68. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_cf_attrs.yml +24 -22
  69. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_encodings.yml +20 -20
  70. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/raw_data_format.yml +98 -98
  71. disdrodb/l0/configs/{RD_80 → RD80}/bins_diameter.yml +40 -40
  72. disdrodb/l0/configs/RD80/l0a_encodings.yml +16 -0
  73. disdrodb/l0/configs/{RD_80 → RD80}/l0b_cf_attrs.yml +3 -3
  74. disdrodb/l0/configs/RD80/l0b_encodings.yml +135 -0
  75. disdrodb/l0/configs/{RD_80 → RD80}/raw_data_format.yml +48 -48
  76. disdrodb/l0/l0_reader.py +216 -340
  77. disdrodb/l0/l0a_processing.py +237 -208
  78. disdrodb/l0/l0b_nc_processing.py +227 -80
  79. disdrodb/l0/l0b_processing.py +93 -173
  80. disdrodb/l0/l0c_processing.py +627 -0
  81. disdrodb/l0/readers/{ARM → LPM/ARM}/ARM_LPM.py +36 -58
  82. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +226 -0
  83. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +185 -0
  84. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +183 -0
  85. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +179 -0
  86. disdrodb/l0/readers/{UK → LPM/UK}/DIVEN.py +14 -35
  87. disdrodb/l0/readers/PARSIVEL/AUSTRALIA/MELBOURNE_2007_PARSIVEL.py +157 -0
  88. disdrodb/l0/readers/PARSIVEL/CHINA/CHONGQING.py +113 -0
  89. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/ARCTIC_2021.py +40 -57
  90. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/COMMON_2011.py +37 -54
  91. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/DAVOS_2009_2011.py +34 -51
  92. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_2009.py +34 -51
  93. disdrodb/l0/readers/{EPFL/PARADISO_2014.py → PARSIVEL/EPFL/EPFL_ROOF_2008.py} +38 -50
  94. disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2010.py +105 -0
  95. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2011.py +34 -51
  96. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2012.py +33 -51
  97. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GENEPI_2007.py +25 -44
  98. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007.py +25 -44
  99. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007_2.py +25 -44
  100. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HPICONET_2010.py +34 -51
  101. disdrodb/l0/readers/{EPFL/EPFL_ROOF_2010.py → PARSIVEL/EPFL/HYMEX_LTE_SOP2.py} +37 -50
  102. disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP3.py +111 -0
  103. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HYMEX_LTE_SOP4.py +36 -54
  104. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2018.py +34 -52
  105. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2019.py +38 -56
  106. disdrodb/l0/readers/PARSIVEL/EPFL/PARADISO_2014.py +105 -0
  107. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PARSIVEL_2007.py +27 -45
  108. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PLATO_2019.py +24 -44
  109. disdrodb/l0/readers/PARSIVEL/EPFL/RACLETS_2019.py +140 -0
  110. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RACLETS_2019_WJF.py +41 -59
  111. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RIETHOLZBACH_2011.py +34 -51
  112. disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2017.py +117 -0
  113. disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2019.py +137 -0
  114. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/UNIL_2022.py +42 -55
  115. disdrodb/l0/readers/PARSIVEL/GPM/IFLOODS.py +104 -0
  116. disdrodb/l0/readers/{GPM → PARSIVEL/GPM}/LPVEX.py +29 -48
  117. disdrodb/l0/readers/PARSIVEL/GPM/MC3E.py +184 -0
  118. disdrodb/l0/readers/PARSIVEL/NCAR/CCOPE_2015.py +113 -0
  119. disdrodb/l0/readers/{NCAR/VORTEX_SE_2016_P1.py → PARSIVEL/NCAR/OWLES_MIPS.py} +46 -72
  120. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +125 -0
  121. disdrodb/l0/readers/{NCAR/OWLES_MIPS.py → PARSIVEL/NCAR/PLOWS_MIPS.py} +45 -64
  122. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +114 -0
  123. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +176 -0
  124. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +183 -0
  125. disdrodb/l0/readers/{ARM/ARM_LD.py → PARSIVEL2/ARM/ARM_PARSIVEL2.py} +27 -50
  126. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +163 -0
  127. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +163 -0
  128. disdrodb/l0/readers/{DENMARK → PARSIVEL2/DENMARK}/EROSION_nc.py +14 -35
  129. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +119 -0
  130. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +104 -0
  131. disdrodb/l0/readers/PARSIVEL2/GPM/NSSTC.py +176 -0
  132. disdrodb/l0/readers/PARSIVEL2/ITALY/GID_PARSIVEL2.py +32 -0
  133. disdrodb/l0/readers/PARSIVEL2/MEXICO/OH_IIUNAM_nc.py +56 -0
  134. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +120 -0
  135. disdrodb/l0/readers/{NCAR → PARSIVEL2/NCAR}/PECAN_MIPS.py +45 -64
  136. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +181 -0
  137. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +160 -0
  138. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +160 -0
  139. disdrodb/l0/readers/{NCAR/PLOWS_MIPS.py → PARSIVEL2/NCAR/VORTEX_SE_2016_P1.py} +49 -66
  140. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +118 -0
  141. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +152 -0
  142. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT.py +166 -0
  143. disdrodb/l0/readers/{NCAR/RELAMPAGO_RD80.py → RD80/BRAZIL/CHUVA_RD80.py} +36 -60
  144. disdrodb/l0/readers/{BRAZIL → RD80/BRAZIL}/GOAMAZON_RD80.py +36 -55
  145. disdrodb/l0/readers/{NCAR → RD80/NCAR}/CINDY_2011_RD80.py +35 -54
  146. disdrodb/l0/readers/{BRAZIL/CHUVA_RD80.py → RD80/NCAR/RELAMPAGO_RD80.py} +40 -54
  147. disdrodb/l0/readers/template_reader_raw_netcdf_data.py +62 -0
  148. disdrodb/l0/readers/{reader_template.py → template_reader_raw_text_data.py} +20 -44
  149. disdrodb/l0/routines.py +885 -581
  150. disdrodb/l0/standards.py +72 -236
  151. disdrodb/l0/template_tools.py +104 -109
  152. disdrodb/l1/__init__.py +17 -0
  153. disdrodb/l1/beard_model.py +716 -0
  154. disdrodb/l1/encoding_attrs.py +620 -0
  155. disdrodb/l1/fall_velocity.py +260 -0
  156. disdrodb/l1/filters.py +192 -0
  157. disdrodb/l1/processing.py +200 -0
  158. disdrodb/l1/resampling.py +236 -0
  159. disdrodb/l1/routines.py +357 -0
  160. disdrodb/l1_env/__init__.py +17 -0
  161. disdrodb/l1_env/routines.py +38 -0
  162. disdrodb/l2/__init__.py +17 -0
  163. disdrodb/l2/empirical_dsd.py +1735 -0
  164. disdrodb/l2/event.py +388 -0
  165. disdrodb/l2/processing.py +519 -0
  166. disdrodb/l2/processing_options.py +213 -0
  167. disdrodb/l2/routines.py +868 -0
  168. disdrodb/metadata/__init__.py +9 -2
  169. disdrodb/metadata/checks.py +165 -118
  170. disdrodb/metadata/download.py +81 -0
  171. disdrodb/metadata/geolocation.py +146 -0
  172. disdrodb/metadata/info.py +20 -13
  173. disdrodb/metadata/manipulation.py +1 -1
  174. disdrodb/metadata/reader.py +59 -8
  175. disdrodb/metadata/search.py +77 -144
  176. disdrodb/metadata/standards.py +7 -8
  177. disdrodb/metadata/writer.py +8 -14
  178. disdrodb/psd/__init__.py +38 -0
  179. disdrodb/psd/fitting.py +2146 -0
  180. disdrodb/psd/models.py +774 -0
  181. disdrodb/routines.py +1176 -0
  182. disdrodb/scattering/__init__.py +28 -0
  183. disdrodb/scattering/axis_ratio.py +344 -0
  184. disdrodb/scattering/routines.py +456 -0
  185. disdrodb/utils/__init__.py +17 -0
  186. disdrodb/utils/attrs.py +208 -0
  187. disdrodb/utils/cli.py +269 -0
  188. disdrodb/utils/compression.py +60 -42
  189. disdrodb/utils/dask.py +62 -0
  190. disdrodb/utils/decorators.py +110 -0
  191. disdrodb/utils/directories.py +107 -46
  192. disdrodb/utils/encoding.py +127 -0
  193. disdrodb/utils/list.py +29 -0
  194. disdrodb/utils/logger.py +168 -46
  195. disdrodb/utils/time.py +657 -0
  196. disdrodb/utils/warnings.py +30 -0
  197. disdrodb/utils/writer.py +57 -0
  198. disdrodb/utils/xarray.py +138 -47
  199. disdrodb/utils/yaml.py +0 -1
  200. disdrodb/viz/__init__.py +17 -0
  201. disdrodb/viz/plots.py +17 -0
  202. disdrodb-0.1.0.dist-info/METADATA +321 -0
  203. disdrodb-0.1.0.dist-info/RECORD +216 -0
  204. {disdrodb-0.0.20.dist-info → disdrodb-0.1.0.dist-info}/WHEEL +1 -1
  205. disdrodb-0.1.0.dist-info/entry_points.txt +30 -0
  206. disdrodb/data_transfer/scripts/disdrodb_download_archive.py +0 -53
  207. disdrodb/data_transfer/scripts/disdrodb_upload_archive.py +0 -57
  208. disdrodb/l0/configs/OTT_Parsivel/l0a_encodings.yml +0 -32
  209. disdrodb/l0/configs/OTT_Parsivel2/l0a_encodings.yml +0 -39
  210. disdrodb/l0/configs/RD_80/l0a_encodings.yml +0 -16
  211. disdrodb/l0/configs/RD_80/l0b_encodings.yml +0 -135
  212. disdrodb/l0/configs/Thies_LPM/l0a_encodings.yml +0 -80
  213. disdrodb/l0/io.py +0 -257
  214. disdrodb/l0/l0_processing.py +0 -1091
  215. disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_OTT.py +0 -178
  216. disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_THIES.py +0 -247
  217. disdrodb/l0/readers/BRAZIL/CHUVA_LPM.py +0 -204
  218. disdrodb/l0/readers/BRAZIL/CHUVA_OTT.py +0 -183
  219. disdrodb/l0/readers/BRAZIL/GOAMAZON_LPM.py +0 -204
  220. disdrodb/l0/readers/BRAZIL/GOAMAZON_OTT.py +0 -183
  221. disdrodb/l0/readers/CHINA/CHONGQING.py +0 -131
  222. disdrodb/l0/readers/EPFL/EPFL_ROOF_2008.py +0 -128
  223. disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP2.py +0 -127
  224. disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP3.py +0 -129
  225. disdrodb/l0/readers/EPFL/RACLETS_2019.py +0 -158
  226. disdrodb/l0/readers/EPFL/SAMOYLOV_2017.py +0 -136
  227. disdrodb/l0/readers/EPFL/SAMOYLOV_2019.py +0 -158
  228. disdrodb/l0/readers/FRANCE/SIRTA_OTT2.py +0 -138
  229. disdrodb/l0/readers/GPM/GCPEX.py +0 -123
  230. disdrodb/l0/readers/GPM/IFLOODS.py +0 -123
  231. disdrodb/l0/readers/GPM/MC3E.py +0 -123
  232. disdrodb/l0/readers/GPM/NSSTC.py +0 -164
  233. disdrodb/l0/readers/ITALY/GID.py +0 -199
  234. disdrodb/l0/readers/MEXICO/OH_IIUNAM_nc.py +0 -92
  235. disdrodb/l0/readers/NCAR/CCOPE_2015.py +0 -133
  236. disdrodb/l0/readers/NCAR/PECAN_FP3.py +0 -137
  237. disdrodb/l0/readers/NCAR/PECAN_MOBILE.py +0 -144
  238. disdrodb/l0/readers/NCAR/RELAMPAGO_OTT.py +0 -195
  239. disdrodb/l0/readers/NCAR/SNOWIE_PJ.py +0 -172
  240. disdrodb/l0/readers/NCAR/SNOWIE_SB.py +0 -179
  241. disdrodb/l0/readers/NCAR/VORTEX2_2009.py +0 -133
  242. disdrodb/l0/readers/NCAR/VORTEX2_2010.py +0 -188
  243. disdrodb/l0/readers/NCAR/VORTEX2_2010_UF.py +0 -191
  244. disdrodb/l0/readers/NCAR/VORTEX_SE_2016_P2.py +0 -135
  245. disdrodb/l0/readers/NCAR/VORTEX_SE_2016_PIPS.py +0 -170
  246. disdrodb/l0/readers/NETHERLANDS/DELFT.py +0 -187
  247. disdrodb/l0/readers/SPAIN/SBEGUERIA.py +0 -179
  248. disdrodb/l0/scripts/disdrodb_run_l0b_concat.py +0 -93
  249. disdrodb/l0/scripts/disdrodb_run_l0b_concat_station.py +0 -85
  250. disdrodb/utils/netcdf.py +0 -452
  251. disdrodb/utils/scripts.py +0 -102
  252. disdrodb-0.0.20.dist-info/AUTHORS.md +0 -18
  253. disdrodb-0.0.20.dist-info/METADATA +0 -186
  254. disdrodb-0.0.20.dist-info/RECORD +0 -168
  255. disdrodb-0.0.20.dist-info/entry_points.txt +0 -15
  256. /disdrodb/l0/configs/{RD_80 → RD80}/bins_velocity.yml +0 -0
  257. /disdrodb/l0/manuals/{Thies_LPM.pdf → LPM.pdf} +0 -0
  258. /disdrodb/l0/manuals/{ODM_470.pdf → ODM470.pdf} +0 -0
  259. /disdrodb/l0/manuals/{OTT_Parsivel.pdf → PARSIVEL.pdf} +0 -0
  260. /disdrodb/l0/manuals/{OTT_Parsivel2.pdf → PARSIVEL2.pdf} +0 -0
  261. /disdrodb/l0/manuals/{PWS_100.pdf → PWS100.pdf} +0 -0
  262. /disdrodb/l0/manuals/{RD_80.pdf → RD80.pdf} +0 -0
  263. {disdrodb-0.0.20.dist-info → disdrodb-0.1.0.dist-info/licenses}/LICENSE +0 -0
  264. {disdrodb-0.0.20.dist-info → disdrodb-0.1.0.dist-info}/top_level.txt +0 -0
@@ -1,1091 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- # -----------------------------------------------------------------------------.
4
- # Copyright (c) 2021-2023 DISDRODB developers
5
- #
6
- # This program is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # This program is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
18
- # -----------------------------------------------------------------------------.
19
- """Implement DISDRODB L0 processing."""
20
-
21
- import datetime
22
- import functools
23
- import logging
24
- import os
25
- import shutil
26
- import time
27
-
28
- import dask
29
- import dask.bag as db
30
- import xarray as xr
31
-
32
- from disdrodb.api.checks import check_sensor_name
33
-
34
- # Directory
35
- from disdrodb.api.create_directories import (
36
- create_directory_structure,
37
- create_l0_directory_structure,
38
- )
39
- from disdrodb.api.info import infer_path_info_dict
40
- from disdrodb.api.path import (
41
- define_campaign_dir,
42
- define_l0a_filepath,
43
- define_l0b_filepath,
44
- define_l0b_station_dir,
45
- define_station_dir,
46
- get_disdrodb_path,
47
- )
48
- from disdrodb.configs import get_base_dir
49
- from disdrodb.issue import read_station_issue
50
- from disdrodb.l0.io import (
51
- get_l0a_filepaths,
52
- get_raw_filepaths,
53
- read_l0a_dataframe,
54
- )
55
- from disdrodb.l0.l0_reader import get_station_reader_function
56
- from disdrodb.metadata import read_station_metadata
57
- from disdrodb.utils.directories import list_files
58
-
59
- # Logger
60
- from disdrodb.utils.logger import (
61
- close_logger,
62
- create_file_logger,
63
- define_summary_log,
64
- log_error,
65
- log_info,
66
- log_warning,
67
- )
68
-
69
- logger = logging.getLogger(__name__)
70
-
71
- # -----------------------------------------------------------------------------.
72
- #### Creation of L0A and L0B Single Station File
73
-
74
-
75
- def _delayed_based_on_kwargs(function):
76
- """Decorator to make the function delayed if its `parallel` argument is True."""
77
-
78
- @functools.wraps(function)
79
- def wrapper(*args, **kwargs):
80
- # Check if it must be a delayed function
81
- parallel = kwargs.get("parallel")
82
- # If parallel is True
83
- if parallel:
84
- # Enforce verbose to be False
85
- kwargs["verbose"] = False
86
- # Define the delayed task
87
- result = dask.delayed(function)(*args, **kwargs)
88
- else:
89
- # Else run the function
90
- result = function(*args, **kwargs)
91
- return result
92
-
93
- return wrapper
94
-
95
-
96
@_delayed_based_on_kwargs
def _generate_l0a(
    filepath,
    processed_dir,
    station_name,  # retrievable from filepath
    column_names,
    reader_kwargs,
    df_sanitizer_fun,
    force,
    verbose,
    parallel,
    issue_dict=None,
):
    """Generate L0A file from raw file.

    The raw file is read and sanitized with ``process_raw_file`` and the
    resulting dataframe is written with ``write_l0a``. Exceptions raised during
    these steps are logged in the file logger and are not propagated.
    Exceptions raised while retrieving the station metadata or checking the
    sensor name are propagated (the file logger is closed beforehand).

    Parameters
    ----------
    filepath : str
        Path of the raw file to process.
    processed_dir : str
        Directory of the processed campaign data.
    station_name : str
        Name of the station.
    column_names : list
        Column names passed to ``process_raw_file``.
    reader_kwargs : dict
        Reader arguments passed to ``process_raw_file``.
    df_sanitizer_fun : callable
        Sanitizer function passed to ``process_raw_file``.
    force : bool
        Passed to ``write_l0a``.
    verbose : bool
        Whether to print the log messages.
    parallel : bool
        Passed to ``create_file_logger``. It is also inspected by the
        ``_delayed_based_on_kwargs`` decorator.
    issue_dict : dict, optional
        Issue dictionary passed to ``process_raw_file``.
        If ``None`` (the default), an empty dictionary is used.

    Returns
    -------
    str or None
        Path of the log file. ``None`` when running under pytest
        (``PYTEST_CURRENT_TEST`` environment variable set).
    """
    from disdrodb.l0.l0a_processing import (
        process_raw_file,
        write_l0a,
    )

    # Do not share a mutable default dictionary across calls
    if issue_dict is None:
        issue_dict = {}

    ##------------------------------------------------------------------------.
    # Create file logger
    filename = os.path.basename(filepath)
    logger = create_file_logger(
        processed_dir=processed_dir,
        product="L0A",
        station_name=station_name,
        filename=filename,
        parallel=parallel,
    )

    # Ensure the file logger is closed also if the metadata retrieval fails
    try:
        if not os.environ.get("PYTEST_CURRENT_TEST"):
            logger_filepath = logger.handlers[0].baseFilename
        else:
            # LogCaptureHandler of pytest does not have baseFilename attribute
            logger_filepath = None

        ##--------------------------------------------------------------------.
        # Log start processing
        msg = f"L0A processing of {filename} has started."
        log_info(logger=logger, msg=msg, verbose=verbose)

        ##--------------------------------------------------------------------.
        # Retrieve metadata
        attrs = read_station_metadata(station_name=station_name, product="L0A", **infer_path_info_dict(processed_dir))

        # Retrieve sensor name
        sensor_name = attrs["sensor_name"]
        check_sensor_name(sensor_name)

        ##--------------------------------------------------------------------.
        try:
            #### - Read raw file into a dataframe and sanitize to L0A format
            df = process_raw_file(
                filepath=filepath,
                column_names=column_names,
                reader_kwargs=reader_kwargs,
                df_sanitizer_fun=df_sanitizer_fun,
                sensor_name=sensor_name,
                verbose=verbose,
                issue_dict=issue_dict,
            )

            ##----------------------------------------------------------------.
            #### - Write to Parquet
            filepath = define_l0a_filepath(df=df, processed_dir=processed_dir, station_name=station_name)
            write_l0a(df=df, filepath=filepath, force=force, verbose=verbose)

            ##----------------------------------------------------------------.
            # Clean environment
            del df

            # Log end processing
            msg = f"L0A processing of {filename} has ended."
            log_info(logger=logger, msg=msg, verbose=verbose)

        # Otherwise log the error
        except Exception as e:
            error_type = str(type(e).__name__)
            msg = f"{error_type}: {e}"
            # NOTE(review): verbose is hard-coded to False here, while the L0B
            # generators pass verbose=verbose - confirm this is intended.
            log_error(logger=logger, msg=msg, verbose=False)
    finally:
        # Close the file logger
        close_logger(logger)

    # Return the logger file path
    return logger_filepath
182
-
183
-
184
def _generate_l0b(
    filepath,
    processed_dir,  # retrievable from filepath
    station_name,  # retrievable from filepath
    force,
    verbose,
    debugging_mode,
    parallel,
):
    """Generate L0B file from L0A file.

    The L0A file is read with ``read_l0a_dataframe``, converted with
    ``create_l0b_from_l0a`` and written with ``write_l0b``. Exceptions raised
    during these steps are logged in the file logger and are not propagated.
    Exceptions raised while retrieving the station metadata or checking the
    sensor name are propagated (the file logger is closed beforehand).

    Parameters
    ----------
    filepath : str
        Path of the L0A file to process.
    processed_dir : str
        Directory of the processed campaign data.
    station_name : str
        Name of the station.
    force : bool
        Passed to ``write_l0b``.
    verbose : bool
        Whether to print the log messages.
    debugging_mode : bool
        Passed to ``read_l0a_dataframe``.
    parallel : bool
        Passed to ``create_file_logger``.

    Returns
    -------
    str or None
        Path of the log file. ``None`` when running under pytest
        (``PYTEST_CURRENT_TEST`` environment variable set).
    """
    from disdrodb.l0.l0b_processing import (
        create_l0b_from_l0a,
        write_l0b,
    )

    # -----------------------------------------------------------------.
    # Create file logger
    filename = os.path.basename(filepath)
    logger = create_file_logger(
        processed_dir=processed_dir,
        product="L0B",
        station_name=station_name,
        filename=filename,
        parallel=parallel,
    )

    # Ensure the file logger is closed also if the metadata retrieval fails
    try:
        if not os.environ.get("PYTEST_CURRENT_TEST"):
            logger_filepath = logger.handlers[0].baseFilename
        else:
            # LogCaptureHandler of pytest does not have baseFilename attribute
            logger_filepath = None

        ##--------------------------------------------------------------------.
        # Log start processing
        msg = f"L0B processing of {filename} has started."
        log_info(logger, msg, verbose=verbose)

        ##--------------------------------------------------------------------.
        # Retrieve metadata
        # NOTE(review): the metadata are requested with product="L0A" - confirm
        # this is intended for the L0B processing.
        attrs = read_station_metadata(station_name=station_name, product="L0A", **infer_path_info_dict(processed_dir))

        # Retrieve sensor name
        sensor_name = attrs["sensor_name"]
        check_sensor_name(sensor_name)

        ##--------------------------------------------------------------------.
        try:
            # Read L0A Apache Parquet file
            df = read_l0a_dataframe(filepath, verbose=verbose, debugging_mode=debugging_mode)

            # -------------------------------------------------------------.
            # Create xarray Dataset
            ds = create_l0b_from_l0a(df=df, attrs=attrs, verbose=verbose)

            # -------------------------------------------------------------.
            # Write L0B netCDF4 dataset
            filepath = define_l0b_filepath(ds, processed_dir, station_name)
            write_l0b(ds, filepath=filepath, force=force)

            ##----------------------------------------------------------------.
            # Clean environment
            del ds, df

            # Log end processing
            msg = f"L0B processing of {filename} has ended."
            log_info(logger, msg, verbose=verbose)

        # Otherwise log the error
        except Exception as e:
            error_type = str(type(e).__name__)
            msg = f"{error_type}: {e}"
            log_error(logger, msg, verbose=verbose)
    finally:
        # Close the file logger
        close_logger(logger)

    # Return the logger file path
    return logger_filepath
259
-
260
-
261
def _generate_l0b_from_nc(
    filepath,
    processed_dir,
    station_name,  # retrievable from filepath
    dict_names,
    ds_sanitizer_fun,
    force,
    verbose,
    parallel,
):
    """Generate L0B file from raw netCDF file.

    The raw netCDF file is loaded in memory with xarray, converted with
    ``create_l0b_from_raw_nc`` and written with ``write_l0b``. Exceptions raised
    during these steps are logged in the file logger and are not propagated.
    Exceptions raised while retrieving the station metadata or checking the
    sensor name are propagated (the file logger is closed beforehand).

    Parameters
    ----------
    filepath : str
        Path of the raw netCDF file to process.
    processed_dir : str
        Directory of the processed campaign data.
    station_name : str
        Name of the station.
    dict_names : dict
        Passed to ``create_l0b_from_raw_nc``.
    ds_sanitizer_fun : callable
        Sanitizer function passed to ``create_l0b_from_raw_nc``.
    force : bool
        Passed to ``write_l0b``.
    verbose : bool
        Whether to print the log messages.
    parallel : bool
        Passed to ``create_file_logger``.

    Returns
    -------
    str or None
        Path of the log file. ``None`` when running under pytest
        (``PYTEST_CURRENT_TEST`` environment variable set).
    """
    from disdrodb.l0.l0b_nc_processing import create_l0b_from_raw_nc
    from disdrodb.l0.l0b_processing import write_l0b

    # -----------------------------------------------------------------.
    # Create file logger
    filename = os.path.basename(filepath)
    logger = create_file_logger(
        processed_dir=processed_dir,
        product="L0B",
        station_name=station_name,
        filename=filename,
        parallel=parallel,
    )

    # Ensure the file logger is closed also if the metadata retrieval fails
    try:
        if not os.environ.get("PYTEST_CURRENT_TEST"):
            logger_filepath = logger.handlers[0].baseFilename
        else:
            # LogCaptureHandler of pytest does not have baseFilename attribute
            logger_filepath = None

        ##--------------------------------------------------------------------.
        # Log start processing
        msg = f"L0B processing of {filename} has started."
        log_info(logger, msg, verbose=verbose)

        ##--------------------------------------------------------------------.
        # Retrieve metadata
        # NOTE(review): the metadata are requested with product="L0A" - confirm
        # this is intended for the L0B processing.
        attrs = read_station_metadata(station_name=station_name, product="L0A", **infer_path_info_dict(processed_dir))

        # Retrieve sensor name
        sensor_name = attrs["sensor_name"]
        check_sensor_name(sensor_name)

        ##--------------------------------------------------------------------.
        try:
            # Open the raw netCDF
            # - Load the data so that the source file can be closed immediately
            with xr.open_dataset(filepath, cache=False) as data:
                ds = data.load()

            # Convert to DISDRODB L0 format
            ds = create_l0b_from_raw_nc(
                ds=ds,
                dict_names=dict_names,
                ds_sanitizer_fun=ds_sanitizer_fun,
                sensor_name=sensor_name,
                verbose=verbose,
                attrs=attrs,
            )

            # -------------------------------------------------------------.
            # Write L0B netCDF4 dataset
            filepath = define_l0b_filepath(ds, processed_dir, station_name)
            write_l0b(ds, filepath=filepath, force=force)

            ##----------------------------------------------------------------.
            # Clean environment
            del ds

            # Log end processing
            msg = f"L0B processing of {filename} has ended."
            log_info(logger, msg, verbose=verbose)

        # Otherwise log the error
        except Exception as e:
            error_type = str(type(e).__name__)
            msg = f"{error_type}: {e}"
            log_error(logger, msg, verbose=verbose)
    finally:
        # Close the file logger
        close_logger(logger)

    # Return the logger file path
    return logger_filepath
343
-
344
-
345
- ####------------------------------------------------------------------------.
346
- #### Creation of L0A and L0B Single Station Files
347
-
348
-
349
def run_l0a(
    raw_dir,
    processed_dir,
    station_name,
    # L0A reader argument
    glob_patterns,
    column_names,
    reader_kwargs,
    df_sanitizer_fun,
    # Processing options
    parallel,
    verbose,
    force,
    debugging_mode,
):
    """Run the L0A processing for a specific DISDRODB station.

    This function is called in each reader to convert raw text files into DISDRODB L0A products.
    It creates the L0A directory structure, lists the raw files of the station, reads the
    station issue file, generates one L0A file per raw file and finally defines the summary logs.

    Parameters
    ----------
    raw_dir : str
        The directory path where all the raw content of a specific campaign is stored.
        The path must have the following structure: ``<...>/DISDRODB/Raw/<DATA_SOURCE>/<CAMPAIGN_NAME>``.
        Inside the ``raw_dir`` directory, it is required to adopt the following structure::

            - ``/data/<station_name>/<raw_files>``
            - ``/metadata/<station_name>.yml``

        **Important points:**

        - For each ``<station_name>``, there must be a corresponding YAML file in the metadata subdirectory.
        - The ``campaign_name`` is expected to be UPPER CASE.
        - The ``<CAMPAIGN_NAME>`` must semantically match between:
            - the ``raw_dir`` and ``processed_dir`` directory paths;
            - the key ``campaign_name`` within the metadata YAML files.

    processed_dir : str
        The desired directory path for the processed DISDRODB L0A and L0B products.
        The path should have the following structure: ``<...>/DISDRODB/Processed/<DATA_SOURCE>/<CAMPAIGN_NAME>``.
        For testing purposes, this function exceptionally accepts also a directory path simply ending
        with ``<CAMPAIGN_NAME>`` (e.g., ``/tmp/<CAMPAIGN_NAME>``).

    station_name : str
        The name of the station.

    glob_patterns : str
        Glob pattern to search for data files in ``<raw_dir>/data/<station_name>``.

    column_names : list
        Column names of the raw text file.

    reader_kwargs : dict
        Arguments for Pandas ``read_csv`` function to open the text file.

    df_sanitizer_fun : callable
        Sanitizer function to format the DataFrame into DISDRODB L0A standard.

    parallel : bool
        If ``True``, the files are processed through ``dask.delayed`` tasks computed
        all together with ``dask.compute``.
        If ``False``, the files are processed sequentially.

    verbose : bool
        If ``True``, print detailed processing information to the terminal.

    force : bool
        If ``True``, overwrite existing data in destination directories.
        If ``False``, raise an error if data already exists in destination directories.

    debugging_mode : bool
        If ``True``, reduce the amount of data to process.
        The flag is forwarded to the function listing the raw files.

    """
    # ------------------------------------------------------------------------.
    # Start L0A processing
    if verbose:
        t_i = time.time()
        log_info(
            logger=logger,
            msg=f"L0A processing of station {station_name} has started.",
            verbose=verbose,
        )

    # ------------------------------------------------------------------------.
    # Create directory structure
    create_l0_directory_structure(
        raw_dir=raw_dir,
        processed_dir=processed_dir,
        product="L0A",
        station_name=station_name,
        force=force,
        verbose=verbose,
    )

    # -------------------------------------------------------------------------.
    # List files to process
    filepaths = get_raw_filepaths(
        raw_dir=raw_dir,
        station_name=station_name,
        glob_patterns=glob_patterns,
        verbose=verbose,
        debugging_mode=debugging_mode,
    )

    # -----------------------------------------------------------------.
    # Read issue YAML file
    issue_dict = read_station_issue(station_name=station_name, **infer_path_info_dict(raw_dir))

    # -----------------------------------------------------------------.
    # Generate L0A files
    # - Arguments shared by all the files of the station
    # - They are passed by keyword because the decorator of _generate_l0a
    #   looks for 'parallel' within the keyword arguments
    shared_kwargs = {
        "processed_dir": processed_dir,
        "station_name": station_name,
        # L0A reader argument
        "column_names": column_names,
        "reader_kwargs": reader_kwargs,
        "df_sanitizer_fun": df_sanitizer_fun,
        "issue_dict": issue_dict,
        # Processing options
        "force": force,
        "verbose": verbose,
        "parallel": parallel,
    }
    # - If parallel=True, each element is a dask.delayed task, otherwise a log file path
    list_tasks = [_generate_l0a(filepath=filepath, **shared_kwargs) for filepath in filepaths]
    list_logs = dask.compute(*list_tasks) if parallel else list_tasks

    # -----------------------------------------------------------------.
    # Define L0A summary logs
    define_summary_log(list_logs)

    # ---------------------------------------------------------------------.
    # End L0A processing
    if verbose:
        elapsed_seconds = time.time() - t_i
        timedelta_str = str(datetime.timedelta(seconds=elapsed_seconds))
        log_info(
            logger=logger,
            msg=f"L0A processing of station {station_name} completed in {timedelta_str}",
            verbose=verbose,
        )
500
-
501
-
502
def run_l0b(
    processed_dir,
    station_name,
    # Processing options
    parallel,
    force,
    verbose,
    debugging_mode,
):
    """
    Run the L0B processing for a specific DISDRODB station.

    The L0A files of the station are listed and each of them is passed to
    ``_generate_l0b``. If the station metadata declare
    ``raw_data_format == "netcdf"``, the function returns immediately and
    nothing is processed.

    Parameters
    ----------
    processed_dir : str
        The directory path of the processed DISDRODB L0A and L0B products.
        The path should have the following structure: ``<...>/DISDRODB/Processed/<DATA_SOURCE>/<CAMPAIGN_NAME>``.
        For testing purposes, this function exceptionally accepts also a directory path simply ending
        with ``<CAMPAIGN_NAME>`` (e.g., ``/tmp/<CAMPAIGN_NAME>``).

    station_name : str
        The name of the station.

    parallel : bool
        If ``True``, process the files through a ``dask.bag`` with one partition per file.
        The number of simultaneous processes can be customized using the ``dask.distributed.LocalCluster``.
        Ensure that the ``threads_per_worker`` (number of thread per process) is set to 1 to avoid HDF errors.
        Also, ensure to set the ``HDF5_USE_FILE_LOCKING`` environment variable to ``False``.
        If ``False``, process the files sequentially in a single process.

    force : bool
        If ``True``, overwrite existing data in destination directories.
        If ``False``, raise an error if data already exists in destination directories.

    verbose : bool
        If ``True``, log the start and the end (with elapsed time) of the processing
        and forward the flag to the per-file processing.

    debugging_mode : bool
        If ``True``, reduce the amount of data to process.
        The flag is forwarded both to the L0A file listing and to the per-file processing.

    Returns
    -------
    None

    """
    # -----------------------------------------------------------------.
    # Retrieve metadata
    attrs = read_station_metadata(station_name=station_name, product="L0A", **infer_path_info_dict(processed_dir))

    # Skip run_l0b processing if the raw data are netCDFs
    if attrs["raw_data_format"] == "netcdf":
        return None

    # -----------------------------------------------------------------.
    # Start L0B processing
    # - t_i is only needed (and only defined) when verbose is True
    if verbose:
        t_i = time.time()
        msg = f"L0B processing of station_name {station_name} has started."
        log_info(logger=logger, msg=msg, verbose=verbose)

    # -------------------------------------------------------------------------.
    # Create directory structure
    create_directory_structure(
        processed_dir=processed_dir,
        product="L0B",
        station_name=station_name,
        force=force,
    )

    # -----------------------------------------------------------------.
    # Get L0A files for the station
    filepaths = get_l0a_filepaths(
        processed_dir=processed_dir,
        station_name=station_name,
        debugging_mode=debugging_mode,
    )

    # -----------------------------------------------------------------.
    # Generate L0B files
    # - The same keyword arguments are used by the sequential and the parallel
    #   branch, so they are defined once to keep the two branches aligned.
    task_kwargs = {
        "processed_dir": processed_dir,
        "station_name": station_name,
        "force": force,
        "verbose": verbose,
        "debugging_mode": debugging_mode,
        "parallel": parallel,
    }
    if not parallel:
        # Loop over the L0A files and save the L0B netCDF files.
        list_logs = [_generate_l0b(filepath=filepath, **task_kwargs) for filepath in filepaths]
    else:
        # Setting npartitions=len(filepaths) enables to wait for the prior task
        # on a core to finish before starting a new one.
        bag = db.from_sequence(filepaths, npartitions=len(filepaths))
        list_logs = bag.map(_generate_l0b, **task_kwargs).compute()

    # -----------------------------------------------------------------.
    # Define L0B summary logs
    define_summary_log(list_logs)

    # -----------------------------------------------------------------.
    # End L0B processing
    if verbose:
        timedelta_str = str(datetime.timedelta(seconds=time.time() - t_i))
        msg = f"L0B processing of station_name {station_name} completed in {timedelta_str}"
        log_info(logger=logger, msg=msg, verbose=verbose)
    return None
639
-
640
-
641
def run_l0b_from_nc(
    raw_dir,
    processed_dir,
    station_name,
    # Reader argument
    glob_patterns,
    dict_names,
    ds_sanitizer_fun,
    # Processing options
    parallel,
    verbose,
    force,
    debugging_mode,
):
    """Run the L0B processing for a specific DISDRODB station with raw netCDFs.

    This function is called in the reader where raw netCDF files must be converted into DISDRODB L0B format.

    Parameters
    ----------
    raw_dir : str
        The directory path where all the raw content of a specific campaign is stored.
        The path must have the following structure: ``<...>/DISDRODB/Raw/<DATA_SOURCE>/<CAMPAIGN_NAME>``.
        Inside the ``raw_dir`` directory, it is required to adopt the following structure::

            - ``/data/<station_name>/<raw_files>``
            - ``/metadata/<station_name>.yml``

        **Important points:**

        - For each ``<station_name>``, there must be a corresponding YAML file in the metadata subdirectory.
        - The ``campaign_name`` are expected to be UPPER CASE.
        - The ``<CAMPAIGN_NAME>`` must semantically match between:
            - the ``raw_dir`` and ``processed_dir`` directory paths;
            - with the key ``campaign_name`` within the metadata YAML files.

    processed_dir : str
        The desired directory path for the processed DISDRODB L0A and L0B products.
        The path should have the following structure: ``<...>/DISDRODB/Processed/<DATA_SOURCE>/<CAMPAIGN_NAME>``.
        For testing purposes, this function exceptionally accepts also a directory path simply ending
        with ``<CAMPAIGN_NAME>`` (e.g., ``/tmp/<CAMPAIGN_NAME>``).

    station_name : str
        The name of the station.

    glob_patterns : str
        Glob pattern to search data files in ``<raw_dir>/data/<station_name>``.
        Example: ``glob_patterns = "*.nc"``

    dict_names : dict
        Dictionary mapping raw netCDF variables/coordinates/dimension names
        to DISDRODB standards.

    ds_sanitizer_fun : object
        Sanitizer function to format the raw netCDF into DISDRODB L0B standard.

    parallel : bool
        If ``True``, process the files through a ``dask.bag`` with one partition per file.
        The number of simultaneous processes can be customized using the ``dask.distributed.LocalCluster``.
        Ensure that the ``threads_per_worker`` (number of thread per process) is set to 1 to avoid HDF errors.
        Also, ensure to set the ``HDF5_USE_FILE_LOCKING`` environment variable to ``False``.
        If ``False``, process the files sequentially in a single process.

    verbose : bool
        If ``True``, log the start and the end (with elapsed time) of the processing
        and forward the flag to the directory creation, file listing and per-file processing.

    force : bool
        If ``True``, overwrite existing data in destination directories.
        If ``False``, raise an error if data already exists in destination directories.

    debugging_mode : bool
        If ``True``, reduce the amount of data to process.
        The flag is forwarded to the raw file listing only.

    Returns
    -------
    None

    """
    # ------------------------------------------------------------------------.
    # Start L0B processing
    # - t_i is only needed (and only defined) when verbose is True
    if verbose:
        t_i = time.time()
        msg = f"L0B processing of station {station_name} has started."
        log_info(logger=logger, msg=msg, verbose=verbose)

    # ------------------------------------------------------------------------.
    # Create directory structure
    create_l0_directory_structure(
        raw_dir=raw_dir,
        processed_dir=processed_dir,
        product="L0B",
        station_name=station_name,
        force=force,
        verbose=verbose,
    )

    # -------------------------------------------------------------------------.
    # List files to process
    filepaths = get_raw_filepaths(
        raw_dir=raw_dir,
        station_name=station_name,
        # Reader argument
        glob_patterns=glob_patterns,
        # Processing options
        verbose=verbose,
        debugging_mode=debugging_mode,
    )

    # -----------------------------------------------------------------.
    # Generate L0B files
    # - The same keyword arguments are used by the sequential and the parallel
    #   branch, so they are defined once to keep the two branches aligned.
    task_kwargs = {
        "processed_dir": processed_dir,
        "station_name": station_name,
        # Reader arguments
        "dict_names": dict_names,
        "ds_sanitizer_fun": ds_sanitizer_fun,
        # Processing options
        "force": force,
        "verbose": verbose,
        "parallel": parallel,
    }
    if not parallel:
        # Loop over the raw netCDF files and convert them to DISDRODB netCDF format.
        list_logs = [_generate_l0b_from_nc(filepath=filepath, **task_kwargs) for filepath in filepaths]
    else:
        # Setting npartitions=len(filepaths) enables to wait for the prior task
        # on a core to finish before starting a new one.
        bag = db.from_sequence(filepaths, npartitions=len(filepaths))
        list_logs = bag.map(_generate_l0b_from_nc, **task_kwargs).compute()

    # -----------------------------------------------------------------.
    # Define L0B summary logs
    define_summary_log(list_logs)

    # ---------------------------------------------------------------------.
    # End L0B processing
    if verbose:
        timedelta_str = str(datetime.timedelta(seconds=time.time() - t_i))
        msg = f"L0B processing of station {station_name} completed in {timedelta_str}"
        log_info(logger=logger, msg=msg, verbose=verbose)
    return None
801
-
802
-
803
def run_l0b_concat(processed_dir, station_name, verbose=False):
    """Concatenate all L0B netCDF files into a single netCDF file.

    The single netCDF file is saved at <processed_dir>/L0B.

    Parameters
    ----------
    processed_dir : str
        Campaign directory of the processed products. It is used to create the
        file logger, to locate the L0B station directory and to define the
        path of the concatenated file.
    station_name : str
        The name of the station.
    verbose : bool, optional
        Forwarded to ``log_warning`` when a single file is found.
        Default is ``False``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no ``*.nc`` file is found in the L0B station directory.

    """
    from disdrodb.l0.l0b_processing import write_l0b
    from disdrodb.utils.netcdf import xr_concat_datasets

    # Create logger
    # NOTE(review): "concatenatation" is misspelled, but the string is passed as
    # the logger filename, so it is left untouched to not change runtime behaviour.
    filename = f"concatenatation_{station_name}"
    logger = create_file_logger(
        processed_dir=processed_dir,
        product="L0B",
        station_name="",  # locate outside the station directory
        filename=filename,
        parallel=False,
    )

    # The try/finally guarantees the file logger is closed also when an error
    # is raised (no file available, concatenation failure, writing failure).
    try:
        # ---------------------------------------------------------------------.
        # Retrieve L0B files
        station_dir = define_l0b_station_dir(processed_dir, station_name)
        filepaths = sorted(list_files(station_dir, glob_pattern="*.nc", recursive=True))

        # ---------------------------------------------------------------------.
        # Check the number of available files
        # - No file: error. Single file: warning only, the processing continues.
        n_files = len(filepaths)
        if n_files == 0:
            msg = f"No L0B file is available for concatenation in {station_dir}."
            log_error(logger=logger, msg=msg, verbose=False)
            raise ValueError(msg)

        if n_files == 1:
            msg = f"Only a single file is available for concatenation in {station_dir}."
            log_warning(logger=logger, msg=msg, verbose=verbose)

        # ---------------------------------------------------------------------.
        # Concatenate the files
        ds = xr_concat_datasets(filepaths)

        # Ensure the dataset is closed also if writing the file fails
        try:
            # -----------------------------------------------------------------.
            # Define the filepath of the concatenated L0B netCDF
            single_nc_filepath = define_l0b_filepath(ds, processed_dir, station_name, l0b_concat=True)
            force = True  # TODO add as argument
            write_l0b(ds, filepath=single_nc_filepath, force=force)
        finally:
            ds.close()
    finally:
        # ---------------------------------------------------------------------.
        # Close the file logger
        close_logger(logger)

    # Nothing is returned: the concatenated dataset is only written to disk
    return None
860
-
861
-
862
- ####--------------------------------------------------------------------------.
863
- #### DISDRODB Station Functions
864
-
865
-
866
def run_l0a_station(
    # Station arguments
    data_source,
    campaign_name,
    station_name,
    # Processing options
    force: bool = False,
    verbose: bool = False,
    debugging_mode: bool = False,
    parallel: bool = True,
    base_dir: str = None,
):
    """
    Run the L0A processing of a specific DISDRODB station when invoked from the terminal.

    This function is intended to be called through the ``disdrodb_run_l0a_station``
    command-line interface.

    The station reader function is retrieved and called with the campaign raw and
    processed directories. The actual processing is performed by the reader.

    Parameters
    ----------
    data_source : str
        The name of the institution (for campaigns spanning multiple countries) or
        the name of the country (for campaigns or sensor networks within a single country).
        Must be provided in UPPER CASE.
    campaign_name : str
        The name of the campaign. Must be provided in UPPER CASE.
    station_name : str
        The name of the station.
    force : bool, optional
        If ``True``, existing data in the destination directories will be overwritten.
        If ``False`` (default), an error will be raised if data already exists in the destination directories.
    verbose : bool, optional
        If ``True``, detailed processing information will be printed to the terminal.
        If ``False`` (default), less information will be displayed.
    debugging_mode : bool, optional
        If ``True``, the amount of data processed will be reduced.
        The flag is forwarded to the reader. By default, ``False``.
    parallel : bool, optional
        If ``True`` (default), files will be processed in multiple processes simultaneously
        with each process using a single thread.
        If ``False``, files will be processed sequentially in a single process.
    base_dir : str, optional
        The base directory of DISDRODB, expected in the format ``<...>/DISDRODB``.
        If not specified, the path specified in the DISDRODB active configuration will be used.
    """
    base_dir = get_base_dir(base_dir)

    # Arguments identifying the campaign, shared by the calls below
    campaign_kwargs = {
        "base_dir": base_dir,
        "data_source": data_source,
        "campaign_name": campaign_name,
    }

    # Retrieve the reader of the station
    reader = get_station_reader_function(station_name=station_name, **campaign_kwargs)

    # Define campaign raw_dir and process_dir
    raw_dir = define_campaign_dir(product="RAW", **campaign_kwargs)
    processed_dir = define_campaign_dir(product="L0A", **campaign_kwargs)  # also works for raw netCDFs

    # Run L0A processing
    # --> The reader call the run_l0a within the custom defined reader function
    # --> For the special case of raw netCDF data, it calls the run_l0b_from_nc function
    reader(
        raw_dir=raw_dir,
        processed_dir=processed_dir,
        station_name=station_name,
        # Processing options
        force=force,
        verbose=verbose,
        debugging_mode=debugging_mode,
        parallel=parallel,
    )
945
-
946
-
947
def run_l0b_station(
    # Station arguments
    data_source,
    campaign_name,
    station_name,
    # Processing options
    force: bool = False,
    verbose: bool = True,
    parallel: bool = True,
    debugging_mode: bool = False,
    remove_l0a: bool = False,
    base_dir: str = None,
):
    """
    Run the L0B processing of a specific DISDRODB station when invoked from the terminal.

    This function is intended to be called through the ``disdrodb_run_l0b_station``
    command-line interface.

    Parameters
    ----------
    data_source : str
        The name of the institution (for campaigns spanning multiple countries) or
        the name of the country (for campaigns or sensor networks within a single country).
        Must be provided in UPPER CASE.
    campaign_name : str
        The name of the campaign. Must be provided in UPPER CASE.
    station_name : str
        The name of the station.
    force : bool, optional
        If ``True``, existing data in the destination directories will be overwritten.
        If ``False`` (default), an error will be raised if data already exists in the destination directories.
    verbose : bool, optional
        If ``True`` (default), detailed processing information will be printed to the terminal.
        If ``False``, less information will be displayed.
    parallel : bool, optional
        If ``True`` (default), files will be processed in multiple processes simultaneously,
        with each process using a single thread to avoid issues with the HDF/netCDF library.
        If ``False``, files will be processed sequentially in a single process.
    debugging_mode : bool, optional
        If ``True``, the amount of data processed will be reduced.
        The flag is forwarded to ``run_l0b``. By default, ``False``.
    remove_l0a : bool, optional
        If ``True``, the L0A directory of the station is deleted (``shutil.rmtree``)
        once ``run_l0b`` has returned. By default, ``False``.
    base_dir : str, optional
        The base directory of DISDRODB, expected in the format ``<...>/DISDRODB``.
        If not specified, the path specified in the DISDRODB active configuration will be used.

    """
    # Define campaign processed dir
    base_dir = get_base_dir(base_dir)
    processed_dir = get_disdrodb_path(
        base_dir=base_dir,
        product="L0B",
        data_source=data_source,
        campaign_name=campaign_name,
        check_exists=False,
    )

    # Run L0B
    run_l0b(
        processed_dir=processed_dir,
        station_name=station_name,
        # Processing options
        force=force,
        verbose=verbose,
        debugging_mode=debugging_mode,
        parallel=parallel,
    )

    # Optionally remove the L0A files of the station
    if remove_l0a:
        station_dir = define_station_dir(
            base_dir=base_dir,
            product="L0A",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        log_info(logger=logger, msg="Removal of single L0A files started.", verbose=verbose)
        shutil.rmtree(station_dir)
        log_info(logger=logger, msg="Removal of single L0A files ended.", verbose=verbose)
1026
-
1027
-
1028
def run_l0b_concat_station(
    # Station arguments
    data_source,
    campaign_name,
    station_name,
    # L0B concat options
    remove_l0b=False,
    verbose=True,
    base_dir: str = None,
):
    """Concatenate the L0B files of a station into a single netCDF file.

    This function is intended to be called through the ``disdrodb_run_l0b_concat_station``
    command-line interface.

    Parameters
    ----------
    data_source : str
        The name of the institution (for campaigns spanning multiple countries) or
        the name of the country (for campaigns or sensor networks within a single country).
        Must be provided in UPPER CASE.
    campaign_name : str
        The name of the campaign. Must be provided in UPPER CASE.
    station_name : str
        The name of the station.
    remove_l0b : bool, optional
        If ``True``, the L0B directory of the station is deleted (``shutil.rmtree``)
        after the concatenation. By default, ``False``.
    verbose : bool, optional
        If ``True`` (default), detailed processing information will be printed to the terminal.
        If ``False``, less information will be displayed.
    base_dir : str, optional
        The base directory of DISDRODB, expected in the format ``<...>/DISDRODB``.
        If not specified, the path specified in the DISDRODB active configuration will be used.

    """
    base_dir = get_base_dir(base_dir)

    # Arguments identifying the L0B product of the campaign
    l0b_campaign_kwargs = {
        "base_dir": base_dir,
        "product": "L0B",
        "data_source": data_source,
        "campaign_name": campaign_name,
    }

    # Retrieve the campaign directory (it must already exist)
    campaign_l0b_dir = get_disdrodb_path(check_exists=True, **l0b_campaign_kwargs)

    # Concatenate the station L0B files
    run_l0b_concat(processed_dir=campaign_l0b_dir, station_name=station_name, verbose=verbose)

    # Nothing else to do if the single L0B files must be kept
    if not remove_l0b:
        return

    # Remove the directory with the single L0B files of the station
    l0b_station_dir = define_station_dir(station_name=station_name, **l0b_campaign_kwargs)
    log_info(logger=logger, msg="Removal of single L0B files started.", verbose=verbose)
    shutil.rmtree(l0b_station_dir)
    log_info(logger=logger, msg="Removal of single L0B files ended.", verbose=verbose)
1089
-
1090
-
1091
- ####---------------------------------------------------------------------------.