disdrodb 0.0.20__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264)
  1. disdrodb/__init__.py +132 -15
  2. disdrodb/_config.py +4 -2
  3. disdrodb/_version.py +9 -4
  4. disdrodb/api/checks.py +264 -237
  5. disdrodb/api/configs.py +4 -8
  6. disdrodb/api/create_directories.py +235 -290
  7. disdrodb/api/info.py +217 -26
  8. disdrodb/api/io.py +295 -269
  9. disdrodb/api/path.py +597 -173
  10. disdrodb/api/search.py +486 -0
  11. disdrodb/{metadata/scripts → cli}/disdrodb_check_metadata_archive.py +12 -7
  12. disdrodb/{utils/pandas.py → cli/disdrodb_data_archive_directory.py} +9 -18
  13. disdrodb/cli/disdrodb_download_archive.py +86 -0
  14. disdrodb/cli/disdrodb_download_metadata_archive.py +53 -0
  15. disdrodb/cli/disdrodb_download_station.py +84 -0
  16. disdrodb/{api/scripts → cli}/disdrodb_initialize_station.py +22 -10
  17. disdrodb/cli/disdrodb_metadata_archive_directory.py +32 -0
  18. disdrodb/{data_transfer/scripts/disdrodb_download_station.py → cli/disdrodb_open_data_archive.py} +22 -22
  19. disdrodb/cli/disdrodb_open_logs_directory.py +69 -0
  20. disdrodb/{data_transfer/scripts/disdrodb_upload_station.py → cli/disdrodb_open_metadata_archive.py} +22 -24
  21. disdrodb/cli/disdrodb_open_metadata_directory.py +71 -0
  22. disdrodb/cli/disdrodb_open_product_directory.py +74 -0
  23. disdrodb/cli/disdrodb_open_readers_directory.py +32 -0
  24. disdrodb/{l0/scripts → cli}/disdrodb_run_l0.py +38 -31
  25. disdrodb/{l0/scripts → cli}/disdrodb_run_l0_station.py +32 -30
  26. disdrodb/{l0/scripts → cli}/disdrodb_run_l0a.py +30 -21
  27. disdrodb/{l0/scripts → cli}/disdrodb_run_l0a_station.py +24 -33
  28. disdrodb/{l0/scripts → cli}/disdrodb_run_l0b.py +30 -21
  29. disdrodb/{l0/scripts → cli}/disdrodb_run_l0b_station.py +25 -34
  30. disdrodb/cli/disdrodb_run_l0c.py +130 -0
  31. disdrodb/cli/disdrodb_run_l0c_station.py +129 -0
  32. disdrodb/cli/disdrodb_run_l1.py +122 -0
  33. disdrodb/cli/disdrodb_run_l1_station.py +121 -0
  34. disdrodb/cli/disdrodb_run_l2e.py +122 -0
  35. disdrodb/cli/disdrodb_run_l2e_station.py +122 -0
  36. disdrodb/cli/disdrodb_run_l2m.py +122 -0
  37. disdrodb/cli/disdrodb_run_l2m_station.py +122 -0
  38. disdrodb/cli/disdrodb_upload_archive.py +105 -0
  39. disdrodb/cli/disdrodb_upload_station.py +98 -0
  40. disdrodb/configs.py +90 -25
  41. disdrodb/data_transfer/__init__.py +22 -0
  42. disdrodb/data_transfer/download_data.py +87 -90
  43. disdrodb/data_transfer/upload_data.py +64 -37
  44. disdrodb/data_transfer/zenodo.py +15 -18
  45. disdrodb/docs.py +1 -1
  46. disdrodb/issue/__init__.py +17 -4
  47. disdrodb/issue/checks.py +10 -23
  48. disdrodb/issue/reader.py +9 -12
  49. disdrodb/issue/writer.py +14 -17
  50. disdrodb/l0/__init__.py +17 -26
  51. disdrodb/l0/check_configs.py +35 -23
  52. disdrodb/l0/check_standards.py +32 -42
  53. disdrodb/l0/configs/{Thies_LPM → LPM}/bins_diameter.yml +44 -44
  54. disdrodb/l0/configs/{Thies_LPM → LPM}/bins_velocity.yml +40 -40
  55. disdrodb/l0/configs/LPM/l0a_encodings.yml +80 -0
  56. disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_cf_attrs.yml +62 -59
  57. disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_encodings.yml +9 -9
  58. disdrodb/l0/configs/{Thies_LPM → LPM}/raw_data_format.yml +245 -245
  59. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_diameter.yml +66 -66
  60. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_velocity.yml +64 -64
  61. disdrodb/l0/configs/PARSIVEL/l0a_encodings.yml +32 -0
  62. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_cf_attrs.yml +22 -20
  63. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_encodings.yml +17 -17
  64. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/raw_data_format.yml +77 -77
  65. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_diameter.yml +64 -64
  66. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_velocity.yml +64 -64
  67. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +39 -0
  68. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_cf_attrs.yml +24 -22
  69. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_encodings.yml +20 -20
  70. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/raw_data_format.yml +98 -98
  71. disdrodb/l0/configs/{RD_80 → RD80}/bins_diameter.yml +40 -40
  72. disdrodb/l0/configs/RD80/l0a_encodings.yml +16 -0
  73. disdrodb/l0/configs/{RD_80 → RD80}/l0b_cf_attrs.yml +3 -3
  74. disdrodb/l0/configs/RD80/l0b_encodings.yml +135 -0
  75. disdrodb/l0/configs/{RD_80 → RD80}/raw_data_format.yml +48 -48
  76. disdrodb/l0/l0_reader.py +216 -340
  77. disdrodb/l0/l0a_processing.py +237 -208
  78. disdrodb/l0/l0b_nc_processing.py +227 -80
  79. disdrodb/l0/l0b_processing.py +93 -173
  80. disdrodb/l0/l0c_processing.py +627 -0
  81. disdrodb/l0/readers/{ARM → LPM/ARM}/ARM_LPM.py +36 -58
  82. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +226 -0
  83. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +185 -0
  84. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +183 -0
  85. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +179 -0
  86. disdrodb/l0/readers/{UK → LPM/UK}/DIVEN.py +14 -35
  87. disdrodb/l0/readers/PARSIVEL/AUSTRALIA/MELBOURNE_2007_PARSIVEL.py +157 -0
  88. disdrodb/l0/readers/PARSIVEL/CHINA/CHONGQING.py +113 -0
  89. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/ARCTIC_2021.py +40 -57
  90. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/COMMON_2011.py +37 -54
  91. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/DAVOS_2009_2011.py +34 -51
  92. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_2009.py +34 -51
  93. disdrodb/l0/readers/{EPFL/PARADISO_2014.py → PARSIVEL/EPFL/EPFL_ROOF_2008.py} +38 -50
  94. disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2010.py +105 -0
  95. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2011.py +34 -51
  96. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2012.py +33 -51
  97. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GENEPI_2007.py +25 -44
  98. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007.py +25 -44
  99. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007_2.py +25 -44
  100. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HPICONET_2010.py +34 -51
  101. disdrodb/l0/readers/{EPFL/EPFL_ROOF_2010.py → PARSIVEL/EPFL/HYMEX_LTE_SOP2.py} +37 -50
  102. disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP3.py +111 -0
  103. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HYMEX_LTE_SOP4.py +36 -54
  104. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2018.py +34 -52
  105. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2019.py +38 -56
  106. disdrodb/l0/readers/PARSIVEL/EPFL/PARADISO_2014.py +105 -0
  107. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PARSIVEL_2007.py +27 -45
  108. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PLATO_2019.py +24 -44
  109. disdrodb/l0/readers/PARSIVEL/EPFL/RACLETS_2019.py +140 -0
  110. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RACLETS_2019_WJF.py +41 -59
  111. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RIETHOLZBACH_2011.py +34 -51
  112. disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2017.py +117 -0
  113. disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2019.py +137 -0
  114. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/UNIL_2022.py +42 -55
  115. disdrodb/l0/readers/PARSIVEL/GPM/IFLOODS.py +104 -0
  116. disdrodb/l0/readers/{GPM → PARSIVEL/GPM}/LPVEX.py +29 -48
  117. disdrodb/l0/readers/PARSIVEL/GPM/MC3E.py +184 -0
  118. disdrodb/l0/readers/PARSIVEL/NCAR/CCOPE_2015.py +113 -0
  119. disdrodb/l0/readers/{NCAR/VORTEX_SE_2016_P1.py → PARSIVEL/NCAR/OWLES_MIPS.py} +46 -72
  120. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +125 -0
  121. disdrodb/l0/readers/{NCAR/OWLES_MIPS.py → PARSIVEL/NCAR/PLOWS_MIPS.py} +45 -64
  122. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +114 -0
  123. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +176 -0
  124. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +183 -0
  125. disdrodb/l0/readers/{ARM/ARM_LD.py → PARSIVEL2/ARM/ARM_PARSIVEL2.py} +27 -50
  126. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +163 -0
  127. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +163 -0
  128. disdrodb/l0/readers/{DENMARK → PARSIVEL2/DENMARK}/EROSION_nc.py +14 -35
  129. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +119 -0
  130. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +104 -0
  131. disdrodb/l0/readers/PARSIVEL2/GPM/NSSTC.py +176 -0
  132. disdrodb/l0/readers/PARSIVEL2/ITALY/GID_PARSIVEL2.py +32 -0
  133. disdrodb/l0/readers/PARSIVEL2/MEXICO/OH_IIUNAM_nc.py +56 -0
  134. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +120 -0
  135. disdrodb/l0/readers/{NCAR → PARSIVEL2/NCAR}/PECAN_MIPS.py +45 -64
  136. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +181 -0
  137. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +160 -0
  138. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +160 -0
  139. disdrodb/l0/readers/{NCAR/PLOWS_MIPS.py → PARSIVEL2/NCAR/VORTEX_SE_2016_P1.py} +49 -66
  140. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +118 -0
  141. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +152 -0
  142. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT.py +166 -0
  143. disdrodb/l0/readers/{NCAR/RELAMPAGO_RD80.py → RD80/BRAZIL/CHUVA_RD80.py} +36 -60
  144. disdrodb/l0/readers/{BRAZIL → RD80/BRAZIL}/GOAMAZON_RD80.py +36 -55
  145. disdrodb/l0/readers/{NCAR → RD80/NCAR}/CINDY_2011_RD80.py +35 -54
  146. disdrodb/l0/readers/{BRAZIL/CHUVA_RD80.py → RD80/NCAR/RELAMPAGO_RD80.py} +40 -54
  147. disdrodb/l0/readers/template_reader_raw_netcdf_data.py +62 -0
  148. disdrodb/l0/readers/{reader_template.py → template_reader_raw_text_data.py} +20 -44
  149. disdrodb/l0/routines.py +885 -581
  150. disdrodb/l0/standards.py +72 -236
  151. disdrodb/l0/template_tools.py +104 -109
  152. disdrodb/l1/__init__.py +17 -0
  153. disdrodb/l1/beard_model.py +716 -0
  154. disdrodb/l1/encoding_attrs.py +620 -0
  155. disdrodb/l1/fall_velocity.py +260 -0
  156. disdrodb/l1/filters.py +192 -0
  157. disdrodb/l1/processing.py +200 -0
  158. disdrodb/l1/resampling.py +236 -0
  159. disdrodb/l1/routines.py +357 -0
  160. disdrodb/l1_env/__init__.py +17 -0
  161. disdrodb/l1_env/routines.py +38 -0
  162. disdrodb/l2/__init__.py +17 -0
  163. disdrodb/l2/empirical_dsd.py +1735 -0
  164. disdrodb/l2/event.py +388 -0
  165. disdrodb/l2/processing.py +519 -0
  166. disdrodb/l2/processing_options.py +213 -0
  167. disdrodb/l2/routines.py +868 -0
  168. disdrodb/metadata/__init__.py +9 -2
  169. disdrodb/metadata/checks.py +165 -118
  170. disdrodb/metadata/download.py +81 -0
  171. disdrodb/metadata/geolocation.py +146 -0
  172. disdrodb/metadata/info.py +20 -13
  173. disdrodb/metadata/manipulation.py +1 -1
  174. disdrodb/metadata/reader.py +59 -8
  175. disdrodb/metadata/search.py +77 -144
  176. disdrodb/metadata/standards.py +7 -8
  177. disdrodb/metadata/writer.py +8 -14
  178. disdrodb/psd/__init__.py +38 -0
  179. disdrodb/psd/fitting.py +2146 -0
  180. disdrodb/psd/models.py +774 -0
  181. disdrodb/routines.py +1176 -0
  182. disdrodb/scattering/__init__.py +28 -0
  183. disdrodb/scattering/axis_ratio.py +344 -0
  184. disdrodb/scattering/routines.py +456 -0
  185. disdrodb/utils/__init__.py +17 -0
  186. disdrodb/utils/attrs.py +208 -0
  187. disdrodb/utils/cli.py +269 -0
  188. disdrodb/utils/compression.py +60 -42
  189. disdrodb/utils/dask.py +62 -0
  190. disdrodb/utils/decorators.py +110 -0
  191. disdrodb/utils/directories.py +107 -46
  192. disdrodb/utils/encoding.py +127 -0
  193. disdrodb/utils/list.py +29 -0
  194. disdrodb/utils/logger.py +168 -46
  195. disdrodb/utils/time.py +657 -0
  196. disdrodb/utils/warnings.py +30 -0
  197. disdrodb/utils/writer.py +57 -0
  198. disdrodb/utils/xarray.py +138 -47
  199. disdrodb/utils/yaml.py +0 -1
  200. disdrodb/viz/__init__.py +17 -0
  201. disdrodb/viz/plots.py +17 -0
  202. disdrodb-0.1.0.dist-info/METADATA +321 -0
  203. disdrodb-0.1.0.dist-info/RECORD +216 -0
  204. {disdrodb-0.0.20.dist-info → disdrodb-0.1.0.dist-info}/WHEEL +1 -1
  205. disdrodb-0.1.0.dist-info/entry_points.txt +30 -0
  206. disdrodb/data_transfer/scripts/disdrodb_download_archive.py +0 -53
  207. disdrodb/data_transfer/scripts/disdrodb_upload_archive.py +0 -57
  208. disdrodb/l0/configs/OTT_Parsivel/l0a_encodings.yml +0 -32
  209. disdrodb/l0/configs/OTT_Parsivel2/l0a_encodings.yml +0 -39
  210. disdrodb/l0/configs/RD_80/l0a_encodings.yml +0 -16
  211. disdrodb/l0/configs/RD_80/l0b_encodings.yml +0 -135
  212. disdrodb/l0/configs/Thies_LPM/l0a_encodings.yml +0 -80
  213. disdrodb/l0/io.py +0 -257
  214. disdrodb/l0/l0_processing.py +0 -1091
  215. disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_OTT.py +0 -178
  216. disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_THIES.py +0 -247
  217. disdrodb/l0/readers/BRAZIL/CHUVA_LPM.py +0 -204
  218. disdrodb/l0/readers/BRAZIL/CHUVA_OTT.py +0 -183
  219. disdrodb/l0/readers/BRAZIL/GOAMAZON_LPM.py +0 -204
  220. disdrodb/l0/readers/BRAZIL/GOAMAZON_OTT.py +0 -183
  221. disdrodb/l0/readers/CHINA/CHONGQING.py +0 -131
  222. disdrodb/l0/readers/EPFL/EPFL_ROOF_2008.py +0 -128
  223. disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP2.py +0 -127
  224. disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP3.py +0 -129
  225. disdrodb/l0/readers/EPFL/RACLETS_2019.py +0 -158
  226. disdrodb/l0/readers/EPFL/SAMOYLOV_2017.py +0 -136
  227. disdrodb/l0/readers/EPFL/SAMOYLOV_2019.py +0 -158
  228. disdrodb/l0/readers/FRANCE/SIRTA_OTT2.py +0 -138
  229. disdrodb/l0/readers/GPM/GCPEX.py +0 -123
  230. disdrodb/l0/readers/GPM/IFLOODS.py +0 -123
  231. disdrodb/l0/readers/GPM/MC3E.py +0 -123
  232. disdrodb/l0/readers/GPM/NSSTC.py +0 -164
  233. disdrodb/l0/readers/ITALY/GID.py +0 -199
  234. disdrodb/l0/readers/MEXICO/OH_IIUNAM_nc.py +0 -92
  235. disdrodb/l0/readers/NCAR/CCOPE_2015.py +0 -133
  236. disdrodb/l0/readers/NCAR/PECAN_FP3.py +0 -137
  237. disdrodb/l0/readers/NCAR/PECAN_MOBILE.py +0 -144
  238. disdrodb/l0/readers/NCAR/RELAMPAGO_OTT.py +0 -195
  239. disdrodb/l0/readers/NCAR/SNOWIE_PJ.py +0 -172
  240. disdrodb/l0/readers/NCAR/SNOWIE_SB.py +0 -179
  241. disdrodb/l0/readers/NCAR/VORTEX2_2009.py +0 -133
  242. disdrodb/l0/readers/NCAR/VORTEX2_2010.py +0 -188
  243. disdrodb/l0/readers/NCAR/VORTEX2_2010_UF.py +0 -191
  244. disdrodb/l0/readers/NCAR/VORTEX_SE_2016_P2.py +0 -135
  245. disdrodb/l0/readers/NCAR/VORTEX_SE_2016_PIPS.py +0 -170
  246. disdrodb/l0/readers/NETHERLANDS/DELFT.py +0 -187
  247. disdrodb/l0/readers/SPAIN/SBEGUERIA.py +0 -179
  248. disdrodb/l0/scripts/disdrodb_run_l0b_concat.py +0 -93
  249. disdrodb/l0/scripts/disdrodb_run_l0b_concat_station.py +0 -85
  250. disdrodb/utils/netcdf.py +0 -452
  251. disdrodb/utils/scripts.py +0 -102
  252. disdrodb-0.0.20.dist-info/AUTHORS.md +0 -18
  253. disdrodb-0.0.20.dist-info/METADATA +0 -186
  254. disdrodb-0.0.20.dist-info/RECORD +0 -168
  255. disdrodb-0.0.20.dist-info/entry_points.txt +0 -15
  256. /disdrodb/l0/configs/{RD_80 → RD80}/bins_velocity.yml +0 -0
  257. /disdrodb/l0/manuals/{Thies_LPM.pdf → LPM.pdf} +0 -0
  258. /disdrodb/l0/manuals/{ODM_470.pdf → ODM470.pdf} +0 -0
  259. /disdrodb/l0/manuals/{OTT_Parsivel.pdf → PARSIVEL.pdf} +0 -0
  260. /disdrodb/l0/manuals/{OTT_Parsivel2.pdf → PARSIVEL2.pdf} +0 -0
  261. /disdrodb/l0/manuals/{PWS_100.pdf → PWS100.pdf} +0 -0
  262. /disdrodb/l0/manuals/{RD_80.pdf → RD80.pdf} +0 -0
  263. {disdrodb-0.0.20.dist-info → disdrodb-0.1.0.dist-info/licenses}/LICENSE +0 -0
  264. {disdrodb-0.0.20.dist-info → disdrodb-0.1.0.dist-info}/top_level.txt +0 -0
disdrodb/l0/l0c_processing.py (new file)
@@ -0,0 +1,627 @@
+ #!/usr/bin/env python3
+
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ """Functions to process DISDRODB L0B files into DISDRODB L0C netCDF files."""
+ import logging
+
+ import numpy as np
+ import pandas as pd
+
+ from disdrodb.api.checks import check_measurement_intervals
+ from disdrodb.api.info import get_start_end_time_from_filepaths
+ from disdrodb.l1.resampling import add_sample_interval
+ from disdrodb.utils.logger import log_warning  # , log_info
+ from disdrodb.utils.time import (
+     ensure_sorted_by_time,
+     regularize_timesteps,
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ TOLERANCE_SECONDS = 120
+
+
+ def get_files_per_days(filepaths):
+     """
+     Organize files by the days they cover based on their start and end times.
+
+     Parameters
+     ----------
+     filepaths : list of str
+         List of file paths to be processed.
+
+     Returns
+     -------
+     dict
+         Dictionary where keys are days (as strings) and values are lists of file paths
+         that cover those days.
+
+     Notes
+     -----
+     This function adds a tolerance of ``TOLERANCE_SECONDS`` (120 s) to account for
+     imprecise time logging by the sensors.
+     """
+     # Retrieve file start_time and end_time
+     files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)
+
+     # Add tolerance to account for imprecise time logging by the sensors
+     # - Example: timestep 23:59:30 might be 00:00 and goes into the next day file ...
+     files_start_time = files_start_time - np.array(TOLERANCE_SECONDS, dtype="m8[s]")
+     files_end_time = files_end_time + np.array(TOLERANCE_SECONDS, dtype="m8[s]")
+
+     # Retrieve file start day and end day
+     start_day = files_start_time.min().astype("M8[D]")
+     end_day = files_end_time.max().astype("M8[D]") + np.array(1, dtype="m8[D]")
+
+     # Create an array with all days in the time period covered by the files
+     list_days = np.asanyarray(pd.date_range(start=start_day, end=end_day, freq="D")).astype("M8[D]")
+
+     # Expand dimensions to enable broadcasting against the days array
+     files_start_time = files_start_time.astype("M8[D]")[:, np.newaxis]  # shape (n_files, 1)
+     files_end_time = files_end_time.astype("M8[D]")[:, np.newaxis]  # shape (n_files, 1)
+
+     # Create an array of all days
+     # - Expand dimension to enable broadcasting against each file
+     days = list_days[np.newaxis, :]  # shape (1, n_days)
+
+     # Use broadcasting to create a boolean matrix indicating which files cover which days
+     mask = (files_start_time <= days) & (files_end_time >= days)  # shape (n_files, n_days)
+
+     # Build a mapping from days to file indices
+     # - For each day (column), find the indices of files (rows) that cover that day
+     dict_days = {}
+     filepaths = np.array(filepaths)
+     for i, day in enumerate(list_days):
+         file_indices = np.where(mask[:, i])[0]
+         if file_indices.size > 0:
+             dict_days[str(day)] = filepaths[file_indices].tolist()
+
+     return dict_days
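
The day-assignment above is plain NumPy broadcasting. A minimal, self-contained sketch on synthetic start/end times (hypothetical values, not real DISDRODB filenames) shows how a file spilling slightly past midnight is assigned to both days:

```python
import numpy as np
import pandas as pd

# Hypothetical file time ranges: file 0 spills a few seconds past midnight.
files_start_time = np.array(["2021-01-01T22:00:00", "2021-01-02T06:00:00"], dtype="M8[s]")
files_end_time = np.array(["2021-01-02T00:00:30", "2021-01-02T18:00:00"], dtype="M8[s]")

# Apply the +/- 120 s tolerance, as in get_files_per_days
tol = np.array(120, dtype="m8[s]")
start, end = files_start_time - tol, files_end_time + tol

# All candidate days, then the (n_files, n_days) coverage matrix
days = np.asanyarray(
    pd.date_range(start.min().astype("M8[D]"), end.max().astype("M8[D]"), freq="D"),
).astype("M8[D]")
mask = (start.astype("M8[D]")[:, None] <= days) & (end.astype("M8[D]")[:, None] >= days)

print({str(day): np.where(mask[:, i])[0].tolist() for i, day in enumerate(days)})
# {'2021-01-01': [0], '2021-01-02': [0, 1]}
```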
+
+
+ def retrieve_possible_measurement_intervals(metadata):
+     """Retrieve the list of possible measurement intervals."""
+     measurement_intervals = metadata.get("measurement_interval", [])
+     return check_measurement_intervals(measurement_intervals)
+
+
+ def drop_timesteps_with_invalid_sample_interval(ds, measurement_intervals, verbose=True, logger=None):
+     """Drop timesteps with unexpected sample intervals."""
+     # TODO
+     # - Correct the logged sample_interval for trailing seconds. Example: (58, 59, 61, 62) converted to 60 s?
+     # - Need to learn more about how the Parsivel software computes the sample_interval variable ...
+
+     # Retrieve logged sample_interval
+     sample_interval = ds["sample_interval"].compute().data
+     timesteps = ds["time"].compute().data
+     is_valid_sample_interval = np.isin(sample_interval, measurement_intervals)
+     indices_invalid_sample_interval = np.where(~is_valid_sample_interval)[0]
+     if len(indices_invalid_sample_interval) > 0:
+         # Log information for each invalid timestep
+         invalid_timesteps = pd.to_datetime(timesteps[indices_invalid_sample_interval]).strftime("%Y-%m-%d %H:%M:%S")
+         invalid_sample_intervals = sample_interval[indices_invalid_sample_interval]
+         for tt, ss in zip(invalid_timesteps, invalid_sample_intervals):
+             msg = f"Unexpected sampling interval ({ss} s) at {tt}. The measurement has been dropped."
+             log_warning(logger=logger, msg=msg, verbose=verbose)
+         # Remove timesteps with invalid sample intervals
+         indices_valid_sample_interval = np.where(is_valid_sample_interval)[0]
+         ds = ds.isel(time=indices_valid_sample_interval)
+     return ds
+
+
+ def split_dataset_by_sampling_intervals(ds, measurement_intervals, min_sample_interval=10, min_block_size=5):
+     """
+     Split a dataset into subsets where each subset has a consistent sampling interval.
+
+     Parameters
+     ----------
+     ds : xarray.Dataset
+         The input dataset with a 'time' dimension.
+     measurement_intervals : list or array-like
+         A list of possible primary sampling intervals (in seconds) that the dataset might have.
+     min_sample_interval : int, optional
+         The minimum expected sampling interval in seconds. Defaults to 10 s.
+     min_block_size : int, optional
+         The minimum number of consecutive timesteps with a given sampling interval
+         required to keep a block. Shorter blocks are discarded!
+         Defaults to 5 timesteps.
+
+     Returns
+     -------
+     dict
+         A dictionary where keys are the identified sampling intervals (in seconds),
+         and values are xarray.Datasets containing only data from those intervals.
+     """
+     # Define array of possible measurement intervals
+     measurement_intervals = np.array(measurement_intervals)
+
+     # If a single measurement interval is expected, return a dictionary with the input dataset
+     if len(measurement_intervals) == 1:
+         dict_ds = {measurement_intervals[0]: ds}
+         return dict_ds
+
+     # Check sorted by time and sort if necessary
+     ds = ensure_sorted_by_time(ds)
+
+     # Calculate time differences in seconds
+     deltadt = np.diff(ds["time"].data).astype("timedelta64[s]").astype(int)
+
+     # Round each delta to the nearest multiple of 5 (because the smallest possible sample interval is 10 s)
+     # - This accounts for possible trailing seconds of the logger
+     # Example: for sample_interval = 10, deltadt values like 8, 9, 11, 12 become 10 ...
+     # Example: for sample_interval = 10, deltadt values like 6, 7 or 13, 14 become respectively 5 and 15 ...
+     # Example: for sample_interval = 30, deltadt values like 28, 29, 30, 31, 32 become 30 ...
+     # Example: for sample_interval = 30, deltadt values like 26, 27 or 33, 34 become respectively 25 and 35 ...
+     min_half_sample_interval = min_sample_interval / 2
+     deltadt = np.round(deltadt / min_half_sample_interval) * min_half_sample_interval
+
+     # Map each delta to one of the possible measurement_intervals if exact match, otherwise np.nan
+     mapped_intervals = np.where(np.isin(deltadt, measurement_intervals), deltadt, np.nan)
+     if np.all(np.isnan(mapped_intervals)):
+         raise ValueError("Impossible to identify timesteps with expected sampling intervals.")
+
+     # Infill np.nan values by using neighbor intervals
+     # Forward fill
+     for i in range(1, len(mapped_intervals)):
+         if np.isnan(mapped_intervals[i]):
+             mapped_intervals[i] = mapped_intervals[i - 1]
+
+     # Backward fill (in case the first entries were np.nan)
+     for i in range(len(mapped_intervals) - 2, -1, -1):
+         if np.isnan(mapped_intervals[i]):
+             mapped_intervals[i] = mapped_intervals[i + 1]
+
+     # Now all intervals are assigned to one of the possible measurement_intervals.
+     # Identify boundaries where the interval changes
+     change_points = np.where(mapped_intervals[:-1] != mapped_intervals[1:])[0] + 1
+
+     # Split ds into segments according to change_points
+     segments = np.split(np.arange(ds.sizes["time"]), change_points)
+
+     # Remove segments with fewer than min_block_size points
+     segments = [seg for seg in segments if len(seg) >= min_block_size]
+     if len(segments) == 0:
+         raise ValueError(
+             f"No blocks of {min_block_size} consecutive timesteps with constant sampling interval are available.",
+         )
+
+     # Define dataset indices for each sampling interval
+     dict_sampling_interval_indices = {}
+     for seg in segments:
+         # Define the assumed sampling interval of the segment
+         start_idx = seg[0]
+         segment_sampling_interval = int(mapped_intervals[start_idx])
+         if segment_sampling_interval not in dict_sampling_interval_indices:
+             dict_sampling_interval_indices[segment_sampling_interval] = [seg]
+         else:
+             dict_sampling_interval_indices[segment_sampling_interval].append(seg)
+     dict_sampling_interval_indices = {
+         k: np.concatenate(list_indices) for k, list_indices in dict_sampling_interval_indices.items()
+     }
+
+     # Define dictionary of datasets
+     dict_ds = {k: ds.isel(time=indices) for k, indices in dict_sampling_interval_indices.items()}
+     return dict_ds
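
The half-interval rounding at the top of this function can be checked numerically. A small sketch with `min_sample_interval=10`, so deltas snap to multiples of 5 s:

```python
import numpy as np

# Time differences (in s) with trailing-second jitter around 10 s and 30 s intervals
deltadt = np.array([8, 9, 11, 12, 28, 31, 33, 61])
min_half_sample_interval = 10 / 2

print(np.round(deltadt / min_half_sample_interval) * min_half_sample_interval)
# [10. 10. 10. 10. 30. 30. 35. 60.]
```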
+
+
+ def has_same_value_over_time(da):
+     """
+     Check if a DataArray has the same value over all timesteps, considering NaNs as equal.
+
+     Parameters
+     ----------
+     da : xarray.DataArray
+         The DataArray to check. Must have a 'time' dimension.
+
+     Returns
+     -------
+     bool
+         True if the values are the same (or NaN in the same positions) across all timesteps,
+         False otherwise.
+     """
+     # Select the first timestep
+     da_first = da.isel(time=0)
+
+     # Create a boolean array that identifies where values are equal or both NaN
+     equal_or_nan = (da == da_first) | (da.isnull() & da_first.isnull())  # noqa: PD003
+
+     # Check if all values match this condition across all dimensions
+     return bool(equal_or_nan.all().item())
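
A quick illustration of the NaN-tolerant comparison, on a synthetic two-timestep array (the `diameter` dimension name is illustrative, and the function is assumed to be in scope):

```python
import numpy as np
import xarray as xr

# Same values and NaNs at the same positions at both timesteps -> True
da = xr.DataArray([[1.0, np.nan], [1.0, np.nan]], dims=("time", "diameter"))
print(has_same_value_over_time(da))  # True

# A plain (da == da.isel(time=0)).all() would return False here,
# because NaN != NaN under IEEE comparison rules.
```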
+
+
+ def remove_duplicated_timesteps(ds, ensure_variables_equality=True, logger=None, verbose=True):
+     """Remove duplicated timesteps from an xarray dataset."""
+     # Check for duplicated timesteps
+     timesteps, counts = np.unique(ds["time"].data, return_counts=True)
+     duplicated_timesteps = timesteps[counts > 1]
+
+     # If no duplicated timesteps, return the dataset as is
+     if len(duplicated_timesteps) == 0:
+         return ds
+
+     # If there are duplicated timesteps:
+     # - First check for variables equality
+     # - Keep the first occurrence of duplicated timesteps if values are equal
+     # - Drop duplicated timesteps where values are different
+     different_duplicated_timesteps = []
+     equal_duplicated_timesteps = []
+     for t in duplicated_timesteps:
+         # Select dataset at given duplicated timestep
+         ds_duplicated = ds.sel(time=t)
+         n_t = len(ds_duplicated["time"])
+
+         # Check raw_drop_number equality
+         if not has_same_value_over_time(ds_duplicated["raw_drop_number"]):
+             different_duplicated_timesteps.append(t)
+             msg = (
+                 f"Presence of {n_t} duplicated timesteps at {t}. "
+                 "They have different 'raw_drop_number' values. These timesteps are dropped."
+             )
+             log_warning(logger=logger, msg=msg, verbose=verbose)
+
+         # Check other variables equality
+         other_variables_to_check = [v for v in ds.data_vars if v != "raw_drop_number"]
+         variables_with_different_values = [
+             var for var in other_variables_to_check if not has_same_value_over_time(ds_duplicated[var])
+         ]
+         if len(variables_with_different_values) > 0:
+             msg = (
+                 f"Presence of {n_t} duplicated timesteps at {t}. "
+                 f"The duplicated timesteps have different values in variables {variables_with_different_values}. "
+             )
+             if ensure_variables_equality:
+                 different_duplicated_timesteps.append(t)
+                 msg = msg + "These timesteps are dropped."
+             else:
+                 equal_duplicated_timesteps.append(t)
+                 msg = msg + (
+                     "These timesteps are not dropped because the 'raw_drop_number' values are equal "
+                     "and 'ensure_variables_equality' is False."
+                 )
+             log_warning(logger=logger, msg=msg, verbose=verbose)
+         else:
+             equal_duplicated_timesteps.append(t)
+
+     # Ensure single occurrence of duplicated timesteps
+     equal_duplicated_timesteps = np.unique(equal_duplicated_timesteps)
+     different_duplicated_timesteps = np.unique(different_duplicated_timesteps)
+
+     # - Keep the first occurrence of equal_duplicated_timesteps
+     if len(equal_duplicated_timesteps) > 0:
+         indices_to_drop = [np.where(ds["time"] == t)[0][1:] for t in equal_duplicated_timesteps]
+         indices_to_drop = np.concatenate(indices_to_drop)
+         # Keep only indices not in indices_to_drop
+         mask = ~np.isin(np.arange(ds["time"].size), indices_to_drop)
+         ds = ds.isel(time=np.where(mask)[0])
+
+     # - Drop different_duplicated_timesteps
+     if len(different_duplicated_timesteps) > 0:
+         mask = np.isin(ds["time"], different_duplicated_timesteps, invert=True)
+         ds = ds.isel(time=np.where(mask)[0])
+
+     return ds
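
A minimal sketch on synthetic data (function assumed in scope): a timestep duplicated with identical `raw_drop_number` values is reduced to its first occurrence:

```python
import pandas as pd
import xarray as xr

time = pd.to_datetime(["2021-01-01 00:00:00", "2021-01-01 00:00:00", "2021-01-01 00:00:30"])
ds = xr.Dataset({"raw_drop_number": ("time", [5.0, 5.0, 7.0])}, coords={"time": time})

ds_clean = remove_duplicated_timesteps(ds, verbose=False)
print(ds_clean["time"].size)  # 2: the duplicate at 00:00:00 was dropped
```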
+
+
+ def check_timesteps_regularity(ds, sample_interval, verbose=False, logger=None):
+     """Check for the regularity of timesteps."""
+     # Check sorted by time and sort if necessary
+     ds = ensure_sorted_by_time(ds)
+
+     # Calculate number of timesteps
+     n = len(ds["time"].data)
+
+     # Calculate time differences in seconds
+     deltadt = np.diff(ds["time"].data).astype("timedelta64[s]").astype(int)
+
+     # Identify unique time intervals and their occurrences
+     unique_deltadt, counts = np.unique(deltadt, return_counts=True)
+
+     # Determine the most frequent time interval (mode)
+     most_frequent_deltadt_idx = np.argmax(counts)
+     most_frequent_deltadt = unique_deltadt[most_frequent_deltadt_idx]
+
+     # Count fraction occurrence of deltadt
+     fractions = np.round(counts / len(deltadt) * 100, 2)
+
+     # Compute stats about the expected deltadt
+     # - Guard against the expected interval not occurring at all (.item() would fail on an empty array)
+     has_expected_deltadt = (unique_deltadt == sample_interval).any()
+     sample_interval_counts = counts[unique_deltadt == sample_interval].item() if has_expected_deltadt else 0
+     sample_interval_fraction = fractions[unique_deltadt == sample_interval].item() if has_expected_deltadt else 0.0
+
+     # Compute stats about the most frequent deltadt
+     most_frequent_deltadt_counts = counts[unique_deltadt == most_frequent_deltadt].item()
+     most_frequent_deltadt_fraction = fractions[unique_deltadt == most_frequent_deltadt].item()
+
+     # Compute stats about unexpected deltadt
+     unexpected_intervals = unique_deltadt[unique_deltadt != sample_interval]
+     unexpected_intervals_counts = counts[unique_deltadt != sample_interval]
+     unexpected_intervals_fractions = fractions[unique_deltadt != sample_interval]
+     frequent_unexpected_intervals = unexpected_intervals[unexpected_intervals_fractions > 5]
+
+     # Report a warning if the sample_interval deltadt occurs less often than 60 % of the time
+     # -> TODO: maybe only report for stations where the disdrometer does not log data only when rainy
+     if sample_interval_fraction < 60:
+         msg = (
+             f"The expected (sampling) interval between observations occurs only "
+             f"{sample_interval_counts}/{n} times ({sample_interval_fraction} %)."
+         )
+         log_warning(logger=logger, msg=msg, verbose=verbose)
+
+     # Report a warning if a deltadt occurs more often than the sampling interval
+     if most_frequent_deltadt != sample_interval:
+         msg = (
+             f"The most frequent time interval between observations is {most_frequent_deltadt} s "
+             f"(occurs {most_frequent_deltadt_counts}/{n} times) ({most_frequent_deltadt_fraction}%) "
+             f"although the expected (sampling) interval is {sample_interval} s "
+             f"and occurs {sample_interval_counts}/{n} times ({sample_interval_fraction}%)."
+         )
+         log_warning(logger=logger, msg=msg, verbose=verbose)
+
+     # Report with a warning all unexpected deltadt with frequency larger than 5 %
+     if len(frequent_unexpected_intervals) > 0:
+         msg_parts = []
+         for interval in frequent_unexpected_intervals:
+             c = unexpected_intervals_counts[unexpected_intervals == interval].item()
+             f = unexpected_intervals_fractions[unexpected_intervals == interval].item()
+             msg_parts.append(f"{interval} s ({f}%) ({c}/{n})")
+         msg = "The following unexpected time intervals between observations occur frequently: " + " | ".join(
+             msg_parts,
+         )
+         log_warning(logger=logger, msg=msg, verbose=verbose)
+     return ds
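
For instance, a synthetic series with a 30 s expected interval and one missing timestep (hypothetical data, function assumed in scope) triggers the frequent-unexpected-interval warning:

```python
import pandas as pd
import xarray as xr

# 9 timesteps at 30 s spacing, with one timestep removed -> one 60 s gap
time = pd.date_range("2021-01-01", periods=10, freq="30s").delete(5)
ds = xr.Dataset(coords={"time": time})

ds = check_timesteps_regularity(ds, sample_interval=30, verbose=True)
# -> warns that a 60 s interval occurs frequently (12.5 % of the deltas)
```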
+
+
+ def finalize_l0c_dataset(ds, sample_interval, start_day, end_day, verbose=True, logger=None):
+     """Finalize a L0C dataset with a unique sampling interval.
+
+     It adds the sample_interval coordinate and regularizes
+     the timesteps for trailing seconds.
+     """
+     # Add sample interval as coordinate
+     ds = add_sample_interval(ds, sample_interval=sample_interval)
+
+     # Regularize timesteps (for trailing seconds)
+     ds = regularize_timesteps(
+         ds,
+         sample_interval=sample_interval,
+         robust=False,  # if True, raise an error if something goes wrong during regularization
+         add_quality_flag=True,
+         verbose=verbose,
+         logger=logger,
+     )
+
+     # Perform checks about timesteps regularity
+     ds = check_timesteps_regularity(ds=ds, sample_interval=sample_interval, verbose=verbose, logger=logger)
+
+     # Slice for the requested day
+     ds = ds.sel({"time": slice(start_day, end_day)})
+     return ds
+
+
+ def create_daily_file(day, filepaths, measurement_intervals, ensure_variables_equality=True, logger=None, verbose=True):
+     """
+     Create a daily file by merging and processing data from multiple filepaths.
+
+     Parameters
+     ----------
+     day : str or numpy.datetime64
+         The day for which the daily file is to be created.
+         Should be in a format that can be converted to numpy.datetime64.
+     filepaths : list of str
+         List of filepaths to the data files to be processed.
+     measurement_intervals : list
+         List of possible (sampling) measurement intervals in seconds.
+     ensure_variables_equality : bool, optional
+         Whether duplicated timesteps must have equal values in all variables
+         (not only in 'raw_drop_number') to be kept. Defaults to True.
+
+     Returns
+     -------
+     dict
+         Dictionary of processed xarray.Datasets for the specified day,
+         keyed by sampling interval.
+
+     Raises
+     ------
+     ValueError
+         If fewer than 3 timesteps are left after removing duplicated timesteps.
+
+     Notes
+     -----
+     - The function adds a tolerance for searching timesteps
+       before and after 00:00 to account for imprecise logging times.
+     - It checks that duplicated timesteps have the same raw drop number values.
+     - The function infers the time integration sample interval and
+       regularizes timesteps to handle trailing seconds.
+     - The data is loaded into memory and connections to source files
+       are closed before returning the dataset.
+     """
+     import xarray as xr  # Load in each process when the function is called!
+
+     # ---------------------------------------------------------------------------------------.
+     # Define start and end of day
+     start_day = np.array(day).astype("M8[D]")
+     end_day = start_day + np.array(1, dtype="m8[D]") - np.array(1, dtype="m8[s]")  # avoid 00:00 of the next day!
+
+     # Add tolerance for searching timesteps before and after 00:00 to account for imprecise logging time
+     # - Example: timestep 23:59:30 that should be 00:00 goes into the next day ...
+     start_day_tol = start_day - np.array(TOLERANCE_SECONDS, dtype="m8[s]")
+     end_day_tol = end_day + np.array(TOLERANCE_SECONDS, dtype="m8[s]")
+
+     # ---------------------------------------------------------------------------------------.
+     # Open files with data within the provided day and concatenate them
+     # list_ds = [xr.open_dataset(filepath, chunks={}).sel({"time": slice(start_day_tol, end_day_tol)})
+     #            for filepath in filepaths]
+     list_ds = [
+         xr.open_dataset(filepath, decode_timedelta=False, chunks={}, cache=False).sortby("time")
+         for filepath in filepaths
+     ]
+     list_ds = [ds.sel({"time": slice(start_day_tol, end_day_tol)}) for ds in list_ds]
+     if len(list_ds) > 1:
+         # Concatenate datasets
+         # - If some variables are missing in one file, they are filled with NaN. This should not occur anyway.
+         # - The resulting dataset can have duplicated timesteps!
+         ds = xr.concat(list_ds, dim="time", join="outer", compat="no_conflicts", combine_attrs="override").sortby(
+             "time",
+         )
+     else:
+         ds = list_ds[0]
+
+     # Compute data
+     ds = ds.compute()
+
+     # Close connection to source files
+     _ = [ds.close() for ds in list_ds]
+     ds.close()
+     del list_ds
+
+     # ---------------------------------------------------------------------------------------.
+     # If sample interval is a dataset variable, drop timesteps with unexpected measurement intervals!
+     if "sample_interval" in ds:
+         ds = drop_timesteps_with_invalid_sample_interval(
+             ds=ds,
+             measurement_intervals=measurement_intervals,
+             verbose=verbose,
+             logger=logger,
+         )
+
+     # ---------------------------------------------------------------------------------------.
+     # Remove duplicated timesteps
+     ds = remove_duplicated_timesteps(
+         ds,
+         ensure_variables_equality=ensure_variables_equality,
+         logger=logger,
+         verbose=verbose,
+     )
+
+     # Raise an error if fewer than 3 timesteps are left
+     n_timesteps = len(ds["time"])
+     if n_timesteps < 3:
+         raise ValueError(f"{n_timesteps} timesteps left after removing duplicated timesteps.")
+
+     # ---------------------------------------------------------------------------------------.
+     # Split dataset by sampling intervals
+     dict_ds = split_dataset_by_sampling_intervals(
+         ds=ds,
+         measurement_intervals=measurement_intervals,
+         min_sample_interval=10,
+         min_block_size=5,
+     )
+
+     # Log a warning if more than one sampling interval is present within the given day
+     if len(dict_ds) > 1:
+         occurring_sampling_intervals = list(dict_ds)
+         msg = f"The dataset contains multiple sampling intervals {occurring_sampling_intervals}."
+         log_warning(logger=logger, msg=msg, verbose=verbose)
+
+     # ---------------------------------------------------------------------------------------.
+     # Finalize L0C datasets
+     # - Add sample_interval coordinate
+     # - Regularize timesteps for trailing seconds
+     dict_ds = {
+         sample_interval: finalize_l0c_dataset(
+             ds=ds,
+             sample_interval=sample_interval,
+             start_day=start_day,
+             end_day=end_day,
+             verbose=verbose,
+             logger=logger,
+         )
+         for sample_interval, ds in dict_ds.items()
+     }
+     return dict_ds
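
Taken together, a hypothetical per-station driver would look like the sketch below (names and inputs are illustrative; the actual orchestration lives in the disdrodb L0 routines, and `filepaths`/`metadata` must come from a real DISDRODB archive):

```python
# Sketch only: assumes `filepaths` lists the station's L0B netCDF files and
# `metadata` is the station metadata dictionary.
measurement_intervals = retrieve_possible_measurement_intervals(metadata)

for day, day_filepaths in get_files_per_days(filepaths).items():
    dict_ds = create_daily_file(
        day=day,
        filepaths=day_filepaths,
        measurement_intervals=measurement_intervals,
    )
    for sample_interval, ds in dict_ds.items():
        ...  # write one daily L0C netCDF per sampling interval
```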
+
+
+ # ---------------------------------------------------------------------------------------.
+ #### DEPRECATED CODE
+
+
+ # def copy_l0b_to_l0c_directory(filepath):
+ #     """Copy L0B file to L0C directory."""
+ #     import netCDF4
+
+ #     # Copy file
+ #     l0c_filepath = filepath.replace("L0B", "L0C")
+ #     _ = shutil.copy(filepath, l0c_filepath)
+
+ #     # Edit DISDRODB product attribute
+ #     with netCDF4.Dataset(l0c_filepath, mode="a") as nc_file:
+ #         # Modify the global attribute
+ #         nc_file.setncattr("disdrodb_product", "L0C")
+
+
+ # def find_isel_common_time(da1, da2):
+ #     """
+ #     Find the indices of common time steps between two data arrays.
+
+ #     Parameters
+ #     ----------
+ #     da1 : xarray.DataArray
+ #         The first data array with a time coordinate.
+ #     da2 : xarray.DataArray
+ #         The second data array with a time coordinate.
+
+ #     Returns
+ #     -------
+ #     da1_isel : numpy.ndarray
+ #         Indices of the common time steps in the first data array.
+ #     da2_isel : numpy.ndarray
+ #         Indices of the common time steps in the second data array.
+
+ #     Notes
+ #     -----
+ #     This function assumes that both input data arrays have a "time" coordinate.
+ #     The function finds the intersection of the time steps in both data arrays
+ #     and returns the indices of these common time steps for each data array.
+ #     """
+ #     intersecting_timesteps = np.intersect1d(da1["time"], da2["time"])
+ #     da1_isel = np.where(np.isin(da1["time"], intersecting_timesteps))[0]
+ #     da2_isel = np.where(np.isin(da2["time"], intersecting_timesteps))[0]
+ #     return da1_isel, da2_isel
+
+
+ # def check_same_raw_drop_number_values(list_ds, filepaths):
+ #     """
+ #     Check if the 'raw_drop_number' values are the same across multiple datasets.
+
+ #     This function compares the 'raw_drop_number' values of multiple datasets to ensure they are identical
+ #     at common timesteps.
+
+ #     If any discrepancies are found, a ValueError is raised indicating which files
+ #     have differing values.
+
+ #     Parameters
+ #     ----------
+ #     list_ds : list of xarray.Dataset
+ #         A list of xarray Datasets to be compared.
+ #     filepaths : list of str
+ #         A list of file paths corresponding to the datasets in `list_ds`.
+
+ #     Raises
+ #     ------
+ #     ValueError
+ #         If 'raw_drop_number' values differ at any common timestep between any two datasets.
+ #     """
+ #     # Retrieve variable to compare
+ #     list_drop_number = [ds["raw_drop_number"].compute() for ds in list_ds]
+ #     # Compare values
+ #     combos = list(itertools.combinations(range(len(list_drop_number)), 2))
+ #     for i, j in combos:
+ #         da1 = list_drop_number[i]
+ #         da2 = list_drop_number[j]
+ #         da1_isel, da2_isel = find_isel_common_time(da1=da1, da2=da2)
+ #         if not np.all(da1.isel(time=da1_isel).data == da2.isel(time=da2_isel).data):
+ #             file1 = filepaths[i]
+ #             file2 = filepaths[j]
+ #             msg = f"Duplicated timesteps have different values between file {file1} and {file2}"
+ #             raise ValueError(msg)