disdrodb 0.0.21__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264)
  1. disdrodb/__init__.py +132 -15
  2. disdrodb/_config.py +4 -2
  3. disdrodb/_version.py +9 -4
  4. disdrodb/api/checks.py +264 -237
  5. disdrodb/api/configs.py +4 -8
  6. disdrodb/api/create_directories.py +235 -290
  7. disdrodb/api/info.py +217 -26
  8. disdrodb/api/io.py +295 -269
  9. disdrodb/api/path.py +597 -173
  10. disdrodb/api/search.py +486 -0
  11. disdrodb/{metadata/scripts → cli}/disdrodb_check_metadata_archive.py +12 -7
  12. disdrodb/{utils/pandas.py → cli/disdrodb_data_archive_directory.py} +9 -18
  13. disdrodb/cli/disdrodb_download_archive.py +86 -0
  14. disdrodb/cli/disdrodb_download_metadata_archive.py +53 -0
  15. disdrodb/cli/disdrodb_download_station.py +84 -0
  16. disdrodb/{api/scripts → cli}/disdrodb_initialize_station.py +22 -10
  17. disdrodb/cli/disdrodb_metadata_archive_directory.py +32 -0
  18. disdrodb/{data_transfer/scripts/disdrodb_download_station.py → cli/disdrodb_open_data_archive.py} +22 -22
  19. disdrodb/cli/disdrodb_open_logs_directory.py +69 -0
  20. disdrodb/{data_transfer/scripts/disdrodb_upload_station.py → cli/disdrodb_open_metadata_archive.py} +22 -24
  21. disdrodb/cli/disdrodb_open_metadata_directory.py +71 -0
  22. disdrodb/cli/disdrodb_open_product_directory.py +74 -0
  23. disdrodb/cli/disdrodb_open_readers_directory.py +32 -0
  24. disdrodb/{l0/scripts → cli}/disdrodb_run_l0.py +38 -31
  25. disdrodb/{l0/scripts → cli}/disdrodb_run_l0_station.py +32 -30
  26. disdrodb/{l0/scripts → cli}/disdrodb_run_l0a.py +30 -21
  27. disdrodb/{l0/scripts → cli}/disdrodb_run_l0a_station.py +24 -33
  28. disdrodb/{l0/scripts → cli}/disdrodb_run_l0b.py +30 -21
  29. disdrodb/{l0/scripts → cli}/disdrodb_run_l0b_station.py +25 -34
  30. disdrodb/cli/disdrodb_run_l0c.py +130 -0
  31. disdrodb/cli/disdrodb_run_l0c_station.py +129 -0
  32. disdrodb/cli/disdrodb_run_l1.py +122 -0
  33. disdrodb/cli/disdrodb_run_l1_station.py +121 -0
  34. disdrodb/cli/disdrodb_run_l2e.py +122 -0
  35. disdrodb/cli/disdrodb_run_l2e_station.py +122 -0
  36. disdrodb/cli/disdrodb_run_l2m.py +122 -0
  37. disdrodb/cli/disdrodb_run_l2m_station.py +122 -0
  38. disdrodb/cli/disdrodb_upload_archive.py +105 -0
  39. disdrodb/cli/disdrodb_upload_station.py +98 -0
  40. disdrodb/configs.py +90 -25
  41. disdrodb/data_transfer/__init__.py +22 -0
  42. disdrodb/data_transfer/download_data.py +87 -90
  43. disdrodb/data_transfer/upload_data.py +64 -37
  44. disdrodb/data_transfer/zenodo.py +15 -18
  45. disdrodb/docs.py +1 -1
  46. disdrodb/issue/__init__.py +17 -4
  47. disdrodb/issue/checks.py +10 -23
  48. disdrodb/issue/reader.py +9 -12
  49. disdrodb/issue/writer.py +14 -17
  50. disdrodb/l0/__init__.py +17 -26
  51. disdrodb/l0/check_configs.py +35 -23
  52. disdrodb/l0/check_standards.py +32 -42
  53. disdrodb/l0/configs/{Thies_LPM → LPM}/bins_diameter.yml +44 -44
  54. disdrodb/l0/configs/{Thies_LPM → LPM}/bins_velocity.yml +40 -40
  55. disdrodb/l0/configs/LPM/l0a_encodings.yml +80 -0
  56. disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_cf_attrs.yml +62 -59
  57. disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_encodings.yml +9 -9
  58. disdrodb/l0/configs/{Thies_LPM → LPM}/raw_data_format.yml +245 -245
  59. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_diameter.yml +66 -66
  60. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_velocity.yml +64 -64
  61. disdrodb/l0/configs/PARSIVEL/l0a_encodings.yml +32 -0
  62. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_cf_attrs.yml +22 -20
  63. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_encodings.yml +17 -17
  64. disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/raw_data_format.yml +77 -77
  65. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_diameter.yml +64 -64
  66. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_velocity.yml +64 -64
  67. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +39 -0
  68. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_cf_attrs.yml +24 -22
  69. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_encodings.yml +20 -20
  70. disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/raw_data_format.yml +98 -98
  71. disdrodb/l0/configs/{RD_80 → RD80}/bins_diameter.yml +40 -40
  72. disdrodb/l0/configs/RD80/l0a_encodings.yml +16 -0
  73. disdrodb/l0/configs/{RD_80 → RD80}/l0b_cf_attrs.yml +3 -3
  74. disdrodb/l0/configs/RD80/l0b_encodings.yml +135 -0
  75. disdrodb/l0/configs/{RD_80 → RD80}/raw_data_format.yml +48 -48
  76. disdrodb/l0/l0_reader.py +216 -340
  77. disdrodb/l0/l0a_processing.py +237 -208
  78. disdrodb/l0/l0b_nc_processing.py +227 -80
  79. disdrodb/l0/l0b_processing.py +93 -173
  80. disdrodb/l0/l0c_processing.py +627 -0
  81. disdrodb/l0/readers/{ARM → LPM/ARM}/ARM_LPM.py +36 -58
  82. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +226 -0
  83. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +185 -0
  84. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +183 -0
  85. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +179 -0
  86. disdrodb/l0/readers/{UK → LPM/UK}/DIVEN.py +14 -35
  87. disdrodb/l0/readers/PARSIVEL/AUSTRALIA/MELBOURNE_2007_PARSIVEL.py +157 -0
  88. disdrodb/l0/readers/PARSIVEL/CHINA/CHONGQING.py +113 -0
  89. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/ARCTIC_2021.py +40 -57
  90. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/COMMON_2011.py +37 -54
  91. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/DAVOS_2009_2011.py +34 -51
  92. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_2009.py +34 -51
  93. disdrodb/l0/readers/{EPFL/PARADISO_2014.py → PARSIVEL/EPFL/EPFL_ROOF_2008.py} +38 -50
  94. disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2010.py +105 -0
  95. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2011.py +34 -51
  96. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2012.py +33 -51
  97. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GENEPI_2007.py +25 -44
  98. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007.py +25 -44
  99. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007_2.py +25 -44
  100. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HPICONET_2010.py +34 -51
  101. disdrodb/l0/readers/{EPFL/EPFL_ROOF_2010.py → PARSIVEL/EPFL/HYMEX_LTE_SOP2.py} +37 -50
  102. disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP3.py +111 -0
  103. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HYMEX_LTE_SOP4.py +36 -54
  104. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2018.py +34 -52
  105. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2019.py +38 -56
  106. disdrodb/l0/readers/PARSIVEL/EPFL/PARADISO_2014.py +105 -0
  107. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PARSIVEL_2007.py +27 -45
  108. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PLATO_2019.py +24 -44
  109. disdrodb/l0/readers/PARSIVEL/EPFL/RACLETS_2019.py +140 -0
  110. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RACLETS_2019_WJF.py +41 -59
  111. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RIETHOLZBACH_2011.py +34 -51
  112. disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2017.py +117 -0
  113. disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2019.py +137 -0
  114. disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/UNIL_2022.py +42 -55
  115. disdrodb/l0/readers/PARSIVEL/GPM/IFLOODS.py +104 -0
  116. disdrodb/l0/readers/{GPM → PARSIVEL/GPM}/LPVEX.py +29 -48
  117. disdrodb/l0/readers/PARSIVEL/GPM/MC3E.py +184 -0
  118. disdrodb/l0/readers/PARSIVEL/NCAR/CCOPE_2015.py +113 -0
  119. disdrodb/l0/readers/{NCAR/VORTEX_SE_2016_P1.py → PARSIVEL/NCAR/OWLES_MIPS.py} +46 -72
  120. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +125 -0
  121. disdrodb/l0/readers/{NCAR/OWLES_MIPS.py → PARSIVEL/NCAR/PLOWS_MIPS.py} +45 -64
  122. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +114 -0
  123. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +176 -0
  124. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +183 -0
  125. disdrodb/l0/readers/{ARM/ARM_LD.py → PARSIVEL2/ARM/ARM_PARSIVEL2.py} +27 -50
  126. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +163 -0
  127. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +163 -0
  128. disdrodb/l0/readers/{DENMARK → PARSIVEL2/DENMARK}/EROSION_nc.py +14 -35
  129. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +119 -0
  130. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +104 -0
  131. disdrodb/l0/readers/PARSIVEL2/GPM/NSSTC.py +176 -0
  132. disdrodb/l0/readers/PARSIVEL2/ITALY/GID_PARSIVEL2.py +32 -0
  133. disdrodb/l0/readers/PARSIVEL2/MEXICO/OH_IIUNAM_nc.py +56 -0
  134. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +120 -0
  135. disdrodb/l0/readers/{NCAR → PARSIVEL2/NCAR}/PECAN_MIPS.py +45 -64
  136. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +181 -0
  137. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +160 -0
  138. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +160 -0
  139. disdrodb/l0/readers/{NCAR/PLOWS_MIPS.py → PARSIVEL2/NCAR/VORTEX_SE_2016_P1.py} +49 -66
  140. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +118 -0
  141. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +152 -0
  142. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT.py +166 -0
  143. disdrodb/l0/readers/{NCAR/RELAMPAGO_RD80.py → RD80/BRAZIL/CHUVA_RD80.py} +36 -60
  144. disdrodb/l0/readers/{BRAZIL → RD80/BRAZIL}/GOAMAZON_RD80.py +36 -55
  145. disdrodb/l0/readers/{NCAR → RD80/NCAR}/CINDY_2011_RD80.py +35 -54
  146. disdrodb/l0/readers/{BRAZIL/CHUVA_RD80.py → RD80/NCAR/RELAMPAGO_RD80.py} +40 -54
  147. disdrodb/l0/readers/template_reader_raw_netcdf_data.py +62 -0
  148. disdrodb/l0/readers/{reader_template.py → template_reader_raw_text_data.py} +20 -44
  149. disdrodb/l0/routines.py +885 -581
  150. disdrodb/l0/standards.py +72 -236
  151. disdrodb/l0/template_tools.py +104 -109
  152. disdrodb/l1/__init__.py +17 -0
  153. disdrodb/l1/beard_model.py +716 -0
  154. disdrodb/l1/encoding_attrs.py +620 -0
  155. disdrodb/l1/fall_velocity.py +260 -0
  156. disdrodb/l1/filters.py +192 -0
  157. disdrodb/l1/processing.py +200 -0
  158. disdrodb/l1/resampling.py +236 -0
  159. disdrodb/l1/routines.py +357 -0
  160. disdrodb/l1_env/__init__.py +17 -0
  161. disdrodb/l1_env/routines.py +38 -0
  162. disdrodb/l2/__init__.py +17 -0
  163. disdrodb/l2/empirical_dsd.py +1735 -0
  164. disdrodb/l2/event.py +388 -0
  165. disdrodb/l2/processing.py +519 -0
  166. disdrodb/l2/processing_options.py +213 -0
  167. disdrodb/l2/routines.py +868 -0
  168. disdrodb/metadata/__init__.py +9 -2
  169. disdrodb/metadata/checks.py +165 -118
  170. disdrodb/metadata/download.py +81 -0
  171. disdrodb/metadata/geolocation.py +146 -0
  172. disdrodb/metadata/info.py +20 -13
  173. disdrodb/metadata/manipulation.py +1 -1
  174. disdrodb/metadata/reader.py +59 -8
  175. disdrodb/metadata/search.py +77 -144
  176. disdrodb/metadata/standards.py +7 -8
  177. disdrodb/metadata/writer.py +8 -14
  178. disdrodb/psd/__init__.py +38 -0
  179. disdrodb/psd/fitting.py +2146 -0
  180. disdrodb/psd/models.py +774 -0
  181. disdrodb/routines.py +1176 -0
  182. disdrodb/scattering/__init__.py +28 -0
  183. disdrodb/scattering/axis_ratio.py +344 -0
  184. disdrodb/scattering/routines.py +456 -0
  185. disdrodb/utils/__init__.py +17 -0
  186. disdrodb/utils/attrs.py +208 -0
  187. disdrodb/utils/cli.py +269 -0
  188. disdrodb/utils/compression.py +60 -42
  189. disdrodb/utils/dask.py +62 -0
  190. disdrodb/utils/decorators.py +110 -0
  191. disdrodb/utils/directories.py +107 -46
  192. disdrodb/utils/encoding.py +127 -0
  193. disdrodb/utils/list.py +29 -0
  194. disdrodb/utils/logger.py +168 -46
  195. disdrodb/utils/time.py +657 -0
  196. disdrodb/utils/warnings.py +30 -0
  197. disdrodb/utils/writer.py +57 -0
  198. disdrodb/utils/xarray.py +138 -47
  199. disdrodb/utils/yaml.py +0 -1
  200. disdrodb/viz/__init__.py +17 -0
  201. disdrodb/viz/plots.py +17 -0
  202. disdrodb-0.1.0.dist-info/METADATA +321 -0
  203. disdrodb-0.1.0.dist-info/RECORD +216 -0
  204. {disdrodb-0.0.21.dist-info → disdrodb-0.1.0.dist-info}/WHEEL +1 -1
  205. disdrodb-0.1.0.dist-info/entry_points.txt +30 -0
  206. disdrodb/data_transfer/scripts/disdrodb_download_archive.py +0 -53
  207. disdrodb/data_transfer/scripts/disdrodb_upload_archive.py +0 -57
  208. disdrodb/l0/configs/OTT_Parsivel/l0a_encodings.yml +0 -32
  209. disdrodb/l0/configs/OTT_Parsivel2/l0a_encodings.yml +0 -39
  210. disdrodb/l0/configs/RD_80/l0a_encodings.yml +0 -16
  211. disdrodb/l0/configs/RD_80/l0b_encodings.yml +0 -135
  212. disdrodb/l0/configs/Thies_LPM/l0a_encodings.yml +0 -80
  213. disdrodb/l0/io.py +0 -257
  214. disdrodb/l0/l0_processing.py +0 -1091
  215. disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_OTT.py +0 -178
  216. disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_THIES.py +0 -247
  217. disdrodb/l0/readers/BRAZIL/CHUVA_LPM.py +0 -204
  218. disdrodb/l0/readers/BRAZIL/CHUVA_OTT.py +0 -183
  219. disdrodb/l0/readers/BRAZIL/GOAMAZON_LPM.py +0 -204
  220. disdrodb/l0/readers/BRAZIL/GOAMAZON_OTT.py +0 -183
  221. disdrodb/l0/readers/CHINA/CHONGQING.py +0 -131
  222. disdrodb/l0/readers/EPFL/EPFL_ROOF_2008.py +0 -128
  223. disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP2.py +0 -127
  224. disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP3.py +0 -129
  225. disdrodb/l0/readers/EPFL/RACLETS_2019.py +0 -158
  226. disdrodb/l0/readers/EPFL/SAMOYLOV_2017.py +0 -136
  227. disdrodb/l0/readers/EPFL/SAMOYLOV_2019.py +0 -158
  228. disdrodb/l0/readers/FRANCE/SIRTA_OTT2.py +0 -138
  229. disdrodb/l0/readers/GPM/GCPEX.py +0 -123
  230. disdrodb/l0/readers/GPM/IFLOODS.py +0 -123
  231. disdrodb/l0/readers/GPM/MC3E.py +0 -123
  232. disdrodb/l0/readers/GPM/NSSTC.py +0 -164
  233. disdrodb/l0/readers/ITALY/GID.py +0 -199
  234. disdrodb/l0/readers/MEXICO/OH_IIUNAM_nc.py +0 -92
  235. disdrodb/l0/readers/NCAR/CCOPE_2015.py +0 -133
  236. disdrodb/l0/readers/NCAR/PECAN_FP3.py +0 -137
  237. disdrodb/l0/readers/NCAR/PECAN_MOBILE.py +0 -144
  238. disdrodb/l0/readers/NCAR/RELAMPAGO_OTT.py +0 -195
  239. disdrodb/l0/readers/NCAR/SNOWIE_PJ.py +0 -172
  240. disdrodb/l0/readers/NCAR/SNOWIE_SB.py +0 -179
  241. disdrodb/l0/readers/NCAR/VORTEX2_2009.py +0 -133
  242. disdrodb/l0/readers/NCAR/VORTEX2_2010.py +0 -188
  243. disdrodb/l0/readers/NCAR/VORTEX2_2010_UF.py +0 -191
  244. disdrodb/l0/readers/NCAR/VORTEX_SE_2016_P2.py +0 -135
  245. disdrodb/l0/readers/NCAR/VORTEX_SE_2016_PIPS.py +0 -170
  246. disdrodb/l0/readers/NETHERLANDS/DELFT.py +0 -187
  247. disdrodb/l0/readers/SPAIN/SBEGUERIA.py +0 -179
  248. disdrodb/l0/scripts/disdrodb_run_l0b_concat.py +0 -93
  249. disdrodb/l0/scripts/disdrodb_run_l0b_concat_station.py +0 -85
  250. disdrodb/utils/netcdf.py +0 -452
  251. disdrodb/utils/scripts.py +0 -102
  252. disdrodb-0.0.21.dist-info/AUTHORS.md +0 -18
  253. disdrodb-0.0.21.dist-info/METADATA +0 -186
  254. disdrodb-0.0.21.dist-info/RECORD +0 -168
  255. disdrodb-0.0.21.dist-info/entry_points.txt +0 -15
  256. /disdrodb/l0/configs/{RD_80 → RD80}/bins_velocity.yml +0 -0
  257. /disdrodb/l0/manuals/{Thies_LPM.pdf → LPM.pdf} +0 -0
  258. /disdrodb/l0/manuals/{ODM_470.pdf → ODM470.pdf} +0 -0
  259. /disdrodb/l0/manuals/{OTT_Parsivel.pdf → PARSIVEL.pdf} +0 -0
  260. /disdrodb/l0/manuals/{OTT_Parsivel2.pdf → PARSIVEL2.pdf} +0 -0
  261. /disdrodb/l0/manuals/{PWS_100.pdf → PWS100.pdf} +0 -0
  262. /disdrodb/l0/manuals/{RD_80.pdf → RD80.pdf} +0 -0
  263. {disdrodb-0.0.21.dist-info → disdrodb-0.1.0.dist-info/licenses}/LICENSE +0 -0
  264. {disdrodb-0.0.21.dist-info → disdrodb-0.1.0.dist-info}/top_level.txt +0 -0
disdrodb/utils/time.py ADDED
@@ -0,0 +1,657 @@
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ """This module contains utilities related to the processing of temporal datasets."""
+ import logging
+ import numbers
+ import re
+ from typing import Optional
+
+ import numpy as np
+ import pandas as pd
+ import xarray as xr
+
+ from disdrodb.utils.logger import log_info, log_warning
+ from disdrodb.utils.xarray import define_fill_value_dictionary
+
+ logger = logging.getLogger(__name__)
+
+ ####------------------------------------------------------------------------------------.
+ #### Sampling Interval Acronyms
+
+
+ def seconds_to_acronym(seconds):
+     """
+     Convert a duration in seconds to a readable string format (e.g., "1H30MIN", "1D2H").
+
+     Parameters
+     ----------
+     seconds : int
+         The time duration in seconds.
+
+     Returns
+     -------
+     str
+         The duration as a string in a format like "30S", "1MIN30S", "1H30MIN", or "1D2H".
+     """
+     timedelta = pd.Timedelta(seconds=seconds)
+     components = timedelta.components
+
+     parts = []
+     if components.days > 0:
+         parts.append(f"{components.days}D")
+     if components.hours > 0:
+         parts.append(f"{components.hours}H")
+     if components.minutes > 0:
+         parts.append(f"{components.minutes}MIN")
+     if components.seconds > 0:
+         parts.append(f"{components.seconds}S")
+     acronym = "".join(parts)
+     return acronym
+
+
+ def get_resampling_information(sample_interval_acronym):
+     """
+     Extract resampling information from the sample interval acronym.
+
+     Parameters
+     ----------
+     sample_interval_acronym : str
+         A string representing the sample interval: e.g., "1H30MIN", "ROLL1H30MIN".
+
+     Returns
+     -------
+     sample_interval_seconds, rolling : tuple
+         Sample interval in seconds and whether rolling is enabled.
+     """
+     rolling = sample_interval_acronym.startswith("ROLL")
+     if rolling:
+         sample_interval_acronym = sample_interval_acronym[4:]  # Remove "ROLL"
+
+     # Allowed pattern: one or more occurrences of "<number><unit>"
+     # where unit is exactly one of D, H, MIN, or S.
+     # Examples: 1H, 30MIN, 2D, 45S, and any concatenation like 1H30MIN.
+     pattern = r"^(\d+(?:D|H|MIN|S))+$"
+
+     # Check if the entire string matches the pattern
+     if not re.match(pattern, sample_interval_acronym):
+         raise ValueError(
+             f"Invalid sample interval acronym '{sample_interval_acronym}'. "
+             "Must be composed of one or more <number><unit> groups, where unit is D, H, MIN, or S.",
+         )
+
+     # Regular expression to match duration components and extract all (value, unit) pairs
+     pattern = r"(\d+)(D|H|MIN|S)"
+     matches = re.findall(pattern, sample_interval_acronym)
+
+     # Conversion factors for each unit
+     unit_to_seconds = {
+         "D": 86400,  # Seconds in a day
+         "H": 3600,  # Seconds in an hour
+         "MIN": 60,  # Seconds in a minute
+         "S": 1,  # Seconds in a second
+     }
+
+     # Parse matches and calculate total seconds
+     sample_interval = 0
+     for value, unit in matches:
+         value = int(value)
+         if unit in unit_to_seconds:
+             sample_interval += value * unit_to_seconds[unit]
+     return sample_interval, rolling
+
+
+ def acronym_to_seconds(acronym):
+     """
+     Extract the interval in seconds from the duration acronym.
+
+     Parameters
+     ----------
+     acronym : str
+         A string representing a duration: e.g., "1H30MIN", "ROLL1H30MIN".
+
+     Returns
+     -------
+     seconds
+         Duration in seconds.
+     """
+     seconds, _ = get_resampling_information(acronym)
+     return seconds
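For orientation, a minimal usage sketch of the acronym helpers added above (illustrative only, not part of the packaged file):

    from disdrodb.utils.time import acronym_to_seconds, get_resampling_information, seconds_to_acronym

    seconds_to_acronym(5400)                   # "1H30MIN"
    get_resampling_information("ROLL1H30MIN")  # (5400, True): 5400 s interval, rolling enabled
    acronym_to_seconds("30S")                  # 30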
+
+
+ ####----------------------------------------------------------------------------.
+ #### File start and end time utilities
+ def get_dataframe_start_end_time(df: pd.DataFrame, time_column="time"):
+     """Retrieve dataframe starting and ending time.
+
+     Parameters
+     ----------
+     df : pandas.DataFrame
+         Input dataframe.
+     time_column : str
+         Name of the time column.
+         The default is "time".
+         The column must be of type datetime.
+
+     Returns
+     -------
+     (start_time, end_time) : tuple
+         File start and end time of type pandas.Timestamp.
+
+     """
+     starting_time = pd.to_datetime(df[time_column].iloc[0])
+     ending_time = pd.to_datetime(df[time_column].iloc[-1])
+     return (starting_time, ending_time)
+
+
+ def get_dataset_start_end_time(ds: xr.Dataset, time_dim="time"):
+     """Retrieve dataset starting and ending time.
+
+     Parameters
+     ----------
+     ds : xarray.Dataset
+         Input dataset.
+     time_dim : str
+         Name of the time dimension.
+         The default is "time".
+
+     Returns
+     -------
+     (start_time, end_time) : tuple
+         File start and end time of type pandas.Timestamp.
+
+     """
+     starting_time = pd.to_datetime(ds[time_dim].to_numpy()[0])
+     ending_time = pd.to_datetime(ds[time_dim].to_numpy()[-1])
+     return (starting_time, ending_time)
+
+
+ def get_file_start_end_time(obj, time="time"):
+     """Retrieve object starting and ending time.
+
+     Parameters
+     ----------
+     obj : xarray.Dataset or pandas.DataFrame
+         Input object with a time dimension or column, respectively.
+     time : str
+         Name of the time dimension or column.
+         The default is "time".
+
+     Returns
+     -------
+     (start_time, end_time) : tuple
+         File start and end time of type pandas.Timestamp.
+
+     """
+     if isinstance(obj, xr.Dataset):
+         return get_dataset_start_end_time(obj, time_dim=time)
+     if isinstance(obj, pd.DataFrame):
+         return get_dataframe_start_end_time(obj, time_column=time)
+     raise TypeError("Expecting an xarray Dataset or a pandas DataFrame object.")
+
+
+ ####------------------------------------------------------------------------------------.
+ #### Xarray utilities
+
+
+ def ensure_sorted_by_time(obj, time="time"):
+     """Ensure an xarray object or pandas DataFrame is sorted by time."""
+     # Check sorted by time and sort if necessary
+     is_sorted = np.all(np.diff(obj[time].to_numpy().astype(int)) > 0)
+     if not is_sorted:
+         if isinstance(obj, pd.DataFrame):
+             return obj.sort_values(by="time")
+         # Else xarray DataArray or Dataset
+         obj = obj.sortby("time")
+     return obj
+
+
+ def _check_time_sorted(ds, time_dim):
+     """Ensure the xarray.Dataset is sorted by time."""
+     time_diff = np.diff(ds[time_dim].to_numpy().astype(int))
+     if np.any(time_diff == 0):
+         raise ValueError(f"In the {time_dim} dimension there are duplicated timesteps!")
+     if not np.all(time_diff > 0):
+         print(f"The {time_dim} dimension was not sorted. Sorting it now!")
+         ds = ds.sortby(time_dim)
+     return ds
+
+
+ def regularize_dataset(
+     xr_obj,
+     freq: str,
+     time_dim: str = "time",
+     method: Optional[str] = None,
+     fill_value=None,
+ ):
+     """Regularize a dataset across the time dimension with uniform resolution.
+
+     Parameters
+     ----------
+     xr_obj : xarray.Dataset or xarray.DataArray
+         xarray object with a time dimension.
+     freq : str
+         The ``freq`` string to pass to ``pd.date_range()`` to define the new time coordinates.
+         Example: ``freq="2min"``.
+     time_dim : str, optional
+         The time dimension in the xarray object. The default value is ``"time"``.
+     method : str, optional
+         Method to use for filling missing timesteps.
+         If ``None``, fill with ``fill_value``. The default value is ``None``.
+         For other possible methods, see ``xarray.Dataset.reindex()``.
+     fill_value : float or dict, optional
+         Fill value to fill missing timesteps.
+         If not specified, for float variables it uses ``dtypes.NA``, while for
+         integer variables it uses the maximum allowed integer value or,
+         in case of undecoded variables, the ``_FillValue`` DataArray attribute.
+
+     Returns
+     -------
+     ds_reindexed : xarray.Dataset
+         Regularized dataset.
+
+     """
+     xr_obj = _check_time_sorted(xr_obj, time_dim=time_dim)
+     start_time, end_time = get_dataset_start_end_time(xr_obj, time_dim=time_dim)
+
+     # Define new time index
+     new_time_index = pd.date_range(
+         start=start_time,
+         end=end_time,
+         freq=freq,
+     )
+     # Check all existing timesteps are within the new time index
+     # - Otherwise raise an error because the desired frequency is not compatible
+     idx_missing = np.where(~np.isin(xr_obj[time_dim].data, new_time_index))[0]
+     if len(idx_missing) > 0:
+         not_included_timesteps = xr_obj[time_dim].data[idx_missing].astype("M8[s]")
+         raise ValueError(f"With freq='{freq}', the following timesteps would be dropped: {not_included_timesteps}")
+
+     # Define fill_value dictionary
+     if fill_value is None:
+         fill_value = define_fill_value_dictionary(xr_obj)
+
+     # Regularize dataset and fill with NA values
+     xr_obj = xr_obj.reindex(
+         {time_dim: new_time_index},
+         method=method,  # do not fill gaps
+         # tolerance=tolerance,  # mismatch in seconds
+         fill_value=fill_value,
+     )
+     return xr_obj
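A short sketch of how regularize_dataset can reindex a dataset onto a uniform time grid (illustrative only; the toy dataset and the explicit fill_value are assumptions, not taken from the package):

    import numpy as np
    import pandas as pd
    import xarray as xr

    from disdrodb.utils.time import regularize_dataset

    # Dataset sampled every 30 minutes, with the 01:00 timestep missing
    time = pd.to_datetime(["2024-01-01 00:00", "2024-01-01 00:30", "2024-01-01 01:30"])
    ds = xr.Dataset({"var": ("time", [1.0, 2.0, 3.0])}, coords={"time": time})

    # Reindex onto a uniform 30-minute grid; the missing timestep is filled with NaN
    ds_regular = regularize_dataset(ds, freq="30min", fill_value=np.nan)
    ds_regular["var"].to_numpy()  # expected: [1., 2., nan, 3.]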
+
+
+ ####------------------------------------------
+ #### Sampling interval utilities
+
+
+ def ensure_sample_interval_in_seconds(sample_interval):  # noqa: PLR0911
+     """
+     Ensure the sample interval is in seconds.
+
+     Parameters
+     ----------
+     sample_interval : int, numpy.ndarray, xarray.DataArray, or numpy.timedelta64
+         The sample interval to be converted to seconds.
+         It can be:
+         - An integer representing the interval in seconds.
+         - A numpy array or xarray DataArray of integers representing intervals in seconds.
+         - A numpy.timedelta64 object representing the interval.
+         - A numpy array or xarray DataArray of numpy.timedelta64 objects representing intervals.
+
+     Returns
+     -------
+     int, numpy.ndarray, or xarray.DataArray
+         The sample interval converted to seconds. The return type matches the input type:
+         - If the input is an integer, the output is an integer.
+         - If the input is a numpy array, the output is a numpy array of integers (unless NaN is present).
+         - If the input is an xarray DataArray, the output is an xarray DataArray of integers (unless NaN is present).
+
+     """
+     # Deal with timedelta objects
+     if isinstance(sample_interval, np.timedelta64):
+         return (sample_interval.astype("m8[s]") / np.timedelta64(1, "s")).astype(int)
+         # return sample_interval.astype("m8[s]").astype(int)
+
+     # Deal with scalar pure integer types (Python int or numpy int32/int64/etc.)
+     # --> ATTENTION: this also includes np.timedelta64 objects!
+     if isinstance(sample_interval, numbers.Integral):
+         return sample_interval
+
+     # Deal with numpy or xarray arrays of integer types
+     if isinstance(sample_interval, (np.ndarray, xr.DataArray)) and np.issubdtype(sample_interval.dtype, int):
+         return sample_interval
+
+     # Deal with scalar floats that are actually integers (e.g. 1.0, np.float64(3.0))
+     if isinstance(sample_interval, numbers.Real):
+         if float(sample_interval).is_integer():
+             # Cast back to int seconds
+             return int(sample_interval)
+         raise TypeError(f"sample_interval floats must be whole numbers of seconds, got {sample_interval}")
+
+     # Deal with timedelta64 numpy arrays
+     if isinstance(sample_interval, np.ndarray) and np.issubdtype(sample_interval.dtype, np.timedelta64):
+         is_nat = np.isnat(sample_interval)
+         if np.any(is_nat):
+             sample_interval = sample_interval.astype("timedelta64[s]").astype(float)
+             sample_interval[is_nat] = np.nan
+             return sample_interval
+         return sample_interval.astype("timedelta64[s]").astype(int)
+     # Deal with timedelta64 xarray arrays
+     if isinstance(sample_interval, xr.DataArray) and np.issubdtype(sample_interval.dtype, np.timedelta64):
+         sample_interval = sample_interval.copy()
+         is_nat = np.isnat(sample_interval)
+         if np.any(is_nat):
+             sample_interval_array = sample_interval.data.astype("timedelta64[s]").astype(float)
+             sample_interval_array[is_nat] = np.nan
+             sample_interval.data = sample_interval_array
+             return sample_interval
+         sample_interval_array = sample_interval.data.astype("timedelta64[s]").astype(int)
+         sample_interval.data = sample_interval_array
+         return sample_interval
+
+     # Deal with numpy arrays of floats that are all integer-valued (with optionally some NaN)
+     if isinstance(sample_interval, np.ndarray) and np.issubdtype(sample_interval.dtype, np.floating):
+         mask_nan = np.isnan(sample_interval)
+         if mask_nan.any():
+             # Check non-NaN entries are whole numbers
+             nonnan = sample_interval[~mask_nan]
+             if not np.allclose(nonnan, np.rint(nonnan)):
+                 raise TypeError("Float array sample_interval must contain only whole numbers or NaN.")
+             # Leave as float array so NaNs are preserved
+             return sample_interval
+         # No NaNs: can safely cast to integer dtype
+         if not np.allclose(sample_interval, np.rint(sample_interval)):
+             raise TypeError("Float array sample_interval must contain only whole numbers.")
+         return sample_interval.astype(int)
+
+     # Deal with xarray.DataArray of floats that are all integer-valued (with optionally some NaN)
+     if isinstance(sample_interval, xr.DataArray) and np.issubdtype(sample_interval.dtype, np.floating):
+         arr = sample_interval.copy()
+         data = arr.data
+         mask_nan = np.isnan(data)
+         if mask_nan.any():
+             nonnan = data[~mask_nan]
+             if not np.allclose(nonnan, np.rint(nonnan)):
+                 raise TypeError("Float DataArray sample_interval must contain only whole numbers or NaN.")
+             # Return as float DataArray so NaNs stay
+             return arr
+         if not np.allclose(data, np.rint(data)):
+             raise TypeError("Float DataArray sample_interval must contain only whole numbers.")
+         arr.data = data.astype(int)
+         return arr
+
+     raise TypeError(
+         "sample_interval must be an integer value or array, or numpy.ndarray / xarray.DataArray with type timedelta64.",
+     )
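A few illustrative calls showing the accepted input types for ensure_sample_interval_in_seconds (a sketch, not part of the packaged file; outputs are the expected values):

    import numpy as np

    from disdrodb.utils.time import ensure_sample_interval_in_seconds

    ensure_sample_interval_in_seconds(60)                                 # 60
    ensure_sample_interval_in_seconds(np.timedelta64(30, "s"))            # 30
    ensure_sample_interval_in_seconds(np.array([30, 60], dtype="m8[s]"))  # array([30, 60])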
+
+
+ def infer_sample_interval(ds, robust=False, verbose=False, logger=None):
+     """Infer the sample interval of a dataset.
+
+     Duplicated timesteps are removed before inferring the sample interval.
+
+     NOTE: This function is used only for the reader preparation.
+     """
+     # Check sorted by time and sort if necessary
+     ds = ensure_sorted_by_time(ds)
+
+     # Retrieve timesteps
+     # - Remove duplicate timesteps
+     timesteps = np.unique(ds["time"].data)
+
+     # Calculate number of timesteps
+     n_timesteps = len(timesteps)
+
+     # Calculate time differences in seconds
+     deltadt = np.diff(timesteps).astype("timedelta64[s]").astype(int)
+
+     # Round each delta to the nearest multiple of 5 (because the smallest possible sample interval is 10 s)
+     # Example: for sample_interval = 10, deltat values like 8, 9, 11, 12 become 10 ...
+     # Example: for sample_interval = 10, deltat values like 6, 7 or 13, 14 become respectively 5 and 15 ...
+     # Example: for sample_interval = 30, deltat values like 28, 29, 30, 31, 32 become 30 ...
+     # Example: for sample_interval = 30, deltat values like 26, 27 or 33, 34 become respectively 25 and 35 ...
+     # --> A second rounding is needed after identifying the most frequent sample interval to coerce such values to 30
+     min_sample_interval = 10
+     min_half_sample_interval = min_sample_interval / 2
+     deltadt = np.round(deltadt / min_half_sample_interval) * min_half_sample_interval
+
+     # Identify unique time intervals and their occurrences
+     unique_deltas, counts = np.unique(deltadt, return_counts=True)
+
+     # Determine the most frequent time interval (mode)
+     most_frequent_delta_idx = np.argmax(counts)
+     sample_interval = unique_deltas[most_frequent_delta_idx]
+
+     # Re-round deltadt once the sample interval is known
+     # - If sample interval is 10: all values between 6 and 14 are rounded to 10, below 6 to 0, above 14 to 20
+     # - If sample interval is 30: all values between 16 and 44 are rounded to 30, below 16 to 0, above 44 to 20
+     deltadt = np.round(deltadt / min_sample_interval) * min_sample_interval
+
+     # Identify unique time intervals and their occurrences
+     unique_deltas, counts = np.unique(deltadt, return_counts=True)
+     fractions = np.round(counts / len(deltadt) * 100, 2)
+
+     # Determine the most frequent time interval (mode)
+     most_frequent_delta_idx = np.argmax(counts)
+     sample_interval = unique_deltas[most_frequent_delta_idx]
+     sample_interval_fraction = fractions[most_frequent_delta_idx]
+
+     # Inform about irregular sampling
+     unexpected_intervals = unique_deltas[unique_deltas != sample_interval]
+     unexpected_intervals_counts = counts[unique_deltas != sample_interval]
+     unexpected_intervals_fractions = fractions[unique_deltas != sample_interval]
+     if verbose and len(unexpected_intervals) > 0:
+         msg = "Non-unique interval detected."
+         log_info(logger=logger, msg=msg, verbose=verbose)
+         for interval, count, fraction in zip(
+             unexpected_intervals,
+             unexpected_intervals_counts,
+             unexpected_intervals_fractions,
+         ):
+             msg = f"--> Interval: {interval} seconds, Occurrence: {count}, Frequency: {fraction} %"
+             log_info(logger=logger, msg=msg, verbose=verbose)
+
+     # Perform checks
+     # - Raise an error if negative or zero time intervals are present
+     # - If robust=False, still return the estimated sample_interval
+     if robust and np.any(deltadt == 0):
+         raise ValueError("Likely presence of duplicated timesteps.")
+
+     if robust and len(unexpected_intervals) > 0:
+         raise ValueError("Non-unique sampling interval.")
+
+     ###-------------------------------------------------------------------------.
+     ### Display informative messages
+     # - Log a warning if the estimated sample interval has a frequency lower than 60%
+     sample_interval_fraction_threshold = 60
+     msg = (
+         f"The most frequent sampling interval ({sample_interval} s) "
+         + f"has a frequency lower than {sample_interval_fraction_threshold}%: {sample_interval_fraction} %. "
+         + f"(Total number of timesteps: {n_timesteps})"
+     )
+     if sample_interval_fraction < sample_interval_fraction_threshold:
+         log_warning(logger=logger, msg=msg, verbose=verbose)
+
+     # - Log a warning if an unexpected interval has a frequency larger than 20 percent
+     frequent_unexpected_intervals = unexpected_intervals[unexpected_intervals_fractions > 20]
+     if len(frequent_unexpected_intervals) != 0:
+         frequent_unexpected_intervals_str = ", ".join(
+             f"{interval} seconds" for interval in frequent_unexpected_intervals
+         )
+         msg = (
+             "The following unexpected intervals have a frequency "
+             + f"greater than 20%: {frequent_unexpected_intervals_str}. "
+             + f"(Total number of timesteps: {n_timesteps})"
+         )
+         log_warning(logger=logger, msg=msg, verbose=verbose)
+     return int(sample_interval)
+
+
+ ####---------------------------------------------------------------------------------
+ #### Timesteps regularization
+
+
+ def get_problematic_timestep_indices(timesteps, sample_interval):
+     """Identify timesteps with missing previous or following timesteps."""
+     previous_time = timesteps - pd.Timedelta(seconds=sample_interval)
+     next_time = timesteps + pd.Timedelta(seconds=sample_interval)
+     idx_previous_missing = np.where(~np.isin(previous_time, timesteps))[0][1:]
+     idx_next_missing = np.where(~np.isin(next_time, timesteps))[0][:-1]
+     idx_isolated_missing = np.intersect1d(idx_previous_missing, idx_next_missing)
+     idx_previous_missing = idx_previous_missing[np.isin(idx_previous_missing, idx_isolated_missing, invert=True)]
+     idx_next_missing = idx_next_missing[np.isin(idx_next_missing, idx_isolated_missing, invert=True)]
+     return idx_previous_missing, idx_next_missing, idx_isolated_missing
+
+
+ def regularize_timesteps(ds, sample_interval, robust=False, add_quality_flag=True, logger=None, verbose=True):
+     """Ensure timesteps match with the sample_interval.
+
+     This function:
+     - drops dataset indices with duplicated timesteps,
+     - does not add missing timesteps to the dataset.
+     """
+     # Check sorted by time and sort if necessary
+     ds = ensure_sorted_by_time(ds)
+
+     # Convert time to pandas.DatetimeIndex for easier manipulation
+     times = pd.to_datetime(ds["time"].to_numpy())
+
+     # Determine the start and end times
+     start_time = times[0].floor(f"{sample_interval}s")
+     end_time = times[-1].ceil(f"{sample_interval}s")
+
+     # Create the expected time grid
+     expected_times = pd.date_range(start=start_time, end=end_time, freq=f"{sample_interval}s")
+
+     # Convert to numpy arrays
+     times = times.to_numpy(dtype="M8[s]")
+     expected_times = expected_times.to_numpy(dtype="M8[s]")
+
+     # Map original times to the nearest expected times
+     # Calculate the difference between original times and expected times
+     time_deltas = np.abs(times - expected_times[:, None]).astype(int)
+
+     # Find the index of the closest expected time for each original time
+     nearest_indices = np.argmin(time_deltas, axis=0)
+     adjusted_times = expected_times[nearest_indices]
+
+     # Check for duplicates in adjusted times
+     unique_times, counts = np.unique(adjusted_times, return_counts=True)
+     duplicates = unique_times[counts > 1]
+
+     # Initialize time quality flag
+     # - 0 when ok or just rounded to closest 00
+     # - 1 if previous timestep is missing
+     # - 2 if next timestep is missing
+     # - 3 if previous and next timesteps are missing
+     # - 4 if solved duplicated timesteps
+     # - 5 if needed to drop duplicated timesteps and select the last
+     flag_previous_missing = 1
+     flag_next_missing = 2
+     flag_isolated_timestep = 3
+     flag_solved_duplicated_timestep = 4
+     flag_dropped_duplicated_timestep = 5
+     qc_flag = np.zeros(adjusted_times.shape)
+
+     # Initialize list with the duplicated timestep indices to drop
+     # - We drop the first occurrence because it is likely the shortest interval
+     idx_to_drop = []
+
+     # Attempt to resolve duplicates
+     if duplicates.size > 0:
+         # Handle duplicates
+         for dup_time in duplicates:
+             # Indices of duplicates
+             dup_indices = np.where(adjusted_times == dup_time)[0]
+             n_duplicates = len(dup_indices)
+             # Define previous and following timestep
+             prev_time = dup_time - pd.Timedelta(seconds=sample_interval)
+             next_time = dup_time + pd.Timedelta(seconds=sample_interval)
+             # Try to find missing slots before and after
+             # - If more than 3 duplicates, impossible to solve!
+             count_solved = 0
+             # If the previous timestep is available, set that one
+             if n_duplicates == 2:
+                 if prev_time not in adjusted_times:
+                     adjusted_times[dup_indices[0]] = prev_time
+                     qc_flag[dup_indices[0]] = flag_solved_duplicated_timestep
+                     count_solved += 1
+                 elif next_time not in adjusted_times:
+                     adjusted_times[dup_indices[-1]] = next_time
+                     qc_flag[dup_indices[-1]] = flag_solved_duplicated_timestep
+                     count_solved += 1
+                 else:
+                     pass
+             elif n_duplicates == 3:
+                 if prev_time not in adjusted_times:
+                     adjusted_times[dup_indices[0]] = prev_time
+                     qc_flag[dup_indices[0]] = flag_solved_duplicated_timestep
+                     count_solved += 1
+                 if next_time not in adjusted_times:
+                     adjusted_times[dup_indices[-1]] = next_time
+                     qc_flag[dup_indices[-1]] = flag_solved_duplicated_timestep
+                     count_solved += 1
+             if count_solved != n_duplicates - 1:
+                 idx_to_drop = np.append(idx_to_drop, dup_indices[0:-1])
+                 qc_flag[dup_indices[-1]] = flag_dropped_duplicated_timestep
+                 msg = (
+                     f"Cannot resolve {n_duplicates} duplicated timesteps "
+                     f"(after trailing seconds correction) around {dup_time}."
+                 )
+                 log_warning(logger=logger, msg=msg, verbose=verbose)
+                 if robust:
+                     raise ValueError(msg)
+
+     # Update the time coordinate (convert to ns for xarray compatibility)
+     ds = ds.assign_coords({"time": adjusted_times.astype("datetime64[ns]")})
+
+     # Update quality flag values where the previous or next timestep is missing
+     if add_quality_flag:
+         idx_previous_missing, idx_next_missing, idx_isolated_missing = get_problematic_timestep_indices(
+             adjusted_times,
+             sample_interval,
+         )
+         qc_flag[idx_previous_missing] = np.maximum(qc_flag[idx_previous_missing], flag_previous_missing)
+         qc_flag[idx_next_missing] = np.maximum(qc_flag[idx_next_missing], flag_next_missing)
+         qc_flag[idx_isolated_missing] = np.maximum(qc_flag[idx_isolated_missing], flag_isolated_timestep)
+
+         # If the first timestep is at 00:00 and currently flagged as previous missing (1), reset to 0
+         # first_time = pd.to_datetime(adjusted_times[0]).time()
+         # first_expected_time = pd.Timestamp("00:00:00").time()
+         # if first_time == first_expected_time and qc_flag[0] == flag_previous_missing:
+         #     qc_flag[0] = 0
+
+         # # If the last timestep is flagged and currently flagged as next missing (2), reset it to 0
+         # last_time = pd.to_datetime(adjusted_times[-1]).time()
+         # last_time_expected = (pd.Timestamp("00:00:00") - pd.Timedelta(30, unit="seconds")).time()
+         # # Check if adding one interval would go beyond the end_time
+         # if last_time == last_time_expected and qc_flag[-1] == flag_next_missing:
+         #     qc_flag[-1] = 0
+
+         # Assign time quality flag coordinate
+         ds["time_qc"] = xr.DataArray(qc_flag, dims="time")
+         ds = ds.set_coords("time_qc")
+
+     # Drop duplicated timesteps
+     # - Using ds = ds.drop_isel({"time": idx_to_drop.astype(int)}) raises:
+     #   --> pandas.errors.InvalidIndexError: Reindexing only valid with uniquely valued Index objects
+     #   --> https://github.com/pydata/xarray/issues/6605
+     if len(idx_to_drop) > 0:
+         idx_to_drop = idx_to_drop.astype(int)
+         idx_valid_timesteps = np.arange(0, ds["time"].size)
+         idx_valid_timesteps = np.delete(idx_valid_timesteps, idx_to_drop)
+         ds = ds.isel(time=idx_valid_timesteps)
+     # Return dataset
+     return ds
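A minimal sketch combining infer_sample_interval and regularize_timesteps on a toy dataset with trailing-second jitter (illustrative only; the example data are assumptions, not taken from the package):

    import numpy as np
    import xarray as xr

    from disdrodb.utils.time import infer_sample_interval, regularize_timesteps

    # Nominal 30 s sampling, with one record reported at 00:01:01 instead of 00:01:00
    time = np.array(
        ["2024-01-01T00:00:00", "2024-01-01T00:00:30", "2024-01-01T00:01:01", "2024-01-01T00:01:30"],
        dtype="datetime64[ns]",
    )
    ds = xr.Dataset({"var": ("time", [1.0, 2.0, 3.0, 4.0])}, coords={"time": time})

    sample_interval = infer_sample_interval(ds)         # expected: 30
    ds_reg = regularize_timesteps(ds, sample_interval)  # snaps 00:01:01 to 00:01:00 and adds a "time_qc" coordinate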
disdrodb/utils/warnings.py ADDED
@@ -0,0 +1,30 @@
+ #!/usr/bin/env python3
+
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ """Warning utilities."""
+ import warnings
+ from contextlib import contextmanager
+
+
+ @contextmanager
+ def suppress_warnings():
+     """Context manager suppressing RuntimeWarnings and UserWarnings."""
+     with warnings.catch_warnings():
+         warnings.simplefilter("ignore", RuntimeWarning)
+         warnings.simplefilter("ignore", UserWarning)
+         yield
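Assuming this hunk corresponds to disdrodb/utils/warnings.py listed in the files-changed table above, a minimal usage sketch (illustrative only):

    import numpy as np

    from disdrodb.utils.warnings import suppress_warnings

    with suppress_warnings():
        # The "Mean of empty slice" RuntimeWarning is silenced inside the context
        np.nanmean(np.array([np.nan]))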