disdrodb 0.0.21__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +132 -15
- disdrodb/_config.py +4 -2
- disdrodb/_version.py +9 -4
- disdrodb/api/checks.py +264 -237
- disdrodb/api/configs.py +4 -8
- disdrodb/api/create_directories.py +235 -290
- disdrodb/api/info.py +217 -26
- disdrodb/api/io.py +306 -270
- disdrodb/api/path.py +597 -173
- disdrodb/api/search.py +486 -0
- disdrodb/{metadata/scripts → cli}/disdrodb_check_metadata_archive.py +12 -7
- disdrodb/{utils/pandas.py → cli/disdrodb_data_archive_directory.py} +9 -18
- disdrodb/cli/disdrodb_download_archive.py +86 -0
- disdrodb/cli/disdrodb_download_metadata_archive.py +53 -0
- disdrodb/cli/disdrodb_download_station.py +84 -0
- disdrodb/{api/scripts → cli}/disdrodb_initialize_station.py +22 -10
- disdrodb/cli/disdrodb_metadata_archive_directory.py +32 -0
- disdrodb/{data_transfer/scripts/disdrodb_download_station.py → cli/disdrodb_open_data_archive.py} +22 -22
- disdrodb/cli/disdrodb_open_logs_directory.py +69 -0
- disdrodb/{data_transfer/scripts/disdrodb_upload_station.py → cli/disdrodb_open_metadata_archive.py} +22 -24
- disdrodb/cli/disdrodb_open_metadata_directory.py +71 -0
- disdrodb/cli/disdrodb_open_product_directory.py +74 -0
- disdrodb/cli/disdrodb_open_readers_directory.py +32 -0
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0.py +38 -31
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0_station.py +32 -30
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0a.py +30 -21
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0a_station.py +24 -33
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0b.py +30 -21
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0b_station.py +25 -34
- disdrodb/cli/disdrodb_run_l0c.py +130 -0
- disdrodb/cli/disdrodb_run_l0c_station.py +129 -0
- disdrodb/cli/disdrodb_run_l1.py +122 -0
- disdrodb/cli/disdrodb_run_l1_station.py +121 -0
- disdrodb/cli/disdrodb_run_l2e.py +122 -0
- disdrodb/cli/disdrodb_run_l2e_station.py +122 -0
- disdrodb/cli/disdrodb_run_l2m.py +122 -0
- disdrodb/cli/disdrodb_run_l2m_station.py +122 -0
- disdrodb/cli/disdrodb_upload_archive.py +105 -0
- disdrodb/cli/disdrodb_upload_station.py +98 -0
- disdrodb/configs.py +90 -25
- disdrodb/data_transfer/__init__.py +22 -0
- disdrodb/data_transfer/download_data.py +87 -90
- disdrodb/data_transfer/upload_data.py +64 -37
- disdrodb/data_transfer/zenodo.py +15 -18
- disdrodb/docs.py +1 -1
- disdrodb/issue/__init__.py +17 -4
- disdrodb/issue/checks.py +10 -23
- disdrodb/issue/reader.py +9 -12
- disdrodb/issue/writer.py +14 -17
- disdrodb/l0/__init__.py +17 -26
- disdrodb/l0/check_configs.py +35 -23
- disdrodb/l0/check_standards.py +46 -51
- disdrodb/l0/configs/{Thies_LPM → LPM}/bins_diameter.yml +44 -44
- disdrodb/l0/configs/{Thies_LPM → LPM}/bins_velocity.yml +40 -40
- disdrodb/l0/configs/LPM/l0a_encodings.yml +80 -0
- disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_cf_attrs.yml +84 -65
- disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_encodings.yml +50 -9
- disdrodb/l0/configs/{Thies_LPM → LPM}/raw_data_format.yml +285 -245
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_diameter.yml +66 -66
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_velocity.yml +64 -64
- disdrodb/l0/configs/PARSIVEL/l0a_encodings.yml +32 -0
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_cf_attrs.yml +23 -21
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_encodings.yml +17 -17
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/raw_data_format.yml +77 -77
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_diameter.yml +64 -64
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_velocity.yml +64 -64
- disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +39 -0
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_cf_attrs.yml +28 -26
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_encodings.yml +20 -20
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/raw_data_format.yml +107 -107
- disdrodb/l0/configs/PWS100/bins_diameter.yml +173 -0
- disdrodb/l0/configs/PWS100/bins_velocity.yml +173 -0
- disdrodb/l0/configs/PWS100/l0a_encodings.yml +19 -0
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +76 -0
- disdrodb/l0/configs/PWS100/l0b_encodings.yml +176 -0
- disdrodb/l0/configs/PWS100/raw_data_format.yml +182 -0
- disdrodb/l0/configs/{RD_80 → RD80}/bins_diameter.yml +40 -40
- disdrodb/l0/configs/RD80/l0a_encodings.yml +16 -0
- disdrodb/l0/configs/{RD_80 → RD80}/l0b_cf_attrs.yml +3 -3
- disdrodb/l0/configs/RD80/l0b_encodings.yml +135 -0
- disdrodb/l0/configs/{RD_80 → RD80}/raw_data_format.yml +46 -50
- disdrodb/l0/l0_reader.py +216 -340
- disdrodb/l0/l0a_processing.py +237 -208
- disdrodb/l0/l0b_nc_processing.py +227 -80
- disdrodb/l0/l0b_processing.py +96 -174
- disdrodb/l0/l0c_processing.py +627 -0
- disdrodb/l0/readers/{ARM → LPM/ARM}/ARM_LPM.py +36 -58
- disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +236 -0
- disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +185 -0
- disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +185 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +195 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +210 -0
- disdrodb/l0/readers/{BRAZIL/GOAMAZON_LPM.py → LPM/KIT/CHWALA.py} +97 -76
- disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +197 -0
- disdrodb/l0/readers/LPM/SLOVENIA/CRNI_VRH.py +197 -0
- disdrodb/l0/readers/{UK → LPM/UK}/DIVEN.py +14 -35
- disdrodb/l0/readers/PARSIVEL/AUSTRALIA/MELBOURNE_2007_PARSIVEL.py +157 -0
- disdrodb/l0/readers/PARSIVEL/CHINA/CHONGQING.py +113 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/ARCTIC_2021.py +40 -57
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/COMMON_2011.py +37 -54
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/DAVOS_2009_2011.py +34 -51
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_2009.py +34 -51
- disdrodb/l0/readers/{EPFL/PARADISO_2014.py → PARSIVEL/EPFL/EPFL_ROOF_2008.py} +38 -50
- disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2010.py +105 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2011.py +34 -51
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2012.py +33 -51
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GENEPI_2007.py +25 -44
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007.py +25 -44
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007_2.py +25 -44
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HPICONET_2010.py +34 -51
- disdrodb/l0/readers/{EPFL/EPFL_ROOF_2010.py → PARSIVEL/EPFL/HYMEX_LTE_SOP2.py} +37 -50
- disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP3.py +111 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HYMEX_LTE_SOP4.py +36 -54
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2018.py +34 -52
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2019.py +38 -56
- disdrodb/l0/readers/PARSIVEL/EPFL/PARADISO_2014.py +105 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PARSIVEL_2007.py +27 -45
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PLATO_2019.py +24 -44
- disdrodb/l0/readers/PARSIVEL/EPFL/RACLETS_2019.py +140 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RACLETS_2019_WJF.py +41 -59
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RIETHOLZBACH_2011.py +34 -51
- disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2017.py +117 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2019.py +137 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/UNIL_2022.py +42 -55
- disdrodb/l0/readers/PARSIVEL/GPM/IFLOODS.py +104 -0
- disdrodb/l0/readers/{GPM → PARSIVEL/GPM}/LPVEX.py +29 -48
- disdrodb/l0/readers/PARSIVEL/GPM/MC3E.py +184 -0
- disdrodb/l0/readers/PARSIVEL/KIT/BURKINA_FASO.py +133 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/CCOPE_2015.py +113 -0
- disdrodb/l0/readers/{NCAR/VORTEX_SE_2016_P1.py → PARSIVEL/NCAR/OWLES_MIPS.py} +46 -72
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +125 -0
- disdrodb/l0/readers/{NCAR/OWLES_MIPS.py → PARSIVEL/NCAR/PLOWS_MIPS.py} +45 -64
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +114 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +176 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +183 -0
- disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL_FGG.py +121 -0
- disdrodb/l0/readers/{ARM/ARM_LD.py → PARSIVEL2/ARM/ARM_PARSIVEL2.py} +27 -50
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +163 -0
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +163 -0
- disdrodb/l0/readers/{DENMARK → PARSIVEL2/DENMARK}/EROSION_nc.py +14 -35
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +189 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +119 -0
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +104 -0
- disdrodb/l0/readers/PARSIVEL2/GPM/NSSTC.py +176 -0
- disdrodb/l0/readers/PARSIVEL2/ITALY/GID_PARSIVEL2.py +32 -0
- disdrodb/l0/readers/PARSIVEL2/MEXICO/OH_IIUNAM_nc.py +56 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +120 -0
- disdrodb/l0/readers/{NCAR → PARSIVEL2/NCAR}/PECAN_MIPS.py +45 -64
- disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +181 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +160 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +160 -0
- disdrodb/l0/readers/{NCAR/PLOWS_MIPS.py → PARSIVEL2/NCAR/VORTEX_SE_2016_P1.py} +49 -66
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +118 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +152 -0
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT.py +166 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +150 -0
- disdrodb/l0/readers/{NCAR/RELAMPAGO_RD80.py → RD80/BRAZIL/CHUVA_RD80.py} +36 -60
- disdrodb/l0/readers/{BRAZIL → RD80/BRAZIL}/GOAMAZON_RD80.py +36 -55
- disdrodb/l0/readers/{NCAR → RD80/NCAR}/CINDY_2011_RD80.py +35 -54
- disdrodb/l0/readers/{BRAZIL/CHUVA_RD80.py → RD80/NCAR/RELAMPAGO_RD80.py} +40 -54
- disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +274 -0
- disdrodb/l0/readers/template_reader_raw_netcdf_data.py +62 -0
- disdrodb/l0/readers/{reader_template.py → template_reader_raw_text_data.py} +20 -44
- disdrodb/l0/routines.py +885 -581
- disdrodb/l0/standards.py +77 -238
- disdrodb/l0/template_tools.py +105 -110
- disdrodb/l1/__init__.py +17 -0
- disdrodb/l1/beard_model.py +716 -0
- disdrodb/l1/encoding_attrs.py +635 -0
- disdrodb/l1/fall_velocity.py +260 -0
- disdrodb/l1/filters.py +192 -0
- disdrodb/l1/processing.py +202 -0
- disdrodb/l1/resampling.py +236 -0
- disdrodb/l1/routines.py +358 -0
- disdrodb/l1_env/__init__.py +17 -0
- disdrodb/l1_env/routines.py +38 -0
- disdrodb/l2/__init__.py +17 -0
- disdrodb/l2/empirical_dsd.py +1833 -0
- disdrodb/l2/event.py +388 -0
- disdrodb/l2/processing.py +528 -0
- disdrodb/l2/processing_options.py +213 -0
- disdrodb/l2/routines.py +868 -0
- disdrodb/metadata/__init__.py +9 -2
- disdrodb/metadata/checks.py +180 -124
- disdrodb/metadata/download.py +81 -0
- disdrodb/metadata/geolocation.py +146 -0
- disdrodb/metadata/info.py +20 -13
- disdrodb/metadata/manipulation.py +3 -3
- disdrodb/metadata/reader.py +59 -8
- disdrodb/metadata/search.py +77 -144
- disdrodb/metadata/standards.py +83 -80
- disdrodb/metadata/writer.py +10 -16
- disdrodb/psd/__init__.py +38 -0
- disdrodb/psd/fitting.py +2146 -0
- disdrodb/psd/models.py +774 -0
- disdrodb/routines.py +1412 -0
- disdrodb/scattering/__init__.py +28 -0
- disdrodb/scattering/axis_ratio.py +344 -0
- disdrodb/scattering/routines.py +456 -0
- disdrodb/utils/__init__.py +17 -0
- disdrodb/utils/attrs.py +208 -0
- disdrodb/utils/cli.py +269 -0
- disdrodb/utils/compression.py +60 -42
- disdrodb/utils/dask.py +62 -0
- disdrodb/utils/dataframe.py +342 -0
- disdrodb/utils/decorators.py +110 -0
- disdrodb/utils/directories.py +107 -46
- disdrodb/utils/encoding.py +127 -0
- disdrodb/utils/list.py +29 -0
- disdrodb/utils/logger.py +168 -46
- disdrodb/utils/time.py +657 -0
- disdrodb/utils/warnings.py +30 -0
- disdrodb/utils/writer.py +57 -0
- disdrodb/utils/xarray.py +138 -47
- disdrodb/utils/yaml.py +0 -1
- disdrodb/viz/__init__.py +17 -0
- disdrodb/viz/plots.py +17 -0
- disdrodb-0.1.1.dist-info/METADATA +294 -0
- disdrodb-0.1.1.dist-info/RECORD +232 -0
- {disdrodb-0.0.21.dist-info → disdrodb-0.1.1.dist-info}/WHEEL +1 -1
- disdrodb-0.1.1.dist-info/entry_points.txt +30 -0
- disdrodb/data_transfer/scripts/disdrodb_download_archive.py +0 -53
- disdrodb/data_transfer/scripts/disdrodb_upload_archive.py +0 -57
- disdrodb/l0/configs/OTT_Parsivel/l0a_encodings.yml +0 -32
- disdrodb/l0/configs/OTT_Parsivel2/l0a_encodings.yml +0 -39
- disdrodb/l0/configs/RD_80/l0a_encodings.yml +0 -16
- disdrodb/l0/configs/RD_80/l0b_encodings.yml +0 -135
- disdrodb/l0/configs/Thies_LPM/l0a_encodings.yml +0 -80
- disdrodb/l0/io.py +0 -257
- disdrodb/l0/l0_processing.py +0 -1091
- disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_OTT.py +0 -178
- disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_THIES.py +0 -247
- disdrodb/l0/readers/BRAZIL/CHUVA_LPM.py +0 -204
- disdrodb/l0/readers/BRAZIL/CHUVA_OTT.py +0 -183
- disdrodb/l0/readers/BRAZIL/GOAMAZON_OTT.py +0 -183
- disdrodb/l0/readers/CHINA/CHONGQING.py +0 -131
- disdrodb/l0/readers/EPFL/EPFL_ROOF_2008.py +0 -128
- disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP2.py +0 -127
- disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP3.py +0 -129
- disdrodb/l0/readers/EPFL/RACLETS_2019.py +0 -158
- disdrodb/l0/readers/EPFL/SAMOYLOV_2017.py +0 -136
- disdrodb/l0/readers/EPFL/SAMOYLOV_2019.py +0 -158
- disdrodb/l0/readers/FRANCE/SIRTA_OTT2.py +0 -138
- disdrodb/l0/readers/GPM/GCPEX.py +0 -123
- disdrodb/l0/readers/GPM/IFLOODS.py +0 -123
- disdrodb/l0/readers/GPM/MC3E.py +0 -123
- disdrodb/l0/readers/GPM/NSSTC.py +0 -164
- disdrodb/l0/readers/ITALY/GID.py +0 -199
- disdrodb/l0/readers/MEXICO/OH_IIUNAM_nc.py +0 -92
- disdrodb/l0/readers/NCAR/CCOPE_2015.py +0 -133
- disdrodb/l0/readers/NCAR/PECAN_FP3.py +0 -137
- disdrodb/l0/readers/NCAR/PECAN_MOBILE.py +0 -144
- disdrodb/l0/readers/NCAR/RELAMPAGO_OTT.py +0 -195
- disdrodb/l0/readers/NCAR/SNOWIE_PJ.py +0 -172
- disdrodb/l0/readers/NCAR/SNOWIE_SB.py +0 -179
- disdrodb/l0/readers/NCAR/VORTEX2_2009.py +0 -133
- disdrodb/l0/readers/NCAR/VORTEX2_2010.py +0 -188
- disdrodb/l0/readers/NCAR/VORTEX2_2010_UF.py +0 -191
- disdrodb/l0/readers/NCAR/VORTEX_SE_2016_P2.py +0 -135
- disdrodb/l0/readers/NCAR/VORTEX_SE_2016_PIPS.py +0 -170
- disdrodb/l0/readers/NETHERLANDS/DELFT.py +0 -187
- disdrodb/l0/readers/SPAIN/SBEGUERIA.py +0 -179
- disdrodb/l0/scripts/disdrodb_run_l0b_concat.py +0 -93
- disdrodb/l0/scripts/disdrodb_run_l0b_concat_station.py +0 -85
- disdrodb/utils/netcdf.py +0 -452
- disdrodb/utils/scripts.py +0 -102
- disdrodb-0.0.21.dist-info/AUTHORS.md +0 -18
- disdrodb-0.0.21.dist-info/METADATA +0 -186
- disdrodb-0.0.21.dist-info/RECORD +0 -168
- disdrodb-0.0.21.dist-info/entry_points.txt +0 -15
- /disdrodb/l0/configs/{RD_80 → RD80}/bins_velocity.yml +0 -0
- /disdrodb/l0/manuals/{Thies_LPM.pdf → LPM.pdf} +0 -0
- /disdrodb/l0/manuals/{ODM_470.pdf → ODM470.pdf} +0 -0
- /disdrodb/l0/manuals/{OTT_Parsivel.pdf → PARSIVEL.pdf} +0 -0
- /disdrodb/l0/manuals/{OTT_Parsivel2.pdf → PARSIVEL2.pdf} +0 -0
- /disdrodb/l0/manuals/{PWS_100.pdf → PWS100.pdf} +0 -0
- /disdrodb/l0/manuals/{RD_80.pdf → RD80.pdf} +0 -0
- {disdrodb-0.0.21.dist-info → disdrodb-0.1.1.dist-info/licenses}/LICENSE +0 -0
- {disdrodb-0.0.21.dist-info → disdrodb-0.1.1.dist-info}/top_level.txt +0 -0
disdrodb/utils/time.py
ADDED
@@ -0,0 +1,657 @@

# -----------------------------------------------------------------------------.
# Copyright (c) 2021-2023 DISDRODB developers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------.
"""This module contains utilities related to the processing of temporal datasets."""
import logging
import numbers
import re
from typing import Optional

import numpy as np
import pandas as pd
import xarray as xr

from disdrodb.utils.logger import log_info, log_warning
from disdrodb.utils.xarray import define_fill_value_dictionary

logger = logging.getLogger(__name__)

####------------------------------------------------------------------------------------.
#### Sampling Interval Acronyms


def seconds_to_acronym(seconds):
    """
    Convert a duration in seconds to a readable string format (e.g., "1H30MIN", "1D2H").

    Parameters
    ----------
    seconds : int
        The time duration in seconds.

    Returns
    -------
    str
        The duration as a string in a format like "30S", "1MIN30S", "1H30MIN", or "1D2H".
    """
    timedelta = pd.Timedelta(seconds=seconds)
    components = timedelta.components

    parts = []
    if components.days > 0:
        parts.append(f"{components.days}D")
    if components.hours > 0:
        parts.append(f"{components.hours}H")
    if components.minutes > 0:
        parts.append(f"{components.minutes}MIN")
    if components.seconds > 0:
        parts.append(f"{components.seconds}S")
    acronym = "".join(parts)
    return acronym

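A minimal usage sketch of seconds_to_acronym (an editor's illustration, not part of the released file; the input values are hypothetical, and outputs follow pandas Timedelta component semantics):

    from disdrodb.utils.time import seconds_to_acronym

    seconds_to_acronym(30)     # "30S"
    seconds_to_acronym(90)     # "1MIN30S"
    seconds_to_acronym(5400)   # "1H30MIN"
    seconds_to_acronym(93600)  # "1D2H"  (26 hours = 1 day + 2 hours)
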
def get_resampling_information(sample_interval_acronym):
    """
    Extract resampling information from the sample interval acronym.

    Parameters
    ----------
    sample_interval_acronym: str
        A string representing the sample interval: e.g., "1H30MIN", "ROLL1H30MIN".

    Returns
    -------
    sample_interval_seconds, rolling: tuple
        Sample interval in seconds and whether rolling is enabled.
    """
    rolling = sample_interval_acronym.startswith("ROLL")
    if rolling:
        sample_interval_acronym = sample_interval_acronym[4:]  # Remove "ROLL"

    # Allowed pattern: one or more occurrences of "<number><unit>"
    # where unit is exactly one of D, H, MIN, or S.
    # Examples: 1H, 30MIN, 2D, 45S, and any concatenation like 1H30MIN.
    pattern = r"^(\d+(?:D|H|MIN|S))+$"

    # Check if the entire string matches the pattern
    if not re.match(pattern, sample_interval_acronym):
        raise ValueError(
            f"Invalid sample interval acronym '{sample_interval_acronym}'. "
            "Must be composed of one or more <number><unit> groups, where unit is D, H, MIN, or S.",
        )

    # Regular expression to match duration components and extract all (value, unit) pairs
    pattern = r"(\d+)(D|H|MIN|S)"
    matches = re.findall(pattern, sample_interval_acronym)

    # Conversion factors for each unit
    unit_to_seconds = {
        "D": 86400,  # Seconds in a day
        "H": 3600,  # Seconds in an hour
        "MIN": 60,  # Seconds in a minute
        "S": 1,  # Seconds in a second
    }

    # Parse matches and calculate total seconds
    sample_interval = 0
    for value, unit in matches:
        value = int(value)
        if unit in unit_to_seconds:
            sample_interval += value * unit_to_seconds[unit]
    return sample_interval, rolling

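For reference, how the acronym parser behaves (an editor's sketch with hypothetical inputs; the ValueError case follows the regex documented above):

    from disdrodb.utils.time import get_resampling_information

    get_resampling_information("10MIN")    # (600, False)
    get_resampling_information("1H30MIN")  # (5400, False)
    get_resampling_information("ROLL1H")   # (3600, True): "ROLL" marks a rolling window
    get_resampling_information("1H30")     # ValueError: trailing "30" lacks a unit
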
def acronym_to_seconds(acronym):
    """
    Extract the interval in seconds from the duration acronym.

    Parameters
    ----------
    acronym: str
        A string representing a duration: e.g., "1H30MIN", "ROLL1H30MIN".

    Returns
    -------
    seconds
        Duration in seconds.
    """
    seconds, _ = get_resampling_information(acronym)
    return seconds

####----------------------------------------------------------------------------.
#### File start and end time utilities
def get_dataframe_start_end_time(df: pd.DataFrame, time_column="time"):
    """Retrieve dataframe starting and ending time.

    Parameters
    ----------
    df : pandas.DataFrame
        Input dataframe.
    time_column: str
        Name of the time column. The default is "time".
        The column must be of type datetime.

    Returns
    -------
    (start_time, end_time): tuple
        File start and end time of type pandas.Timestamp.

    """
    starting_time = pd.to_datetime(df[time_column].iloc[0])
    ending_time = pd.to_datetime(df[time_column].iloc[-1])
    return (starting_time, ending_time)


def get_dataset_start_end_time(ds: xr.Dataset, time_dim="time"):
    """Retrieve dataset starting and ending time.

    Parameters
    ----------
    ds : xarray.Dataset
        Input dataset.
    time_dim: str
        Name of the time dimension. The default is "time".

    Returns
    -------
    (start_time, end_time): tuple
        File start and end time of type pandas.Timestamp.

    """
    starting_time = pd.to_datetime(ds[time_dim].to_numpy()[0])
    ending_time = pd.to_datetime(ds[time_dim].to_numpy()[-1])
    return (starting_time, ending_time)


def get_file_start_end_time(obj, time="time"):
    """Retrieve object starting and ending time.

    Parameters
    ----------
    obj : xarray.Dataset or pandas.DataFrame
        Input object with a time dimension or column, respectively.
    time: str
        Name of the time dimension or column. The default is "time".

    Returns
    -------
    (start_time, end_time): tuple
        File start and end time of type pandas.Timestamp.

    """
    if isinstance(obj, xr.Dataset):
        return get_dataset_start_end_time(obj, time_dim=time)
    if isinstance(obj, pd.DataFrame):
        return get_dataframe_start_end_time(obj, time_column=time)
    raise TypeError("Expecting a xarray Dataset or a pandas Dataframe object.")

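Both entry points return a pair of pandas Timestamps; a short sketch (editor's illustration with hypothetical data):

    import pandas as pd
    from disdrodb.utils.time import get_file_start_end_time

    df = pd.DataFrame({"time": pd.date_range("2021-01-01", periods=3, freq="30s")})
    start, end = get_file_start_end_time(df)
    # start -> Timestamp("2021-01-01 00:00:00"), end -> Timestamp("2021-01-01 00:01:00")
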
####------------------------------------------------------------------------------------.
#### Xarray utilities


def ensure_sorted_by_time(obj, time="time"):
    """Ensure an xarray object or pandas DataFrame is sorted by time."""
    # Check sorted by time and sort if necessary
    is_sorted = np.all(np.diff(obj[time].to_numpy().astype(int)) > 0)
    if not is_sorted:
        if isinstance(obj, pd.DataFrame):
            return obj.sort_values(by=time)
        # Else xarray DataArray or Dataset
        obj = obj.sortby(time)
    return obj


def _check_time_sorted(ds, time_dim):
    """Ensure the xarray.Dataset is sorted."""
    time_diff = np.diff(ds[time_dim].to_numpy().astype(int))
    if np.any(time_diff == 0):
        raise ValueError(f"In the {time_dim} dimension there are duplicated timesteps !")
    if not np.all(time_diff > 0):
        print(f"The {time_dim} dimension was not sorted. Sorting it now !")
        ds = ds.sortby(time_dim)
    return ds


def regularize_dataset(
    xr_obj,
    freq: str,
    time_dim: str = "time",
    method: Optional[str] = None,
    fill_value=None,
):
    """Regularize a dataset across the time dimension with uniform resolution.

    Parameters
    ----------
    xr_obj : xarray.Dataset or xarray.DataArray
        xarray object with a time dimension.
    freq : str
        The ``freq`` string to pass to ``pd.date_range()`` to define the new time coordinates.
        Example: ``freq="2min"``.
    time_dim : str, optional
        The time dimension in the xarray object. The default value is ``"time"``.
    method : str, optional
        Method to use for filling missing timesteps.
        If ``None``, fill with ``fill_value``. The default value is ``None``.
        For other possible methods, see ``xarray.Dataset.reindex()``.
    fill_value : float or dict, optional
        Fill value for missing timesteps.
        If not specified, for float variables it uses ``dtypes.NA``, while for
        integer variables it uses the maximum allowed integer value or,
        in case of undecoded variables, the ``_FillValue`` DataArray attribute.

    Returns
    -------
    ds_reindexed : xarray.Dataset
        Regularized dataset.

    """
    xr_obj = _check_time_sorted(xr_obj, time_dim=time_dim)
    start_time, end_time = get_dataset_start_end_time(xr_obj, time_dim=time_dim)

    # Define new time index
    new_time_index = pd.date_range(
        start=start_time,
        end=end_time,
        freq=freq,
    )
    # Check all existing timesteps are within the new time index
    # - Otherwise raise an error, because it means that the desired frequency is not compatible
    idx_missing = np.where(~np.isin(xr_obj[time_dim].data, new_time_index))[0]
    if len(idx_missing) > 0:
        not_included_timesteps = xr_obj[time_dim].data[idx_missing].astype("M8[s]")
        raise ValueError(f"With freq='{freq}', the following timesteps would be dropped: {not_included_timesteps}")

    # Define fill_value dictionary
    if fill_value is None:
        fill_value = define_fill_value_dictionary(xr_obj)

    # Regularize dataset and fill with NA values
    xr_obj = xr_obj.reindex(
        {time_dim: new_time_index},
        method=method,  # do not fill gaps
        # tolerance=tolerance,  # mismatch in seconds
        fill_value=fill_value,
    )
    return xr_obj

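A regularization sketch (editor's illustration with hypothetical data, assuming the default fill value for float variables resolves to NaN, as the docstring above states):

    import pandas as pd
    import xarray as xr
    from disdrodb.utils.time import regularize_dataset

    # Minute-resolution series with a missing timestep at 00:02
    times = pd.to_datetime(["2021-01-01 00:00", "2021-01-01 00:01", "2021-01-01 00:03"])
    ds = xr.Dataset({"n": ("time", [1.0, 2.0, 3.0])}, coords={"time": times})
    ds_reg = regularize_dataset(ds, freq="1min")
    # ds_reg spans 4 timesteps; "n" is NaN at the inserted 00:02 slot
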
####------------------------------------------
#### Sampling interval utilities


def ensure_sample_interval_in_seconds(sample_interval):  # noqa: PLR0911
    """
    Ensure the sample interval is in seconds.

    Parameters
    ----------
    sample_interval : int, numpy.ndarray, xarray.DataArray, or numpy.timedelta64
        The sample interval to be converted to seconds.
        It can be:
        - An integer representing the interval in seconds.
        - A numpy array or xarray DataArray of integers representing intervals in seconds.
        - A numpy.timedelta64 object representing the interval.
        - A numpy array or xarray DataArray of numpy.timedelta64 objects representing intervals.

    Returns
    -------
    int, numpy.ndarray, or xarray.DataArray
        The sample interval converted to seconds. The return type matches the input type:
        - If the input is an integer, the output is an integer.
        - If the input is a numpy array, the output is a numpy array of integers (unless NaN is present).
        - If the input is an xarray DataArray, the output is an xarray DataArray of integers (unless NaN is present).

    """
    # Deal with timedelta objects
    if isinstance(sample_interval, np.timedelta64):
        return (sample_interval.astype("m8[s]") / np.timedelta64(1, "s")).astype(int)
        # return sample_interval.astype("m8[s]").astype(int)

    # Deal with scalar pure integer types (Python int or numpy int32/int64/etc.)
    # --> ATTENTION: this check also matches np.timedelta64 objects, hence the check above!
    if isinstance(sample_interval, numbers.Integral):
        return sample_interval

    # Deal with numpy or xarray arrays of integer types
    if isinstance(sample_interval, (np.ndarray, xr.DataArray)) and np.issubdtype(sample_interval.dtype, int):
        return sample_interval

    # Deal with scalar floats that are actually integers (e.g. 1.0, np.float64(3.0))
    if isinstance(sample_interval, numbers.Real):
        if float(sample_interval).is_integer():
            # Cast back to int seconds
            return int(sample_interval)
        raise TypeError(f"sample_interval floats must be whole numbers of seconds, got {sample_interval}")

    # Deal with timedelta64 numpy arrays
    if isinstance(sample_interval, np.ndarray) and np.issubdtype(sample_interval.dtype, np.timedelta64):
        is_nat = np.isnat(sample_interval)
        if np.any(is_nat):
            sample_interval = sample_interval.astype("timedelta64[s]").astype(float)
            sample_interval[is_nat] = np.nan
            return sample_interval
        return sample_interval.astype("timedelta64[s]").astype(int)
    # Deal with timedelta64 xarray arrays
    if isinstance(sample_interval, xr.DataArray) and np.issubdtype(sample_interval.dtype, np.timedelta64):
        sample_interval = sample_interval.copy()
        is_nat = np.isnat(sample_interval)
        if np.any(is_nat):
            sample_interval_array = sample_interval.data.astype("timedelta64[s]").astype(float)
            sample_interval_array[is_nat] = np.nan
            sample_interval.data = sample_interval_array
            return sample_interval
        sample_interval_array = sample_interval.data.astype("timedelta64[s]").astype(int)
        sample_interval.data = sample_interval_array
        return sample_interval

    # Deal with numpy arrays of floats that are all integer-valued (with optionally some NaN)
    if isinstance(sample_interval, np.ndarray) and np.issubdtype(sample_interval.dtype, np.floating):
        mask_nan = np.isnan(sample_interval)
        if mask_nan.any():
            # Check non-NaN entries are whole numbers
            nonnan = sample_interval[~mask_nan]
            if not np.allclose(nonnan, np.rint(nonnan)):
                raise TypeError("Float array sample_interval must contain only whole numbers or NaN.")
            # Leave as float array so NaNs are preserved
            return sample_interval
        # No NaNs: can safely cast to integer dtype
        if not np.allclose(sample_interval, np.rint(sample_interval)):
            raise TypeError("Float array sample_interval must contain only whole numbers.")
        return sample_interval.astype(int)

    # Deal with xarray.DataArray of floats that are all integer-valued (with optionally some NaN)
    if isinstance(sample_interval, xr.DataArray) and np.issubdtype(sample_interval.dtype, np.floating):
        arr = sample_interval.copy()
        data = arr.data
        mask_nan = np.isnan(data)
        if mask_nan.any():
            nonnan = data[~mask_nan]
            if not np.allclose(nonnan, np.rint(nonnan)):
                raise TypeError("Float DataArray sample_interval must contain only whole numbers or NaN.")
            # Return as float DataArray so NaNs stay
            return arr
        if not np.allclose(data, np.rint(data)):
            raise TypeError("Float DataArray sample_interval must contain only whole numbers.")
        arr.data = data.astype(int)
        return arr

    raise TypeError(
        "sample_interval must be an integer value or array, or numpy.ndarray / xarray.DataArray with type timedelta64.",
    )

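How the conversion behaves across input types (an editor's sketch of the branches above, with hypothetical values):

    import numpy as np
    from disdrodb.utils.time import ensure_sample_interval_in_seconds

    ensure_sample_interval_in_seconds(60)                        # 60
    ensure_sample_interval_in_seconds(np.timedelta64(2, "m"))    # 120
    ensure_sample_interval_in_seconds(np.array([30.0, np.nan]))  # array([30., nan]): kept float so NaN survives
    ensure_sample_interval_in_seconds(1.5)                       # TypeError: not a whole number of seconds
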
def infer_sample_interval(ds, robust=False, verbose=False, logger=None):
    """Infer the sample interval of a dataset.

    Duplicated timesteps are removed before inferring the sample interval.

    NOTE: This function is used only for the reader preparation.
    """
    # Check sorted by time and sort if necessary
    ds = ensure_sorted_by_time(ds)

    # Retrieve timesteps
    # - Remove duplicate timesteps
    timesteps = np.unique(ds["time"].data)

    # Calculate number of timesteps
    n_timesteps = len(timesteps)

    # Calculate time differences in seconds
    deltadt = np.diff(timesteps).astype("timedelta64[s]").astype(int)

    # Round each delta to the nearest multiple of 5 (because the smallest possible sample interval is 10 s)
    # Example: for sample_interval = 10, deltadt values like 8, 9, 11, 12 become 10 ...
    # Example: for sample_interval = 10, deltadt values like 6, 7 or 13, 14 become respectively 5 and 15 ...
    # Example: for sample_interval = 30, deltadt values like 28, 29, 30, 31, 32 become 30 ...
    # Example: for sample_interval = 30, deltadt values like 26, 27 or 33, 34 become respectively 25 and 35 ...
    # --> A second rounding is applied below, after the most frequent sample interval is identified.
    min_sample_interval = 10
    min_half_sample_interval = min_sample_interval / 2
    deltadt = np.round(deltadt / min_half_sample_interval) * min_half_sample_interval

    # Identify unique time intervals and their occurrences
    unique_deltas, counts = np.unique(deltadt, return_counts=True)

    # Determine the most frequent time interval (mode)
    most_frequent_delta_idx = np.argmax(counts)
    sample_interval = unique_deltas[most_frequent_delta_idx]

    # Re-round deltadt to the nearest multiple of the minimum sample interval (10 s)
    deltadt = np.round(deltadt / min_sample_interval) * min_sample_interval

    # Identify unique time intervals and their occurrences
    unique_deltas, counts = np.unique(deltadt, return_counts=True)
    fractions = np.round(counts / len(deltadt) * 100, 2)

    # Determine the most frequent time interval (mode)
    most_frequent_delta_idx = np.argmax(counts)
    sample_interval = unique_deltas[most_frequent_delta_idx]
    sample_interval_fraction = fractions[most_frequent_delta_idx]

    # Inform about irregular sampling
    unexpected_intervals = unique_deltas[unique_deltas != sample_interval]
    unexpected_intervals_counts = counts[unique_deltas != sample_interval]
    unexpected_intervals_fractions = fractions[unique_deltas != sample_interval]
    if verbose and len(unexpected_intervals) > 0:
        msg = "Non-unique interval detected."
        log_info(logger=logger, msg=msg, verbose=verbose)
        for interval, count, fraction in zip(
            unexpected_intervals,
            unexpected_intervals_counts,
            unexpected_intervals_fractions,
        ):
            msg = f"--> Interval: {interval} seconds, Occurrence: {count}, Frequency: {fraction} %"
            log_info(logger=logger, msg=msg, verbose=verbose)

    # Perform checks
    # - Raise an error if negative or zero time intervals are present
    # - If robust=False, still return the estimated sample_interval
    if robust and np.any(deltadt == 0):
        raise ValueError("Likely presence of duplicated timesteps.")

    if robust and len(unexpected_intervals) > 0:
        raise ValueError("Not unique sampling interval.")

    ###-------------------------------------------------------------------------.
    ### Display informative messages
    # - Log a warning if the estimated sample interval has a frequency of less than 60 %
    sample_interval_fraction_threshold = 60
    msg = (
        f"The most frequent sampling interval ({sample_interval} s) "
        + f"has a frequency lower than {sample_interval_fraction_threshold}%: {sample_interval_fraction} %. "
        + f"(Total number of timesteps: {n_timesteps})"
    )
    if sample_interval_fraction < sample_interval_fraction_threshold:
        log_warning(logger=logger, msg=msg, verbose=verbose)

    # - Log a warning if an unexpected interval has a frequency larger than 20 percent
    frequent_unexpected_intervals = unexpected_intervals[unexpected_intervals_fractions > 20]
    if len(frequent_unexpected_intervals) != 0:
        frequent_unexpected_intervals_str = ", ".join(
            f"{interval} seconds" for interval in frequent_unexpected_intervals
        )
        msg = (
            "The following unexpected intervals have a frequency "
            + f"greater than 20%: {frequent_unexpected_intervals_str}. "
            + f"(Total number of timesteps: {n_timesteps})"
        )
        log_warning(logger=logger, msg=msg, verbose=verbose)
    return int(sample_interval)

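A sketch of interval inference on jittered timestamps (editor's illustration with hypothetical data):

    import pandas as pd
    import xarray as xr
    from disdrodb.utils.time import infer_sample_interval

    # 30 s sampling where one record arrives 2 s late
    times = pd.to_datetime(["2021-01-01 00:00:00", "2021-01-01 00:00:30",
                            "2021-01-01 00:01:02", "2021-01-01 00:01:30"])
    ds = xr.Dataset(coords={"time": times})
    infer_sample_interval(ds)  # 30 (deltas 30, 32, 28 all round to 30)
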
####---------------------------------------------------------------------------------
#### Timesteps regularization


def get_problematic_timestep_indices(timesteps, sample_interval):
    """Identify timesteps with missing previous or following timesteps."""
    previous_time = timesteps - pd.Timedelta(seconds=sample_interval)
    next_time = timesteps + pd.Timedelta(seconds=sample_interval)
    idx_previous_missing = np.where(~np.isin(previous_time, timesteps))[0][1:]
    idx_next_missing = np.where(~np.isin(next_time, timesteps))[0][:-1]
    idx_isolated_missing = np.intersect1d(idx_previous_missing, idx_next_missing)
    idx_previous_missing = idx_previous_missing[np.isin(idx_previous_missing, idx_isolated_missing, invert=True)]
    idx_next_missing = idx_next_missing[np.isin(idx_next_missing, idx_isolated_missing, invert=True)]
    return idx_previous_missing, idx_next_missing, idx_isolated_missing

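A worked example of the index classification (editor's illustration with a hypothetical 30 s series):

    import pandas as pd
    from disdrodb.utils.time import get_problematic_timestep_indices

    timesteps = pd.to_datetime(["2021-01-01 00:00:00", "2021-01-01 00:00:30",
                                "2021-01-01 00:01:30", "2021-01-01 00:03:00"]).to_numpy()
    prev_missing, next_missing, isolated = get_problematic_timestep_indices(timesteps, 30)
    # prev_missing -> [3]  (00:03:00 lacks 00:02:30)
    # next_missing -> [1]  (00:00:30 lacks 00:01:00)
    # isolated     -> [2]  (00:01:30 lacks both neighbours)
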
def regularize_timesteps(ds, sample_interval, robust=False, add_quality_flag=True, logger=None, verbose=True):
    """Ensure timesteps match the sample_interval.

    This function:
    - drops dataset indices with duplicated timesteps,
    - does not add missing timesteps to the dataset.
    """
    # Check sorted by time and sort if necessary
    ds = ensure_sorted_by_time(ds)

    # Convert time to pandas.DatetimeIndex for easier manipulation
    times = pd.to_datetime(ds["time"].to_numpy())

    # Determine the start and end times
    start_time = times[0].floor(f"{sample_interval}s")
    end_time = times[-1].ceil(f"{sample_interval}s")

    # Create the expected time grid
    expected_times = pd.date_range(start=start_time, end=end_time, freq=f"{sample_interval}s")

    # Convert to numpy arrays
    times = times.to_numpy(dtype="M8[s]")
    expected_times = expected_times.to_numpy(dtype="M8[s]")

    # Map original times to the nearest expected times
    # Calculate the difference between original times and expected times
    time_deltas = np.abs(times - expected_times[:, None]).astype(int)

    # Find the index of the closest expected time for each original time
    nearest_indices = np.argmin(time_deltas, axis=0)
    adjusted_times = expected_times[nearest_indices]

    # Check for duplicates in adjusted times
    unique_times, counts = np.unique(adjusted_times, return_counts=True)
    duplicates = unique_times[counts > 1]

    # Initialize time quality flag
    # - 0 when ok or just rounded to the closest expected time
    # - 1 if the previous timestep is missing
    # - 2 if the next timestep is missing
    # - 3 if both the previous and next timesteps are missing
    # - 4 if solved duplicated timesteps
    # - 5 if needed to drop duplicated timesteps and select the last
    flag_previous_missing = 1
    flag_next_missing = 2
    flag_isolated_timestep = 3
    flag_solved_duplicated_timestep = 4
    flag_dropped_duplicated_timestep = 5
    qc_flag = np.zeros(adjusted_times.shape)

    # Initialize list with the duplicated timestep indices to drop
    # - We drop the first occurrence because it is likely the shortest interval
    idx_to_drop = []

    # Attempt to resolve duplicates
    if duplicates.size > 0:
        # Handle duplicates
        for dup_time in duplicates:
            # Indices of duplicates
            dup_indices = np.where(adjusted_times == dup_time)[0]
            n_duplicates = len(dup_indices)
            # Define previous and following timestep
            prev_time = dup_time - pd.Timedelta(seconds=sample_interval)
            next_time = dup_time + pd.Timedelta(seconds=sample_interval)
            # Try to find missing slots before and after
            # - With more than 3 duplicates, it is impossible to solve!
            count_solved = 0
            # If the previous timestep is available, set that one
            if n_duplicates == 2:
                if prev_time not in adjusted_times:
                    adjusted_times[dup_indices[0]] = prev_time
                    qc_flag[dup_indices[0]] = flag_solved_duplicated_timestep
                    count_solved += 1
                elif next_time not in adjusted_times:
                    adjusted_times[dup_indices[-1]] = next_time
                    qc_flag[dup_indices[-1]] = flag_solved_duplicated_timestep
                    count_solved += 1
                else:
                    pass
            elif n_duplicates == 3:
                if prev_time not in adjusted_times:
                    adjusted_times[dup_indices[0]] = prev_time
                    qc_flag[dup_indices[0]] = flag_solved_duplicated_timestep
                    count_solved += 1
                if next_time not in adjusted_times:
                    adjusted_times[dup_indices[-1]] = next_time
                    qc_flag[dup_indices[-1]] = flag_solved_duplicated_timestep
                    count_solved += 1
            if count_solved != n_duplicates - 1:
                idx_to_drop = np.append(idx_to_drop, dup_indices[0:-1])
                qc_flag[dup_indices[-1]] = flag_dropped_duplicated_timestep
                msg = (
                    f"Cannot resolve {n_duplicates} duplicated timesteps "
                    f"(after trailing seconds correction) around {dup_time}."
                )
                log_warning(logger=logger, msg=msg, verbose=verbose)
                if robust:
                    raise ValueError(msg)

    # Update the time coordinate (convert to ns for xarray compatibility)
    ds = ds.assign_coords({"time": adjusted_times.astype("datetime64[ns]")})

    # Update quality flag values where the next or previous timestep is missing
    if add_quality_flag:
        idx_previous_missing, idx_next_missing, idx_isolated_missing = get_problematic_timestep_indices(
            adjusted_times,
            sample_interval,
        )
        qc_flag[idx_previous_missing] = np.maximum(qc_flag[idx_previous_missing], flag_previous_missing)
        qc_flag[idx_next_missing] = np.maximum(qc_flag[idx_next_missing], flag_next_missing)
        qc_flag[idx_isolated_missing] = np.maximum(qc_flag[idx_isolated_missing], flag_isolated_timestep)

        # If the first timestep is at 00:00 and currently flagged as previous missing (1), reset to 0
        # first_time = pd.to_datetime(adjusted_times[0]).time()
        # first_expected_time = pd.Timestamp("00:00:00").time()
        # if first_time == first_expected_time and qc_flag[0] == flag_previous_missing:
        #     qc_flag[0] = 0

        # # If the last timestep is currently flagged as next missing (2), reset it to 0
        # last_time = pd.to_datetime(adjusted_times[-1]).time()
        # last_time_expected = (pd.Timestamp("00:00:00") - pd.Timedelta(30, unit="seconds")).time()
        # # Check if adding one interval would go beyond the end_time
        # if last_time == last_time_expected and qc_flag[-1] == flag_next_missing:
        #     qc_flag[-1] = 0

        # Assign time quality flag coordinate
        ds["time_qc"] = xr.DataArray(qc_flag, dims="time")
        ds = ds.set_coords("time_qc")

    # Drop duplicated timesteps
    # - Using ds = ds.drop_isel({"time": idx_to_drop.astype(int)}) raises:
    # --> pandas.errors.InvalidIndexError: Reindexing only valid with uniquely valued Index objects
    # --> https://github.com/pydata/xarray/issues/6605
    if len(idx_to_drop) > 0:
        idx_to_drop = idx_to_drop.astype(int)
        idx_valid_timesteps = np.arange(0, ds["time"].size)
        idx_valid_timesteps = np.delete(idx_valid_timesteps, idx_to_drop)
        ds = ds.isel(time=idx_valid_timesteps)
    # Return dataset
    return ds
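End-to-end behaviour on jittered 30 s data (editor's sketch with hypothetical timestamps; flag values follow the scheme defined in the function):

    import pandas as pd
    import xarray as xr
    from disdrodb.utils.time import regularize_timesteps

    times = pd.to_datetime(["2021-01-01 00:00:01", "2021-01-01 00:00:29",
                            "2021-01-01 00:02:00"])
    ds = xr.Dataset({"n": ("time", [1, 2, 3])}, coords={"time": times})
    ds_reg = regularize_timesteps(ds, sample_interval=30)
    # Times snap to 00:00:00, 00:00:30, 00:02:00.
    # ds_reg["time_qc"] -> [0., 2., 1.]: 00:00:30 lacks a successor, 00:02:00 a predecessor.
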
disdrodb/utils/warnings.py
ADDED

@@ -0,0 +1,30 @@

#!/usr/bin/env python3

# -----------------------------------------------------------------------------.
# Copyright (c) 2021-2023 DISDRODB developers
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------.
"""Warning utilities."""
import warnings
from contextlib import contextmanager


@contextmanager
def suppress_warnings():
    """Context manager suppressing RuntimeWarnings and UserWarnings."""
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        warnings.simplefilter("ignore", UserWarning)
        yield
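Typical use of the context manager (editor's illustration):

    import numpy as np
    from disdrodb.utils.warnings import suppress_warnings

    with suppress_warnings():
        # The RuntimeWarning normally raised by 0.0/0.0 is silenced here
        result = np.array(0.0) / np.array(0.0)  # nan, no warning emitted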