disdrodb 0.0.21__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- disdrodb/__init__.py +132 -15
- disdrodb/_config.py +4 -2
- disdrodb/_version.py +9 -4
- disdrodb/api/checks.py +264 -237
- disdrodb/api/configs.py +4 -8
- disdrodb/api/create_directories.py +235 -290
- disdrodb/api/info.py +217 -26
- disdrodb/api/io.py +306 -270
- disdrodb/api/path.py +597 -173
- disdrodb/api/search.py +486 -0
- disdrodb/{metadata/scripts → cli}/disdrodb_check_metadata_archive.py +12 -7
- disdrodb/{utils/pandas.py → cli/disdrodb_data_archive_directory.py} +9 -18
- disdrodb/cli/disdrodb_download_archive.py +86 -0
- disdrodb/cli/disdrodb_download_metadata_archive.py +53 -0
- disdrodb/cli/disdrodb_download_station.py +84 -0
- disdrodb/{api/scripts → cli}/disdrodb_initialize_station.py +22 -10
- disdrodb/cli/disdrodb_metadata_archive_directory.py +32 -0
- disdrodb/{data_transfer/scripts/disdrodb_download_station.py → cli/disdrodb_open_data_archive.py} +22 -22
- disdrodb/cli/disdrodb_open_logs_directory.py +69 -0
- disdrodb/{data_transfer/scripts/disdrodb_upload_station.py → cli/disdrodb_open_metadata_archive.py} +22 -24
- disdrodb/cli/disdrodb_open_metadata_directory.py +71 -0
- disdrodb/cli/disdrodb_open_product_directory.py +74 -0
- disdrodb/cli/disdrodb_open_readers_directory.py +32 -0
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0.py +38 -31
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0_station.py +32 -30
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0a.py +30 -21
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0a_station.py +24 -33
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0b.py +30 -21
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0b_station.py +25 -34
- disdrodb/cli/disdrodb_run_l0c.py +130 -0
- disdrodb/cli/disdrodb_run_l0c_station.py +129 -0
- disdrodb/cli/disdrodb_run_l1.py +122 -0
- disdrodb/cli/disdrodb_run_l1_station.py +121 -0
- disdrodb/cli/disdrodb_run_l2e.py +122 -0
- disdrodb/cli/disdrodb_run_l2e_station.py +122 -0
- disdrodb/cli/disdrodb_run_l2m.py +122 -0
- disdrodb/cli/disdrodb_run_l2m_station.py +122 -0
- disdrodb/cli/disdrodb_upload_archive.py +105 -0
- disdrodb/cli/disdrodb_upload_station.py +98 -0
- disdrodb/configs.py +90 -25
- disdrodb/data_transfer/__init__.py +22 -0
- disdrodb/data_transfer/download_data.py +87 -90
- disdrodb/data_transfer/upload_data.py +64 -37
- disdrodb/data_transfer/zenodo.py +15 -18
- disdrodb/docs.py +1 -1
- disdrodb/issue/__init__.py +17 -4
- disdrodb/issue/checks.py +10 -23
- disdrodb/issue/reader.py +9 -12
- disdrodb/issue/writer.py +14 -17
- disdrodb/l0/__init__.py +17 -26
- disdrodb/l0/check_configs.py +35 -23
- disdrodb/l0/check_standards.py +46 -51
- disdrodb/l0/configs/{Thies_LPM → LPM}/bins_diameter.yml +44 -44
- disdrodb/l0/configs/{Thies_LPM → LPM}/bins_velocity.yml +40 -40
- disdrodb/l0/configs/LPM/l0a_encodings.yml +80 -0
- disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_cf_attrs.yml +84 -65
- disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_encodings.yml +50 -9
- disdrodb/l0/configs/{Thies_LPM → LPM}/raw_data_format.yml +285 -245
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_diameter.yml +66 -66
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_velocity.yml +64 -64
- disdrodb/l0/configs/PARSIVEL/l0a_encodings.yml +32 -0
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_cf_attrs.yml +23 -21
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_encodings.yml +17 -17
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/raw_data_format.yml +77 -77
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_diameter.yml +64 -64
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_velocity.yml +64 -64
- disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +39 -0
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_cf_attrs.yml +28 -26
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_encodings.yml +20 -20
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/raw_data_format.yml +107 -107
- disdrodb/l0/configs/PWS100/bins_diameter.yml +173 -0
- disdrodb/l0/configs/PWS100/bins_velocity.yml +173 -0
- disdrodb/l0/configs/PWS100/l0a_encodings.yml +19 -0
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +76 -0
- disdrodb/l0/configs/PWS100/l0b_encodings.yml +176 -0
- disdrodb/l0/configs/PWS100/raw_data_format.yml +182 -0
- disdrodb/l0/configs/{RD_80 → RD80}/bins_diameter.yml +40 -40
- disdrodb/l0/configs/RD80/l0a_encodings.yml +16 -0
- disdrodb/l0/configs/{RD_80 → RD80}/l0b_cf_attrs.yml +3 -3
- disdrodb/l0/configs/RD80/l0b_encodings.yml +135 -0
- disdrodb/l0/configs/{RD_80 → RD80}/raw_data_format.yml +46 -50
- disdrodb/l0/l0_reader.py +216 -340
- disdrodb/l0/l0a_processing.py +237 -208
- disdrodb/l0/l0b_nc_processing.py +227 -80
- disdrodb/l0/l0b_processing.py +96 -174
- disdrodb/l0/l0c_processing.py +627 -0
- disdrodb/l0/readers/{ARM → LPM/ARM}/ARM_LPM.py +36 -58
- disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +236 -0
- disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +185 -0
- disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +185 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +195 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +210 -0
- disdrodb/l0/readers/{BRAZIL/GOAMAZON_LPM.py → LPM/KIT/CHWALA.py} +97 -76
- disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +197 -0
- disdrodb/l0/readers/LPM/SLOVENIA/CRNI_VRH.py +197 -0
- disdrodb/l0/readers/{UK → LPM/UK}/DIVEN.py +14 -35
- disdrodb/l0/readers/PARSIVEL/AUSTRALIA/MELBOURNE_2007_PARSIVEL.py +157 -0
- disdrodb/l0/readers/PARSIVEL/CHINA/CHONGQING.py +113 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/ARCTIC_2021.py +40 -57
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/COMMON_2011.py +37 -54
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/DAVOS_2009_2011.py +34 -51
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_2009.py +34 -51
- disdrodb/l0/readers/{EPFL/PARADISO_2014.py → PARSIVEL/EPFL/EPFL_ROOF_2008.py} +38 -50
- disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2010.py +105 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2011.py +34 -51
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2012.py +33 -51
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GENEPI_2007.py +25 -44
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007.py +25 -44
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007_2.py +25 -44
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HPICONET_2010.py +34 -51
- disdrodb/l0/readers/{EPFL/EPFL_ROOF_2010.py → PARSIVEL/EPFL/HYMEX_LTE_SOP2.py} +37 -50
- disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP3.py +111 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HYMEX_LTE_SOP4.py +36 -54
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2018.py +34 -52
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2019.py +38 -56
- disdrodb/l0/readers/PARSIVEL/EPFL/PARADISO_2014.py +105 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PARSIVEL_2007.py +27 -45
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PLATO_2019.py +24 -44
- disdrodb/l0/readers/PARSIVEL/EPFL/RACLETS_2019.py +140 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RACLETS_2019_WJF.py +41 -59
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RIETHOLZBACH_2011.py +34 -51
- disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2017.py +117 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2019.py +137 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/UNIL_2022.py +42 -55
- disdrodb/l0/readers/PARSIVEL/GPM/IFLOODS.py +104 -0
- disdrodb/l0/readers/{GPM → PARSIVEL/GPM}/LPVEX.py +29 -48
- disdrodb/l0/readers/PARSIVEL/GPM/MC3E.py +184 -0
- disdrodb/l0/readers/PARSIVEL/KIT/BURKINA_FASO.py +133 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/CCOPE_2015.py +113 -0
- disdrodb/l0/readers/{NCAR/VORTEX_SE_2016_P1.py → PARSIVEL/NCAR/OWLES_MIPS.py} +46 -72
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +125 -0
- disdrodb/l0/readers/{NCAR/OWLES_MIPS.py → PARSIVEL/NCAR/PLOWS_MIPS.py} +45 -64
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +114 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +176 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +183 -0
- disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL_FGG.py +121 -0
- disdrodb/l0/readers/{ARM/ARM_LD.py → PARSIVEL2/ARM/ARM_PARSIVEL2.py} +27 -50
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +163 -0
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +163 -0
- disdrodb/l0/readers/{DENMARK → PARSIVEL2/DENMARK}/EROSION_nc.py +14 -35
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +189 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +119 -0
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +104 -0
- disdrodb/l0/readers/PARSIVEL2/GPM/NSSTC.py +176 -0
- disdrodb/l0/readers/PARSIVEL2/ITALY/GID_PARSIVEL2.py +32 -0
- disdrodb/l0/readers/PARSIVEL2/MEXICO/OH_IIUNAM_nc.py +56 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +120 -0
- disdrodb/l0/readers/{NCAR → PARSIVEL2/NCAR}/PECAN_MIPS.py +45 -64
- disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +181 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +160 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +160 -0
- disdrodb/l0/readers/{NCAR/PLOWS_MIPS.py → PARSIVEL2/NCAR/VORTEX_SE_2016_P1.py} +49 -66
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +118 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +152 -0
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT.py +166 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +150 -0
- disdrodb/l0/readers/{NCAR/RELAMPAGO_RD80.py → RD80/BRAZIL/CHUVA_RD80.py} +36 -60
- disdrodb/l0/readers/{BRAZIL → RD80/BRAZIL}/GOAMAZON_RD80.py +36 -55
- disdrodb/l0/readers/{NCAR → RD80/NCAR}/CINDY_2011_RD80.py +35 -54
- disdrodb/l0/readers/{BRAZIL/CHUVA_RD80.py → RD80/NCAR/RELAMPAGO_RD80.py} +40 -54
- disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +274 -0
- disdrodb/l0/readers/template_reader_raw_netcdf_data.py +62 -0
- disdrodb/l0/readers/{reader_template.py → template_reader_raw_text_data.py} +20 -44
- disdrodb/l0/routines.py +885 -581
- disdrodb/l0/standards.py +77 -238
- disdrodb/l0/template_tools.py +105 -110
- disdrodb/l1/__init__.py +17 -0
- disdrodb/l1/beard_model.py +716 -0
- disdrodb/l1/encoding_attrs.py +635 -0
- disdrodb/l1/fall_velocity.py +260 -0
- disdrodb/l1/filters.py +192 -0
- disdrodb/l1/processing.py +202 -0
- disdrodb/l1/resampling.py +236 -0
- disdrodb/l1/routines.py +358 -0
- disdrodb/l1_env/__init__.py +17 -0
- disdrodb/l1_env/routines.py +38 -0
- disdrodb/l2/__init__.py +17 -0
- disdrodb/l2/empirical_dsd.py +1833 -0
- disdrodb/l2/event.py +388 -0
- disdrodb/l2/processing.py +528 -0
- disdrodb/l2/processing_options.py +213 -0
- disdrodb/l2/routines.py +868 -0
- disdrodb/metadata/__init__.py +9 -2
- disdrodb/metadata/checks.py +180 -124
- disdrodb/metadata/download.py +81 -0
- disdrodb/metadata/geolocation.py +146 -0
- disdrodb/metadata/info.py +20 -13
- disdrodb/metadata/manipulation.py +3 -3
- disdrodb/metadata/reader.py +59 -8
- disdrodb/metadata/search.py +77 -144
- disdrodb/metadata/standards.py +83 -80
- disdrodb/metadata/writer.py +10 -16
- disdrodb/psd/__init__.py +38 -0
- disdrodb/psd/fitting.py +2146 -0
- disdrodb/psd/models.py +774 -0
- disdrodb/routines.py +1412 -0
- disdrodb/scattering/__init__.py +28 -0
- disdrodb/scattering/axis_ratio.py +344 -0
- disdrodb/scattering/routines.py +456 -0
- disdrodb/utils/__init__.py +17 -0
- disdrodb/utils/attrs.py +208 -0
- disdrodb/utils/cli.py +269 -0
- disdrodb/utils/compression.py +60 -42
- disdrodb/utils/dask.py +62 -0
- disdrodb/utils/dataframe.py +342 -0
- disdrodb/utils/decorators.py +110 -0
- disdrodb/utils/directories.py +107 -46
- disdrodb/utils/encoding.py +127 -0
- disdrodb/utils/list.py +29 -0
- disdrodb/utils/logger.py +168 -46
- disdrodb/utils/time.py +657 -0
- disdrodb/utils/warnings.py +30 -0
- disdrodb/utils/writer.py +57 -0
- disdrodb/utils/xarray.py +138 -47
- disdrodb/utils/yaml.py +0 -1
- disdrodb/viz/__init__.py +17 -0
- disdrodb/viz/plots.py +17 -0
- disdrodb-0.1.1.dist-info/METADATA +294 -0
- disdrodb-0.1.1.dist-info/RECORD +232 -0
- {disdrodb-0.0.21.dist-info → disdrodb-0.1.1.dist-info}/WHEEL +1 -1
- disdrodb-0.1.1.dist-info/entry_points.txt +30 -0
- disdrodb/data_transfer/scripts/disdrodb_download_archive.py +0 -53
- disdrodb/data_transfer/scripts/disdrodb_upload_archive.py +0 -57
- disdrodb/l0/configs/OTT_Parsivel/l0a_encodings.yml +0 -32
- disdrodb/l0/configs/OTT_Parsivel2/l0a_encodings.yml +0 -39
- disdrodb/l0/configs/RD_80/l0a_encodings.yml +0 -16
- disdrodb/l0/configs/RD_80/l0b_encodings.yml +0 -135
- disdrodb/l0/configs/Thies_LPM/l0a_encodings.yml +0 -80
- disdrodb/l0/io.py +0 -257
- disdrodb/l0/l0_processing.py +0 -1091
- disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_OTT.py +0 -178
- disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_THIES.py +0 -247
- disdrodb/l0/readers/BRAZIL/CHUVA_LPM.py +0 -204
- disdrodb/l0/readers/BRAZIL/CHUVA_OTT.py +0 -183
- disdrodb/l0/readers/BRAZIL/GOAMAZON_OTT.py +0 -183
- disdrodb/l0/readers/CHINA/CHONGQING.py +0 -131
- disdrodb/l0/readers/EPFL/EPFL_ROOF_2008.py +0 -128
- disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP2.py +0 -127
- disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP3.py +0 -129
- disdrodb/l0/readers/EPFL/RACLETS_2019.py +0 -158
- disdrodb/l0/readers/EPFL/SAMOYLOV_2017.py +0 -136
- disdrodb/l0/readers/EPFL/SAMOYLOV_2019.py +0 -158
- disdrodb/l0/readers/FRANCE/SIRTA_OTT2.py +0 -138
- disdrodb/l0/readers/GPM/GCPEX.py +0 -123
- disdrodb/l0/readers/GPM/IFLOODS.py +0 -123
- disdrodb/l0/readers/GPM/MC3E.py +0 -123
- disdrodb/l0/readers/GPM/NSSTC.py +0 -164
- disdrodb/l0/readers/ITALY/GID.py +0 -199
- disdrodb/l0/readers/MEXICO/OH_IIUNAM_nc.py +0 -92
- disdrodb/l0/readers/NCAR/CCOPE_2015.py +0 -133
- disdrodb/l0/readers/NCAR/PECAN_FP3.py +0 -137
- disdrodb/l0/readers/NCAR/PECAN_MOBILE.py +0 -144
- disdrodb/l0/readers/NCAR/RELAMPAGO_OTT.py +0 -195
- disdrodb/l0/readers/NCAR/SNOWIE_PJ.py +0 -172
- disdrodb/l0/readers/NCAR/SNOWIE_SB.py +0 -179
- disdrodb/l0/readers/NCAR/VORTEX2_2009.py +0 -133
- disdrodb/l0/readers/NCAR/VORTEX2_2010.py +0 -188
- disdrodb/l0/readers/NCAR/VORTEX2_2010_UF.py +0 -191
- disdrodb/l0/readers/NCAR/VORTEX_SE_2016_P2.py +0 -135
- disdrodb/l0/readers/NCAR/VORTEX_SE_2016_PIPS.py +0 -170
- disdrodb/l0/readers/NETHERLANDS/DELFT.py +0 -187
- disdrodb/l0/readers/SPAIN/SBEGUERIA.py +0 -179
- disdrodb/l0/scripts/disdrodb_run_l0b_concat.py +0 -93
- disdrodb/l0/scripts/disdrodb_run_l0b_concat_station.py +0 -85
- disdrodb/utils/netcdf.py +0 -452
- disdrodb/utils/scripts.py +0 -102
- disdrodb-0.0.21.dist-info/AUTHORS.md +0 -18
- disdrodb-0.0.21.dist-info/METADATA +0 -186
- disdrodb-0.0.21.dist-info/RECORD +0 -168
- disdrodb-0.0.21.dist-info/entry_points.txt +0 -15
- /disdrodb/l0/configs/{RD_80 → RD80}/bins_velocity.yml +0 -0
- /disdrodb/l0/manuals/{Thies_LPM.pdf → LPM.pdf} +0 -0
- /disdrodb/l0/manuals/{ODM_470.pdf → ODM470.pdf} +0 -0
- /disdrodb/l0/manuals/{OTT_Parsivel.pdf → PARSIVEL.pdf} +0 -0
- /disdrodb/l0/manuals/{OTT_Parsivel2.pdf → PARSIVEL2.pdf} +0 -0
- /disdrodb/l0/manuals/{PWS_100.pdf → PWS100.pdf} +0 -0
- /disdrodb/l0/manuals/{RD_80.pdf → RD80.pdf} +0 -0
- {disdrodb-0.0.21.dist-info → disdrodb-0.1.1.dist-info/licenses}/LICENSE +0 -0
- {disdrodb-0.0.21.dist-info → disdrodb-0.1.1.dist-info}/top_level.txt +0 -0
disdrodb/l0/scripts/disdrodb_run_l0b_concat.py
DELETED
@@ -1,93 +0,0 @@
-# -----------------------------------------------------------------------------.
-# Copyright (c) 2021-2023 DISDRODB developers
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-# -----------------------------------------------------------------------------.
-##################################################
-## Wrapper to concat L0B files by command lines ##
-##################################################
-import sys
-
-import click
-
-from disdrodb.l0.routines import (
-    click_l0_stations_options,
-    click_l0b_concat_options,
-)
-from disdrodb.utils.scripts import click_base_dir_option, parse_arg_to_list, parse_base_dir
-
-sys.tracebacklimit = 0  # avoid full traceback error if occur
-
-
-@click.command()
-@click_l0_stations_options
-@click_l0b_concat_options
-@click_base_dir_option
-def disdrodb_run_l0b_concat(
-    data_sources: str = None,
-    campaign_names: str = None,
-    station_names: str = None,
-    remove_l0b: bool = False,
-    verbose: bool = True,
-    base_dir: str = None,
-):
-    """Run the L0B concatenation of available DISDRODB stations.
-
-    This function allow to launch the processing of many DISDRODB stations with a single command.
-    From the list of all available DISDRODB stations, it runs the processing of the
-    stations matching the provided data_sources, campaign_names and station_names.
-
-    Parameters
-    ----------
-
-    data_sources : str
-        Name of data source(s) to process.
-        The name(s) must be UPPER CASE.
-        If campaign_names and station are not specified, process all stations.
-        To specify multiple data sources, write i.e.: --data_sources 'GPM EPFL NCAR'
-    campaign_names : str
-        Name of the campaign(s) to process.
-        The name(s) must be UPPER CASE.
-        To specify multiple campaigns, write i.e.: --campaign_names 'IPEX IMPACTS'
-    station_names : str
-        Station names.
-        To specify multiple stations, write i.e.: --station_names 'station1 station2'
-    remove_l0b : bool
-        If true, remove all source L0B files once L0B concatenation is terminated.
-        The default is False.
-    verbose : bool
-        Whether to print detailed processing information into terminal.
-        The default is False.
-    base_dir : str
-        Base directory of DISDRODB
-        Format: <...>/DISDRODB
-        If not specified, uses path specified in the DISDRODB active configuration.
-    """
-    from disdrodb.l0.routines import run_disdrodb_l0b_concat
-
-    # Parse data_sources, campaign_names and station arguments
-    base_dir = parse_base_dir(base_dir)
-    data_sources = parse_arg_to_list(data_sources)
-    campaign_names = parse_arg_to_list(campaign_names)
-    station_names = parse_arg_to_list(station_names)
-
-    # Run concatenation
-    run_disdrodb_l0b_concat(
-        base_dir=base_dir,
-        data_sources=data_sources,
-        campaign_names=campaign_names,
-        station_names=station_names,
-        remove_l0b=remove_l0b,
-        verbose=verbose,
-    )
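For reference, the wrapper above fed each multi-value option through parse_arg_to_list from the likewise-removed disdrodb.utils.scripts module. That helper's body is not shown in this diff; a minimal, hypothetical stand-in consistent with the contract documented in the docstring (one quoted, space-separated string per option) might look like:

from typing import Optional

def parse_arg_to_list(arg: Optional[str]) -> Optional[list]:
    # Hypothetical stand-in for the removed disdrodb.utils.scripts helper.
    # The docstring above documents the CLI contract:
    #   --data_sources 'GPM EPFL NCAR'  ->  ["GPM", "EPFL", "NCAR"]
    if arg is None:
        return None  # no filter: process all matching stations
    return arg.split()

print(parse_arg_to_list("GPM EPFL NCAR"))  # ['GPM', 'EPFL', 'NCAR']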
disdrodb/l0/scripts/disdrodb_run_l0b_concat_station.py
DELETED
@@ -1,85 +0,0 @@
-# -----------------------------------------------------------------------------.
-# Copyright (c) 2021-2023 DISDRODB developers
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-# -----------------------------------------------------------------------------.
-##################################################
-## Wrapper to concat L0B files by command lines ##
-##################################################
-import sys
-
-import click
-
-from disdrodb.l0.routines import click_l0b_concat_options
-from disdrodb.utils.scripts import (
-    click_base_dir_option,
-    click_station_arguments,
-    parse_base_dir,
-)
-
-sys.tracebacklimit = 0  # avoid full traceback error if occur
-
-
-@click.command()
-@click_station_arguments
-@click_l0b_concat_options
-@click_base_dir_option
-def disdrodb_run_l0b_concat_station(
-    # Station arguments
-    data_source: str,
-    campaign_name: str,
-    station_name: str,
-    # L0B concat options
-    remove_l0b=False,
-    verbose=True,
-    base_dir: str = None,
-):
-    """Concatenation all L0B files of a specific DISDRODB station into a single netCDF.
-
-    Parameters
-    ----------
-
-    data_source : str
-        Institution name (when campaign data spans more than 1 country),
-        or country (when all campaigns (or sensor networks) are inside a given country).
-        Must be UPPER CASE.
-    campaign_name : str
-        Campaign name. Must be UPPER CASE.
-    station_name : str
-        Station name
-    remove_l0b : bool
-        If true, remove all source L0B files once L0B concatenation is terminated.
-        The default is False.
-    verbose : bool
-        Whether to print detailed processing information into terminal.
-        The default is False.
-    base_dir : str
-        Base directory of DISDRODB
-        Format: <...>/DISDRODB
-        If not specified, uses path specified in the DISDRODB active configuration.
-    """
-    from disdrodb.l0.l0_processing import run_l0b_concat_station
-
-    base_dir = parse_base_dir(base_dir)
-
-    run_l0b_concat_station(
-        # Station arguments
-        data_source=data_source,
-        campaign_name=campaign_name,
-        station_name=station_name,
-        # Processing options
-        remove_l0b=remove_l0b,
-        verbose=verbose,
-        base_dir=base_dir,
-    )
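Both removed wrappers follow the same click pattern: reusable argument and option groups (click_station_arguments, click_l0b_concat_options, click_base_dir_option) stacked as decorators under @click.command(). A self-contained sketch of that pattern, using illustrative names rather than disdrodb's actual helpers:

import click

def station_arguments(command):
    # Reusable argument group. Decorators attach parameters bottom-up,
    # so the argument added last (data_source) comes first on the CLI.
    command = click.argument("station_name")(command)
    command = click.argument("campaign_name")(command)
    command = click.argument("data_source")(command)
    return command

@click.command()
@station_arguments
def demo(data_source, campaign_name, station_name):
    click.echo(f"{data_source}/{campaign_name}/{station_name}")

if __name__ == "__main__":
    demo()  # e.g. `python demo.py EPFL LOCARNO_2018 10` -> EPFL/LOCARNO_2018/10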
disdrodb/utils/netcdf.py
DELETED
@@ -1,452 +0,0 @@
-#!/usr/bin/env python3
-
-# -----------------------------------------------------------------------------.
-# Copyright (c) 2021-2023 DISDRODB developers
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-# -----------------------------------------------------------------------------.
-"""DISDRODB netCDF utility."""
-
-import logging
-from typing import Tuple
-
-import numpy as np
-import pandas as pd
-import xarray as xr
-
-from disdrodb.utils.logger import log_error, log_info, log_warning
-
-logger = logging.getLogger(__name__)
-
-
-####---------------------------------------------------------------------------.
-def _sort_datasets_by_dim(list_ds: list, filepaths: str, dim: str = "time") -> Tuple[list, list]:
-    """Sort a list of xarray.Dataset and corresponding file paths by the starting value of a specified dimension.
-
-    Parameters
-    ----------
-    filepaths : list
-        List of netCDFs file paths.
-    list_ds : list
-        List of xarray Dataset.
-    dim : str, optional
-        Dimension name. The default is "time".
-
-    Returns
-    -------
-    tuple
-        Tuple of sorted list of xarray datasets and sorted list of file paths.
-    """
-    start_values = [ds[dim].values[0] for ds in list_ds]
-    sorted_idx = np.argsort(start_values)
-    sorted_list_ds = [list_ds[i] for i in sorted_idx]
-    sorted_filepaths = [filepaths[i] for i in sorted_idx]
-    return sorted_list_ds, sorted_filepaths
-
-
-def _get_dim_values_index(list_ds: list, dim: str) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-    """Get list and dataset indices associated to the dimension values."""
-    dim_values = np.concatenate([ds[dim].values for ds in list_ds])
-    list_index = np.concatenate([np.ones(len(ds[dim])) * i for i, ds in enumerate(list_ds)])
-    list_index = list_index.astype(int)
-    ds_index = np.concatenate([np.arange(0, len(ds[dim])) for i, ds in enumerate(list_ds)])
-    return dim_values, list_index, ds_index
-
-
-def _get_non_monotonic_indices_to_remove(dim_values: np.ndarray) -> np.ndarray:
-    """Returns the indices that cause a non-monotonic increasing series of values.
-
-    Assume that duplicated values, if present, occurs consecutively !
-    """
-    diff_dim_values = np.diff(dim_values)
-    indices_decreasing = np.where(diff_dim_values.astype(float) <= 0)[0] + 1
-    if len(indices_decreasing) == 0:
-        return []
-    idx_start_decreasing = indices_decreasing[0]
-    idx_restart_increase = np.max(np.where(dim_values <= dim_values[idx_start_decreasing - 1])[0])
-    idx_to_remove = np.arange(idx_start_decreasing, idx_restart_increase + 1)
-    return idx_to_remove
-
-
-def _get_duplicated_indices(x, keep="first"):
-    """Return the indices to remove for duplicated values in x such that there is only one value occurrence.
-
-    Parameters
-    ----------
-    x : np.array
-        Array of values.
-    keep : str, optional
-        The value to keep, either 'first', 'last' or False.
-        The default is 'first'.
-        ‘first’ : Mark duplicates as True except for the first occurrence.
-        ‘last’ : Mark duplicates as True except for the last occurrence.
-        False : Mark all duplicates as True.
-
-    Returns
-    -------
-    np.array
-        Array of indices to remove.
-    """
-    # Check 'keep' argument
-    # if not isinstance(keep, str):
-    #     raise TypeError("`keep` must be a string. Either first or last.")
-    # if not np.isin(keep, ["first", "last"]):
-    #     raise ValueError("Invalid value for argument keep. Only 'first' and 'last' are accepted.")
-    # # Get
-
-    # x_indices = np.arange(len(x))
-    # unique_values, unique_counts = np.unique(x, return_counts=True)
-    # duplicated_values = unique_values[unique_counts > 1]
-
-    # duplicated_indices = np.array([], dtype=np.int32)
-    # if keep == 'first':
-    #     for value in duplicated_values:
-    #         indices = np.where(x == value)[0]
-    #         duplicated_indices = np.concatenate([duplicated_indices, indices[1:]])
-    # elif keep == 'last':
-    #     indices = np.where(x == value)[0]
-    #     duplicated_indices = np.concatenate([duplicated_indices, indices[:-1]])
-    # return duplicated_indices
-
-    # Get duplicate indices
-    idx_duplicated = pd.Index(x).duplicated(keep=keep)
-    return np.where(idx_duplicated)[0]
-
-
-def _get_bad_info_dict(
-    idx_to_remove: np.ndarray,
-    list_index: np.ndarray,
-    dim_values: np.ndarray,
-    ds_index: np.ndarray,
-) -> Tuple[dict, dict]:
-    """Return two dictionaries mapping, for each dataset, the bad values and indices to remove.
-
-    Parameters
-    ----------
-    idx_to_remove : np.ndarray
-        Indices to be removed to ensure monotonic dimension.
-    list_index : np.ndarray
-        Indices corresponding to the file in the `list_ds` parameter.
-    ds_index : np.ndarray
-        Indices corresponding to the dataset dimension index in the `list_ds` parameter.
-
-    Returns
-    -------
-    dict
-        A dictionary mapping the dimension values to remove for each file.
-    dict
-        A dictionary mapping the dataset dimension indices to remove for each file.
-    """
-    list_index_bad = list_index[idx_to_remove]
-    ds_index_bad = ds_index[idx_to_remove]
-    dim_values_bad = dim_values[idx_to_remove]
-    # Retrieve dictionary with the bad values in each dataset
-    dict_ds_bad_values = {k: dim_values_bad[np.where(list_index_bad == k)[0]] for k in np.unique(list_index_bad)}
-    # Retrieve dictionary with the index with the bad values in each dataset
-    dict_ds_bad_idx = {k: ds_index_bad[np.where(list_index_bad == k)[0]] for k in np.unique(list_index_bad)}
-    return dict_ds_bad_values, dict_ds_bad_idx
-
-
-def _remove_dataset_bad_values(list_ds, filepaths, dict_ds_bad_idx, dim):
-    """Remove portions of xarray Datasets corresponding to duplicated values.
-
-    Parameters
-    ----------
-    list_ds : list
-        List of xarray Dataset.
-    dict_ds_bad_idx : dict
-        Dictionary with the dimension indices corresponding to bad values in each xarray Dataset.
-
-    Returns
-    -------
-
-    list_ds : list
-        List of xarray Dataset without bad values.
-    """
-    list_index_valid = list(range(len(list_ds)))
-    for list_index_bad, bad_idx in dict_ds_bad_idx.items():
-        # Get dataset
-        ds = list_ds[list_index_bad]
-        # If resulting in a empty dataset, drop index from list_index_valid
-        if len(bad_idx) == len(list_ds[list_index_bad][dim]):
-            list_index_valid.remove(list_index_bad)
-        # Remove invalid indices
-        list_ds[list_index_bad] = ds.drop_isel({dim: bad_idx})
-
-    # Keep non-empty datasets
-    new_list_ds = [list_ds[idx] for idx in list_index_valid]
-    new_filepaths = [filepaths[idx] for idx in list_index_valid]
-    return new_list_ds, new_filepaths
-
-
-def ensure_unique_dimension_values(list_ds: list, filepaths: str, dim: str = "time", verbose: bool = False) -> list:
-    """Ensure that a list of xr.Dataset has non duplicated dimension values.
-
-    Parameters
-    ----------
-    list_ds : list
-        List of xarray Dataset.
-    filepaths : list
-        List of netCDFs file paths.
-    dim : str, optional
-        Dimension name.
-        The default is "time".
-
-    Returns
-    -------
-    list
-        List of xarray Dataset.
-    list
-        List of netCDFs file paths.
-    """
-    # Reorder the files and filepaths by the starting dimension value (time)
-    list_ds, filepaths = _sort_datasets_by_dim(list_ds=list_ds, filepaths=filepaths, dim=dim)
-
-    # Get the datasets dimension values array (and associated list_ds/xr.Dataset indices)
-    dim_values, list_index, ds_index = _get_dim_values_index(list_ds, dim=dim)
-
-    # Get duplicated indices
-    idx_duplicated = _get_duplicated_indices(dim_values, keep="first")
-
-    # Remove duplicated indices
-    if len(idx_duplicated) > 0:
-        # Retrieve dictionary providing bad values and indexes for each dataset
-        dict_ds_bad_values, dict_ds_bad_idx = _get_bad_info_dict(
-            idx_to_remove=idx_duplicated,
-            list_index=list_index,
-            dim_values=dim_values,
-            ds_index=ds_index,
-        )
-
-        # Report for each dataset, the duplicates values occurring
-        for list_index_bad, bad_values in dict_ds_bad_values.items():
-            # Retrieve dataset filepath
-            filepath = filepaths[list_index_bad]
-            # If all values inside the file are duplicated, report it
-            if len(bad_values) == len(list_ds[list_index_bad][dim]):
-                msg = (
-                    f"{filepath} is excluded from concatenation. All {dim} values are already present in some other"
-                    " file."
-                )
-                log_warning(logger=logger, msg=msg, verbose=verbose)
-            else:
-                if np.issubdtype(bad_values.dtype, np.datetime64):
-                    bad_values = bad_values.astype("M8[s]")
-                msg = f"In {filepath}, dropping {dim} values {bad_values} to avoid duplicated {dim} values."
-                log_warning(logger=logger, msg=msg, verbose=verbose)
-
-        # Remove duplicated values
-        list_ds, filepaths = _remove_dataset_bad_values(
-            list_ds=list_ds, filepaths=filepaths, dict_ds_bad_idx=dict_ds_bad_idx, dim=dim
-        )
-    return list_ds, filepaths
-
-
-def ensure_monotonic_dimension(list_ds: list, filepaths: str, dim: str = "time", verbose: bool = False) -> list:
-    """Ensure that a list of xr.Dataset has a monotonic increasing (non duplicated) dimension values.
-
-    Parameters
-    ----------
-    list_ds : list
-        List of xarray Dataset.
-    filepaths : list
-        List of netCDFs file paths.
-    dim : str, optional
-        Dimension name.
-        The default is "time".
-
-    Returns
-    -------
-    list
-        List of xarray Dataset.
-    list
-        List of netCDFs file paths.
-    """
-    # Reorder the files and filepaths by the starting dimension value (time)
-    # TODO: should maybe also split by non-continuous time ...
-    list_ds, filepaths = _sort_datasets_by_dim(list_ds=list_ds, filepaths=filepaths, dim=dim)
-
-    # Get the datasets dimension values array (and associated list_ds/xr.Dataset indices)
-    dim_values, list_index, ds_index = _get_dim_values_index(list_ds, dim=dim)
-
-    # Identify the indices to remove to ensure monotonic values
-    idx_to_remove = _get_non_monotonic_indices_to_remove(dim_values)
-
-    # Remove indices causing the values to be non-monotonic increasing
-    if len(idx_to_remove) > 0:
-        # Retrieve dictionary providing bad values and indexes for each dataset
-        dict_ds_bad_values, dict_ds_bad_idx = _get_bad_info_dict(
-            idx_to_remove=idx_to_remove,
-            list_index=list_index,
-            dim_values=dim_values,
-            ds_index=ds_index,
-        )
-
-        # Report for each dataset, the values to be dropped
-        for list_index_bad, bad_values in dict_ds_bad_values.items():
-            # Retrieve dataset filepath
-            filepath = filepaths[list_index_bad]
-            # If all values inside the file should be dropped, report it
-            if len(bad_values) == len(list_ds[list_index_bad][dim]):
-                msg = (
-                    f"{filepath} is excluded from concatenation. All {dim} values cause the dimension to be"
-                    " non-monotonic."
-                )
-                log_warning(logger=logger, msg=msg, verbose=verbose)
-            else:
-                if np.issubdtype(bad_values.dtype, np.datetime64):
-                    bad_values = bad_values.astype("M8[s]")
-                msg = f"In {filepath}, dropping {dim} values {bad_values} to ensure monotonic {dim} dimension."
-                log_warning(logger=logger, msg=msg, verbose=verbose)
-
-        # Remove duplicated values
-        list_ds, filepaths = _remove_dataset_bad_values(
-            list_ds=list_ds, filepaths=filepaths, dict_ds_bad_idx=dict_ds_bad_idx, dim=dim
-        )
-        # Iterative check
-        list_ds, filepaths = ensure_monotonic_dimension(list_ds=list_ds, filepaths=filepaths, dim=dim)
-
-    return list_ds, filepaths
-
-
-# ds_index = [0,1,2,3,0,1,2,3,4]
-# list_index = [0,0,0,0,1, 1, 1,1, 1]
-# dim_values = [0,1,5,5,5, 5, 6,7,8]
-# list_index = np.array(list_index)
-# dim_values = np.array(dim_values)
-# ds_index = np.array(ds_index)
-
-
-####---------------------------------------------------------------------------
-def get_list_ds(filepaths: str) -> list:
-    """Get list of xarray datasets from file paths.
-
-    Parameters
-    ----------
-    filepaths : list
-        List of netCDFs file paths.
-
-    Returns
-    -------
-    list
-        List of xarray datasets.
-    """
-    list_ds = []
-    for filepath in filepaths:
-        # This context manager is required to avoid random HDF locking
-        # - cache=True: store data in memory to avoid reading back from disk
-        # --> but LRU cache might cause the netCDF to not be closed !
-        with xr.open_dataset(filepath, cache=False) as data:
-            ds = data.load()
-        list_ds.append(ds)
-    return list_ds
-
-
-# def get_list_ds(filepaths: str) -> list:
-#     """Get list of xarray datasets from file paths.
-
-#     Parameters
-#     ----------
-#     filepaths : list
-#         List of netCDFs file paths.
-
-#     Returns
-#     -------
-#     list
-#         List of xarray datasets.
-#     """
-#     # WARNING: READING IN PARALLEL USING MULTIPROCESS CAUSE HDF LOCK ERRORS
-#     @dask.delayed
-#     def open_dataset_delayed(filepath):
-#         import os
-
-#         os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
-#
-
-#         # This context manager is required to avoid random HDF locking
-#         # - cache=True: store data in memory to avoid reading back from disk
-#         # --> but LRU cache might cause the netCDF to not be closed !
-#         with xr.open_dataset(filepath, cache=False) as data:
-#             ds = data.load()
-#         return ds
-
-#     list_ds_delayed = []
-#     for filepath in filepaths:
-#         list_ds_delayed.append(open_dataset_delayed(filepath))
-#     list_ds = dask.compute(list_ds_delayed)[0]
-#     return list_ds
-
-
-####---------------------------------------------------------------------------
-
-
-def _concatenate_datasets(list_ds, dim="time", verbose=False):
-    try:
-        msg = "Start concatenating with xr.concat."
-        log_info(logger=logger, msg=msg, verbose=verbose)
-
-        ds = xr.concat(list_ds, dim="time", coords="minimal", compat="override")
-
-        msg = "Concatenation with xr.concat has been successful."
-        log_info(logger=logger, msg=msg, verbose=verbose)
-
-    except Exception as e:
-        msg = f"Concatenation with xr.concat failed. Error is {e}."
-        log_error(logger=logger, msg=msg, verbose=False)
-        raise ValueError(msg)
-    return ds
-
-
-def xr_concat_datasets(filepaths: str, verbose=False) -> xr.Dataset:
-    """Concat xr.Dataset in a robust and parallel way.
-
-    1. It checks for time dimension monotonicity
-
-    Parameters
-    ----------
-    filepaths : list
-        List of netCDFs file paths.
-
-    Returns
-    -------
-    xr.Dataset
-        A single xarray dataset.
-
-    Raises
-    ------
-    ValueError
-        Error if the merging/concatenation operations can not be achieved.
-
-    """
-
-    # --------------------------------------.
-    # Open xr.Dataset lazily in parallel using dask delayed
-    list_ds = get_list_ds(filepaths)
-
-    # --------------------------------------.
-    # Ensure time dimension contains no duplicated values
-    list_ds, filepaths = ensure_unique_dimension_values(
-        list_ds=list_ds, filepaths=filepaths, dim="time", verbose=verbose
-    )
-
-    # Ensure time dimension is monotonic increasingly
-    list_ds, filepaths = ensure_monotonic_dimension(list_ds=list_ds, filepaths=filepaths, dim="time", verbose=verbose)
-
-    # --------------------------------------.
-    # Concatenate all netCDFs
-    ds = _concatenate_datasets(list_ds=list_ds, dim="time", verbose=verbose)
-
-    # --------------------------------------.
-    # Return xr.Dataset
-    return ds