disdrodb 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +2 -0
- disdrodb/_config.py +1 -0
- disdrodb/_version.py +2 -2
- disdrodb/accessor/__init__.py +1 -0
- disdrodb/accessor/methods.py +1 -0
- disdrodb/api/checks.py +2 -4
- disdrodb/api/io.py +224 -24
- disdrodb/api/path.py +2 -4
- disdrodb/cli/disdrodb_check_metadata_archive.py +1 -0
- disdrodb/cli/disdrodb_check_products_options.py +1 -0
- disdrodb/cli/disdrodb_create_summary.py +6 -6
- disdrodb/cli/disdrodb_create_summary_station.py +2 -2
- disdrodb/cli/disdrodb_data_archive_directory.py +1 -0
- disdrodb/cli/disdrodb_download_archive.py +5 -6
- disdrodb/cli/disdrodb_download_metadata_archive.py +1 -0
- disdrodb/cli/disdrodb_download_station.py +2 -3
- disdrodb/cli/disdrodb_initialize_station.py +3 -3
- disdrodb/cli/disdrodb_metadata_archive_directory.py +1 -0
- disdrodb/cli/disdrodb_open_data_archive.py +1 -2
- disdrodb/cli/disdrodb_open_logs_directory.py +2 -3
- disdrodb/cli/disdrodb_open_metadata_archive.py +1 -2
- disdrodb/cli/disdrodb_open_metadata_directory.py +2 -3
- disdrodb/cli/disdrodb_open_product_directory.py +1 -2
- disdrodb/cli/disdrodb_open_readers_directory.py +1 -0
- disdrodb/cli/disdrodb_run.py +6 -6
- disdrodb/cli/disdrodb_run_l0.py +6 -6
- disdrodb/cli/disdrodb_run_l0_station.py +3 -3
- disdrodb/cli/disdrodb_run_l0a.py +6 -6
- disdrodb/cli/disdrodb_run_l0a_station.py +3 -3
- disdrodb/cli/disdrodb_run_l0b.py +6 -6
- disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
- disdrodb/cli/disdrodb_run_l0c.py +6 -6
- disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
- disdrodb/cli/disdrodb_run_l1.py +6 -6
- disdrodb/cli/disdrodb_run_l1_station.py +3 -3
- disdrodb/cli/disdrodb_run_l2e.py +6 -6
- disdrodb/cli/disdrodb_run_l2e_station.py +3 -3
- disdrodb/cli/disdrodb_run_l2m.py +6 -6
- disdrodb/cli/disdrodb_run_l2m_station.py +3 -3
- disdrodb/cli/disdrodb_run_station.py +3 -3
- disdrodb/cli/disdrodb_upload_archive.py +6 -7
- disdrodb/cli/disdrodb_upload_station.py +3 -4
- disdrodb/configs.py +7 -8
- disdrodb/constants.py +1 -0
- disdrodb/data_transfer/download_data.py +8 -8
- disdrodb/data_transfer/upload_data.py +6 -8
- disdrodb/data_transfer/zenodo.py +1 -1
- disdrodb/fall_velocity/__init__.py +1 -0
- disdrodb/fall_velocity/graupel.py +1 -0
- disdrodb/fall_velocity/hail.py +1 -0
- disdrodb/fall_velocity/rain.py +1 -0
- disdrodb/issue/checks.py +1 -0
- disdrodb/issue/reader.py +1 -0
- disdrodb/issue/writer.py +1 -2
- disdrodb/l0/__init__.py +1 -0
- disdrodb/l0/check_configs.py +21 -23
- disdrodb/l0/check_standards.py +0 -1
- disdrodb/l0/configs/LPM/l0a_encodings.yml +17 -17
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +55 -55
- disdrodb/l0/configs/LPM/l0b_encodings.yml +17 -17
- disdrodb/l0/configs/LPM/raw_data_format.yml +17 -17
- disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +2 -2
- disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +2 -2
- disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +2 -2
- disdrodb/l0/configs/LPM_V0/raw_data_format.yml +2 -2
- disdrodb/l0/l0_reader.py +1 -0
- disdrodb/l0/l0a_processing.py +5 -5
- disdrodb/l0/l0b_nc_processing.py +1 -2
- disdrodb/l0/l0b_processing.py +1 -13
- disdrodb/l0/l0c_processing.py +2 -1
- disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -0
- disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +17 -17
- disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +17 -17
- disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +17 -17
- disdrodb/l0/readers/LPM/GERMANY/DWD.py +55 -52
- disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +18 -17
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_AQ.py +277 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +18 -17
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +18 -17
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +18 -18
- disdrodb/l0/readers/LPM/KIT/CHWALA.py +18 -17
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +17 -17
- disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +18 -17
- disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +18 -17
- disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +18 -17
- disdrodb/l0/readers/LPM/SLOVENIA/UL.py +18 -17
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +18 -17
- disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +18 -17
- disdrodb/l0/readers/LPM/USA/CHARLESTON.py +18 -17
- disdrodb/l0/readers/LPM/USA/DEVEX.py +255 -0
- disdrodb/l0/readers/LPM_V0/BELGIUM/ULIEGE.py +2 -2
- disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +3 -2
- disdrodb/l0/readers/ODM470/OCEAN/OCEANRAIN.py +1 -0
- disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +1 -0
- disdrodb/l0/readers/PARSIVEL/CHINA/CHONGQING.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/ARCTIC_2021.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/COMMON_2011.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/DAVOS_2009_2011.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_2009.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2008.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2010.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2011.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2012.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/GENEPI_2007.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/GRAND_ST_BERNARD_2007.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/GRAND_ST_BERNARD_2007_2.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/HPICONET_2010.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP2.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP3.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP4.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/PARADISO_2014.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/PARSIVEL_2007.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/RACLETS_2019.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/RACLETS_2019_WJF.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/RIETHOLZBACH_2011.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2017.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2019.py +1 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/UNIL_2022.py +1 -0
- disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_nc.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/MEXICO/OH_IIUNAM_nc.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P1.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/USA/CW3E.py +1 -0
- disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +1 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +1 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +1 -0
- disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +1 -0
- disdrodb/l0/readers/SWS250/BELGIUM/KMI.py +1 -0
- disdrodb/l0/readers/template_reader_raw_netcdf_data.py +1 -0
- disdrodb/l0/readers/template_reader_raw_text_data.py +1 -0
- disdrodb/l0/template_tools.py +6 -8
- disdrodb/l1/__init__.py +1 -0
- disdrodb/l1/classification.py +1 -0
- disdrodb/l1/resampling.py +5 -0
- disdrodb/l1_env/routines.py +1 -0
- disdrodb/l2/__init__.py +1 -0
- disdrodb/l2/empirical_dsd.py +1 -0
- disdrodb/l2/processing.py +1 -0
- disdrodb/metadata/checks.py +9 -10
- disdrodb/metadata/download.py +1 -0
- disdrodb/metadata/geolocation.py +2 -1
- disdrodb/metadata/info.py +2 -2
- disdrodb/metadata/search.py +0 -1
- disdrodb/physics/atmosphere.py +1 -0
- disdrodb/physics/water.py +1 -0
- disdrodb/physics/wrappers.py +1 -0
- disdrodb/psd/__init__.py +0 -1
- disdrodb/psd/fitting.py +1 -0
- disdrodb/psd/models.py +1 -0
- disdrodb/routines/__init__.py +1 -0
- disdrodb/routines/l0.py +13 -9
- disdrodb/routines/l1.py +17 -12
- disdrodb/routines/l2.py +4 -5
- disdrodb/routines/options.py +1 -0
- disdrodb/routines/options_validation.py +12 -12
- disdrodb/routines/wrappers.py +33 -33
- disdrodb/scattering/__init__.py +0 -1
- disdrodb/scattering/permittivity.py +1 -0
- disdrodb/scattering/routines.py +3 -3
- disdrodb/summary/routines.py +12 -5
- disdrodb/utils/archiving.py +2 -1
- disdrodb/utils/attrs.py +3 -2
- disdrodb/utils/compression.py +1 -2
- disdrodb/utils/coords.py +45 -0
- disdrodb/utils/dask.py +5 -2
- disdrodb/utils/dataframe.py +4 -3
- disdrodb/utils/decorators.py +2 -1
- disdrodb/utils/directories.py +2 -2
- disdrodb/utils/encoding.py +2 -1
- disdrodb/utils/manipulations.py +1 -0
- disdrodb/utils/pydantic.py +1 -0
- disdrodb/utils/routines.py +1 -0
- disdrodb/utils/time.py +3 -2
- disdrodb/utils/warnings.py +1 -0
- disdrodb/utils/writer.py +4 -0
- disdrodb/utils/xarray.py +1 -0
- disdrodb/viz/plots.py +1 -0
- {disdrodb-0.3.0.dist-info → disdrodb-0.4.0.dist-info}/METADATA +4 -3
- disdrodb-0.4.0.dist-info/RECORD +361 -0
- disdrodb-0.3.0.dist-info/RECORD +0 -358
- {disdrodb-0.3.0.dist-info → disdrodb-0.4.0.dist-info}/WHEEL +0 -0
- {disdrodb-0.3.0.dist-info → disdrodb-0.4.0.dist-info}/entry_points.txt +0 -0
- {disdrodb-0.3.0.dist-info → disdrodb-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.3.0.dist-info → disdrodb-0.4.0.dist-info}/top_level.txt +0 -0
disdrodb/__init__.py
CHANGED
|
@@ -20,6 +20,8 @@ import contextlib
|
|
|
20
20
|
import os
|
|
21
21
|
from importlib.metadata import PackageNotFoundError, version
|
|
22
22
|
|
|
23
|
+
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
|
|
24
|
+
|
|
23
25
|
import disdrodb.accessor # noqa
|
|
24
26
|
from disdrodb._config import config # noqa
|
|
25
27
|
from disdrodb.api.configs import available_sensor_names
|
disdrodb/_config.py
CHANGED
disdrodb/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.3.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 3, 0)
|
|
31
|
+
__version__ = version = '0.4.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 4, 0)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
disdrodb/accessor/__init__.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""This directory defines DISDRODB xarray accessors."""
|
|
18
|
+
|
|
18
19
|
from .methods import DISDRODB_DataArray_Accessor, DISDRODB_Dataset_Accessor
|
|
19
20
|
|
|
20
21
|
__all__ = ["DISDRODB_DataArray_Accessor", "DISDRODB_Dataset_Accessor"]
|
disdrodb/accessor/methods.py
CHANGED
disdrodb/api/checks.py
CHANGED
|
@@ -15,12 +15,12 @@
|
|
|
15
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""DISDRODB Checks Functions."""
|
|
18
|
+
|
|
18
19
|
import datetime
|
|
19
20
|
import difflib
|
|
20
21
|
import logging
|
|
21
22
|
import os
|
|
22
23
|
import re
|
|
23
|
-
import sys
|
|
24
24
|
import warnings
|
|
25
25
|
|
|
26
26
|
import numpy as np
|
|
@@ -565,9 +565,7 @@ def check_filepaths(filepaths):
|
|
|
565
565
|
|
|
566
566
|
def get_current_utc_time():
|
|
567
567
|
"""Get current UTC time."""
|
|
568
|
-
|
|
569
|
-
return datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
|
|
570
|
-
return datetime.datetime.utcnow()
|
|
568
|
+
return datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
|
|
571
569
|
|
|
572
570
|
|
|
573
571
|
def check_start_end_time(start_time, end_time):
|
disdrodb/api/io.py
CHANGED
|
@@ -15,21 +15,23 @@
|
|
|
15
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""Routines to list and open DISDRODB products."""
|
|
18
|
+
|
|
18
19
|
import datetime
|
|
20
|
+
import functools
|
|
19
21
|
import os
|
|
20
22
|
import subprocess
|
|
21
23
|
import sys
|
|
22
24
|
from pathlib import Path
|
|
23
|
-
from typing import Optional
|
|
24
25
|
|
|
25
26
|
import numpy as np
|
|
26
27
|
|
|
27
28
|
from disdrodb.api.checks import (
|
|
28
29
|
check_filepaths,
|
|
29
30
|
check_start_end_time,
|
|
31
|
+
check_time,
|
|
30
32
|
get_current_utc_time,
|
|
31
33
|
)
|
|
32
|
-
from disdrodb.api.info import get_start_end_time_from_filepaths
|
|
34
|
+
from disdrodb.api.info import get_start_end_time_from_filepaths, group_filepaths
|
|
33
35
|
from disdrodb.api.path import (
|
|
34
36
|
define_campaign_dir,
|
|
35
37
|
define_data_dir,
|
|
@@ -130,8 +132,8 @@ def find_files(
|
|
|
130
132
|
station_name,
|
|
131
133
|
product,
|
|
132
134
|
debugging_mode: bool = False,
|
|
133
|
-
data_archive_dir: Optional[str] = None,
|
|
134
|
-
metadata_archive_dir: Optional[str] = None,
|
|
135
|
+
data_archive_dir: str | None = None,
|
|
136
|
+
metadata_archive_dir: str | None = None,
|
|
135
137
|
glob_pattern=None,
|
|
136
138
|
start_time=None,
|
|
137
139
|
end_time=None,
|
|
@@ -289,6 +291,42 @@ def _open_raw_files(filepaths, data_source, campaign_name, station_name, metadat
|
|
|
289
291
|
return ds
|
|
290
292
|
|
|
291
293
|
|
|
294
|
+
def list_coordinates_names(ds):
|
|
295
|
+
"""List coordinates of a xarray.Dataset not CF decoded !."""
|
|
296
|
+
coords = set()
|
|
297
|
+
for v in ds.variables:
|
|
298
|
+
attrs = ds[v].attrs
|
|
299
|
+
# auxiliary coordinates
|
|
300
|
+
if "coordinates" in attrs:
|
|
301
|
+
coords |= set(attrs["coordinates"].split())
|
|
302
|
+
# bounds variables
|
|
303
|
+
if "bounds" in attrs:
|
|
304
|
+
coords.add(attrs["bounds"])
|
|
305
|
+
# grid mapping
|
|
306
|
+
if "grid_mapping" in attrs:
|
|
307
|
+
coords.add(attrs["grid_mapping"])
|
|
308
|
+
return coords
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def subset_variables(ds, variables):
|
|
312
|
+
"""Subset variables while keeping coordinates."""
|
|
313
|
+
# Ensure list
|
|
314
|
+
variables = list(variables)
|
|
315
|
+
|
|
316
|
+
# Always keep dimension variables
|
|
317
|
+
dim_vars = list(ds.dims)
|
|
318
|
+
|
|
319
|
+
# Variables referenced by CF relationships
|
|
320
|
+
coords = list_coordinates_names(ds)
|
|
321
|
+
|
|
322
|
+
# Union of everything we must keep
|
|
323
|
+
keep = set(variables) | set(dim_vars) | coords
|
|
324
|
+
|
|
325
|
+
# Only keep variables that exist
|
|
326
|
+
keep = [v for v in keep if v in list(ds.variables)]
|
|
327
|
+
return ds[keep]
|
|
328
|
+
|
|
329
|
+
|
|
292
330
|
def filter_dataset_by_time(ds, start_time=None, end_time=None):
|
|
293
331
|
"""Subset an xarray.Dataset by time, robust to duplicated/non-monotonic indices.
|
|
294
332
|
|
|
@@ -318,6 +356,84 @@ def filter_dataset_by_time(ds, start_time=None, end_time=None):
|
|
|
318
356
|
return ds.isel(time=np.where(mask)[0])
|
|
319
357
|
|
|
320
358
|
|
|
359
|
+
def open_parquet_files(
|
|
360
|
+
filepaths,
|
|
361
|
+
variables=None,
|
|
362
|
+
start_time=None,
|
|
363
|
+
end_time=None,
|
|
364
|
+
time_col="time",
|
|
365
|
+
use_threads=True,
|
|
366
|
+
):
|
|
367
|
+
"""Open Parquet files."""
|
|
368
|
+
import pyarrow.dataset as ds
|
|
369
|
+
|
|
370
|
+
# Open dataset
|
|
371
|
+
dataset = ds.dataset(
|
|
372
|
+
filepaths,
|
|
373
|
+
format="parquet",
|
|
374
|
+
)
|
|
375
|
+
|
|
376
|
+
# Define filters
|
|
377
|
+
filters = []
|
|
378
|
+
if start_time is not None:
|
|
379
|
+
start_time = check_time(start_time)
|
|
380
|
+
filters.append(ds.field(time_col) >= start_time)
|
|
381
|
+
|
|
382
|
+
if end_time is not None:
|
|
383
|
+
end_time = check_time(end_time)
|
|
384
|
+
filters.append(ds.field(time_col) <= end_time)
|
|
385
|
+
|
|
386
|
+
# Combine filters if any exist
|
|
387
|
+
filter_expr = None
|
|
388
|
+
if filters:
|
|
389
|
+
filter_expr = filters[0]
|
|
390
|
+
for f in filters[1:]:
|
|
391
|
+
filter_expr = filter_expr & f
|
|
392
|
+
|
|
393
|
+
# Read table and convert to pandas
|
|
394
|
+
df = dataset.to_table(
|
|
395
|
+
columns=variables,
|
|
396
|
+
filter=filter_expr,
|
|
397
|
+
use_threads=use_threads,
|
|
398
|
+
).to_pandas()
|
|
399
|
+
return df
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def ensure_safe_open_mfdataset(function):
|
|
403
|
+
"""Decorator to ensure safe xarray open_mfdataset.
|
|
404
|
+
|
|
405
|
+
parallel argument is changed to False if:
|
|
406
|
+
- dask threading or single-threaded is active
|
|
407
|
+
- distributed multiprocessing with more than 1 thread per process
|
|
408
|
+
|
|
409
|
+
parallel argument is allowed to be True only if:
|
|
410
|
+
- distributed multiprocessing with only 1 thread per process
|
|
411
|
+
"""
|
|
412
|
+
import dask
|
|
413
|
+
|
|
414
|
+
from disdrodb.utils.dask import check_parallel_validity
|
|
415
|
+
|
|
416
|
+
@functools.wraps(function)
|
|
417
|
+
def wrapper(*args, **kwargs):
|
|
418
|
+
# Check if it must be a delayed function
|
|
419
|
+
parallel = kwargs.get("parallel", False)
|
|
420
|
+
parallel = check_parallel_validity(parallel)
|
|
421
|
+
kwargs["parallel"] = parallel
|
|
422
|
+
|
|
423
|
+
# If parallel is True at this stage, means being using
|
|
424
|
+
# multiprocessing or dask.distributed with single thread
|
|
425
|
+
if parallel:
|
|
426
|
+
return function(*args, **kwargs)
|
|
427
|
+
|
|
428
|
+
# Call function with single threading
|
|
429
|
+
with dask.config.set(scheduler="single-threaded"): # "synchronous"
|
|
430
|
+
result = function(*args, **kwargs)
|
|
431
|
+
return result
|
|
432
|
+
|
|
433
|
+
return wrapper
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
@ensure_safe_open_mfdataset
|
|
321
437
|
def open_netcdf_files(
|
|
322
438
|
filepaths,
|
|
323
439
|
chunks=-1,
|
|
@@ -326,11 +442,33 @@ def open_netcdf_files(
|
|
|
326
442
|
variables=None,
|
|
327
443
|
parallel=False,
|
|
328
444
|
compute=True,
|
|
445
|
+
engine="netcdf4",
|
|
329
446
|
**open_kwargs,
|
|
330
447
|
):
|
|
331
448
|
"""Open DISDRODB netCDF files using xarray.
|
|
332
449
|
|
|
333
|
-
Using
|
|
450
|
+
Using data_vars="minimal", coords="minimal", compat="override"
|
|
451
|
+
--> will only concatenate those variables with the time dimension,
|
|
452
|
+
--> will skip any checking for variables that don't have a time dimension
|
|
453
|
+
(simply pick the variable from the first file).
|
|
454
|
+
https://github.com/pydata/xarray/issues/1385#issuecomment-1958761334
|
|
455
|
+
|
|
456
|
+
Using combine="nested" and join="outer" ensure that duplicated timesteps
|
|
457
|
+
are not overwritten!
|
|
458
|
+
|
|
459
|
+
When decode_cf=False
|
|
460
|
+
--> lat,lon are data_vars and get concatenated without any checking or reading
|
|
461
|
+
When decode_cf=True
|
|
462
|
+
--> lat, lon are promoted to coords, then get checked for equality across all files
|
|
463
|
+
|
|
464
|
+
For L0B product, if sample_interval variable is present and varies with time,
|
|
465
|
+
this function concatenate the variable over time without problems.
|
|
466
|
+
For L0C product, if sample_interval changes across listed files,
|
|
467
|
+
only sample_interval of first file is reported.
|
|
468
|
+
--> open_dataset take care of just providing filepaths of files with same sample interval.
|
|
469
|
+
In L1 and L2 processing, only filepaths of files with same sample interval
|
|
470
|
+
must be passed to this function.
|
|
471
|
+
|
|
334
472
|
"""
|
|
335
473
|
import xarray as xr
|
|
336
474
|
|
|
@@ -341,35 +479,64 @@ def open_netcdf_files(
|
|
|
341
479
|
variables = np.unique(variables).tolist()
|
|
342
480
|
|
|
343
481
|
# Define preprocessing function for parallel opening
|
|
344
|
-
|
|
482
|
+
if parallel and variables is not None:
|
|
483
|
+
|
|
484
|
+
def preprocess(ds):
|
|
485
|
+
return subset_variables(ds, variables)
|
|
486
|
+
|
|
487
|
+
else:
|
|
488
|
+
preprocess = None
|
|
345
489
|
|
|
346
490
|
# Open netcdf
|
|
491
|
+
xr.set_options(use_new_combine_kwarg_defaults=True)
|
|
347
492
|
ds = xr.open_mfdataset(
|
|
348
493
|
filepaths,
|
|
349
494
|
chunks=chunks,
|
|
350
|
-
data_vars="all",
|
|
351
495
|
combine="nested",
|
|
352
|
-
join="outer",
|
|
353
496
|
concat_dim="time",
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
compat="no_conflicts"
|
|
497
|
+
data_vars="minimal", # ["sample_interval"], "all" would concat all across time
|
|
498
|
+
coords="minimal",
|
|
499
|
+
join="outer", # "exact"
|
|
500
|
+
compat="override", # "no_conflicts" slows down
|
|
358
501
|
combine_attrs="override",
|
|
359
|
-
|
|
502
|
+
preprocess=preprocess, # only if parallel=True
|
|
503
|
+
engine=engine,
|
|
504
|
+
parallel=parallel,
|
|
505
|
+
decode_cf=False, # assume encoding do not vary across files (e.g. "time" units)
|
|
506
|
+
decode_coords=False, # no effect if decode_cf=False
|
|
360
507
|
decode_timedelta=False,
|
|
361
508
|
cache=False,
|
|
362
509
|
autoclose=True,
|
|
363
510
|
**open_kwargs,
|
|
364
511
|
)
|
|
365
|
-
|
|
512
|
+
|
|
513
|
+
# Decode CF
|
|
514
|
+
# - Set to coordinates the variables
|
|
515
|
+
# - latitude/longitude/altitude
|
|
516
|
+
# - sample_interval
|
|
517
|
+
# - diameter/velocity bin width/upper/lower
|
|
518
|
+
ds = xr.decode_cf(ds, decode_times=True, decode_coords=True, decode_timedelta=False)
|
|
519
|
+
|
|
520
|
+
# Subset variables
|
|
521
|
+
# --> After decoding CF, when coordinates are properly set
|
|
522
|
+
# --> Othewerwise, coordinate variables would be removed unless listed in variables
|
|
366
523
|
if variables is not None and preprocess is None:
|
|
367
524
|
variables = [var for var in variables if var in ds]
|
|
368
525
|
ds = ds[variables]
|
|
369
|
-
|
|
526
|
+
|
|
527
|
+
# Subset time
|
|
370
528
|
if start_time is not None or end_time is not None:
|
|
371
529
|
ds = filter_dataset_by_time(ds, start_time=start_time, end_time=end_time)
|
|
372
|
-
|
|
530
|
+
|
|
531
|
+
# Ensure coordinates are already loaded in memory
|
|
532
|
+
for coord in list(ds.coords):
|
|
533
|
+
ds[coord] = ds[coord].load()
|
|
534
|
+
|
|
535
|
+
# Update time coverage attributes
|
|
536
|
+
ds.attrs["time_coverage_start"] = str(ds.disdrodb.start_time)
|
|
537
|
+
ds.attrs["time_coverage_end"] = str(ds.disdrodb.end_time)
|
|
538
|
+
|
|
539
|
+
# If compute=True, load in memory and close connections to files
|
|
373
540
|
if compute:
|
|
374
541
|
dataset = ds.compute()
|
|
375
542
|
ds.close()
|
|
@@ -387,8 +554,8 @@ def open_dataset(
|
|
|
387
554
|
product,
|
|
388
555
|
product_kwargs=None,
|
|
389
556
|
debugging_mode: bool = False,
|
|
390
|
-
data_archive_dir: Optional[str] = None,
|
|
391
|
-
metadata_archive_dir: Optional[str] = None,
|
|
557
|
+
data_archive_dir: str | None = None,
|
|
558
|
+
metadata_archive_dir: str | None = None,
|
|
392
559
|
chunks=-1,
|
|
393
560
|
parallel=False,
|
|
394
561
|
compute=False,
|
|
@@ -430,7 +597,7 @@ def open_dataset(
|
|
|
430
597
|
xarray.Dataset
|
|
431
598
|
|
|
432
599
|
"""
|
|
433
|
-
|
|
600
|
+
import xarray as xr
|
|
434
601
|
|
|
435
602
|
# Extract product kwargs from open_kwargs
|
|
436
603
|
product_kwargs = extract_product_kwargs(open_kwargs, product=product)
|
|
@@ -464,9 +631,46 @@ def open_dataset(
|
|
|
464
631
|
|
|
465
632
|
# Open L0A Parquet files
|
|
466
633
|
if product == "L0A":
|
|
467
|
-
|
|
634
|
+
df = open_parquet_files(
|
|
635
|
+
filepaths=filepaths,
|
|
636
|
+
variables=variables,
|
|
637
|
+
start_time=start_time,
|
|
638
|
+
end_time=end_time,
|
|
639
|
+
use_threads=parallel,
|
|
640
|
+
)
|
|
641
|
+
return df
|
|
468
642
|
|
|
469
643
|
# Open DISDRODB netCDF files using xarray
|
|
644
|
+
# - Special handling for L0C product with possible multiple sample intervals
|
|
645
|
+
if product == "L0C":
|
|
646
|
+
dict_sample_intervals = group_filepaths(filepaths, groups="sample_interval")
|
|
647
|
+
if len(dict_sample_intervals) > 1:
|
|
648
|
+
# Open separately each sample interval
|
|
649
|
+
list_ds = [
|
|
650
|
+
open_netcdf_files(
|
|
651
|
+
filepaths=filepaths,
|
|
652
|
+
chunks=chunks,
|
|
653
|
+
start_time=start_time,
|
|
654
|
+
end_time=end_time,
|
|
655
|
+
variables=variables,
|
|
656
|
+
parallel=parallel,
|
|
657
|
+
compute=compute,
|
|
658
|
+
**open_kwargs,
|
|
659
|
+
)
|
|
660
|
+
for filepaths in dict_sample_intervals.values()
|
|
661
|
+
]
|
|
662
|
+
# Expand sample_interval coordinate for each dataset
|
|
663
|
+
list_ds = [ds.assign_coords(sample_interval=ds.sample_interval.expand_dims(time=ds.time)) for ds in list_ds]
|
|
664
|
+
# Concatenate along time dimension and sort by time
|
|
665
|
+
ds = xr.concat(list_ds, dim="time")
|
|
666
|
+
ds.attrs["measurement_interval"] = list(dict_sample_intervals)
|
|
667
|
+
ds = ds.sortby("time")
|
|
668
|
+
# Update time coverage attributes
|
|
669
|
+
ds.attrs["time_coverage_start"] = str(ds.disdrodb.start_time)
|
|
670
|
+
ds.attrs["time_coverage_end"] = str(ds.disdrodb.end_time)
|
|
671
|
+
return ds
|
|
672
|
+
|
|
673
|
+
# Otherwise, open all files together
|
|
470
674
|
ds = open_netcdf_files(
|
|
471
675
|
filepaths=filepaths,
|
|
472
676
|
chunks=chunks,
|
|
@@ -477,10 +681,6 @@ def open_dataset(
|
|
|
477
681
|
compute=compute,
|
|
478
682
|
**open_kwargs,
|
|
479
683
|
)
|
|
480
|
-
|
|
481
|
-
# Ensure coordinates in memory
|
|
482
|
-
# for coord in list(ds.coords):
|
|
483
|
-
# ds[coord] = ds[coord].compute()
|
|
484
684
|
return ds
|
|
485
685
|
|
|
486
686
|
|
disdrodb/api/path.py
CHANGED
|
@@ -15,13 +15,13 @@
|
|
|
15
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""Define paths within the DISDRODB infrastructure."""
|
|
18
|
+
|
|
18
19
|
import os
|
|
19
20
|
|
|
20
21
|
from disdrodb.configs import get_data_archive_dir, get_metadata_archive_dir
|
|
21
22
|
from disdrodb.constants import ARCHIVE_VERSION
|
|
22
23
|
from disdrodb.utils.directories import check_directory_exists
|
|
23
24
|
from disdrodb.utils.time import (
|
|
24
|
-
ensure_sample_interval_in_seconds,
|
|
25
25
|
get_file_start_end_time,
|
|
26
26
|
seconds_to_temporal_resolution,
|
|
27
27
|
)
|
|
@@ -803,10 +803,8 @@ def define_l0b_filename(ds, campaign_name: str, station_name: str) -> str:
|
|
|
803
803
|
return filename
|
|
804
804
|
|
|
805
805
|
|
|
806
|
-
def define_l0c_filename(ds, campaign_name: str, station_name: str) -> str:
|
|
806
|
+
def define_l0c_filename(ds, campaign_name: str, station_name: str, sample_interval: str) -> str:
|
|
807
807
|
"""Define L0C file name."""
|
|
808
|
-
# TODO: add sample_interval as function argument
|
|
809
|
-
sample_interval = int(ensure_sample_interval_in_seconds(ds["sample_interval"]).data.item())
|
|
810
808
|
temporal_resolution = define_temporal_resolution(sample_interval, rolling=False)
|
|
811
809
|
starting_time, ending_time = get_file_start_end_time(ds)
|
|
812
810
|
starting_time = starting_time.strftime("%Y%m%d%H%M%S")
|
|
disdrodb/cli/disdrodb_check_metadata_archive.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""Wrapper to check DISDRODB Metadata Archive Compliance from terminal."""
|
|
18
|
+
|
|
18
19
|
import sys
|
|
19
20
|
|
|
20
21
|
import click
|
|
disdrodb/cli/disdrodb_check_products_options.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""Script to check the validity of the DISDRODB products configuration files."""
|
|
18
|
+
|
|
18
19
|
import sys
|
|
19
20
|
|
|
20
21
|
import click
|
|
disdrodb/cli/disdrodb_create_summary.py
CHANGED
|
@@ -15,8 +15,8 @@
|
|
|
15
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""Script to create summary figures and tables for a DISDRODB stationn."""
|
|
18
|
+
|
|
18
19
|
import sys
|
|
19
|
-
from typing import Optional
|
|
20
20
|
|
|
21
21
|
import click
|
|
22
22
|
|
|
@@ -49,15 +49,15 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
|
|
|
49
49
|
)
|
|
50
50
|
def disdrodb_create_summary(
|
|
51
51
|
# Stations options
|
|
52
|
-
data_sources:
|
|
53
|
-
campaign_names:
|
|
54
|
-
station_names:
|
|
52
|
+
data_sources: str | None = None,
|
|
53
|
+
campaign_names: str | None = None,
|
|
54
|
+
station_names: str | None = None,
|
|
55
55
|
# Processing options:
|
|
56
56
|
parallel=False,
|
|
57
57
|
temporal_resolution="1MIN",
|
|
58
58
|
# DISDRODB root directories
|
|
59
|
-
data_archive_dir:
|
|
60
|
-
metadata_archive_dir:
|
|
59
|
+
data_archive_dir: str | None = None,
|
|
60
|
+
metadata_archive_dir: str | None = None,
|
|
61
61
|
):
|
|
62
62
|
"""Create summary figures and tables for DISDRODB stations.
|
|
63
63
|
|
|
disdrodb/cli/disdrodb_create_summary_station.py
CHANGED
|
@@ -15,8 +15,8 @@
|
|
|
15
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""Script to create summary figures and tables for a DISDRODB station."""
|
|
18
|
+
|
|
18
19
|
import sys
|
|
19
|
-
from typing import Optional
|
|
20
20
|
|
|
21
21
|
import click
|
|
22
22
|
|
|
@@ -53,7 +53,7 @@ def disdrodb_create_summary_station(
|
|
|
53
53
|
parallel=False,
|
|
54
54
|
temporal_resolution="1MIN",
|
|
55
55
|
# DISDRODB root directories
|
|
56
|
-
data_archive_dir:
|
|
56
|
+
data_archive_dir: str | None = None,
|
|
57
57
|
):
|
|
58
58
|
"""Create summary figures and tables for a specific DISDRODB station.
|
|
59
59
|
|
|
disdrodb/cli/disdrodb_download_archive.py
CHANGED
|
@@ -17,7 +17,6 @@
|
|
|
17
17
|
"""Wrapper to download stations from the DISDRODB Decentralized Data Archive."""
|
|
18
18
|
|
|
19
19
|
import sys
|
|
20
|
-
from typing import Optional
|
|
21
20
|
|
|
22
21
|
import click
|
|
23
22
|
|
|
@@ -38,11 +37,11 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
|
|
|
38
37
|
@click_metadata_archive_dir_option
|
|
39
38
|
@click_download_options
|
|
40
39
|
def disdrodb_download_archive(
|
|
41
|
-
data_sources:
|
|
42
|
-
campaign_names:
|
|
43
|
-
station_names:
|
|
44
|
-
data_archive_dir:
|
|
45
|
-
metadata_archive_dir:
|
|
40
|
+
data_sources: str | None = None,
|
|
41
|
+
campaign_names: str | None = None,
|
|
42
|
+
station_names: str | None = None,
|
|
43
|
+
data_archive_dir: str | None = None,
|
|
44
|
+
metadata_archive_dir: str | None = None,
|
|
46
45
|
force: bool = False,
|
|
47
46
|
):
|
|
48
47
|
"""Download raw data for multiple DISDRODB stations from the DISDRODB Decentralized Data Archive.
|
|
disdrodb/cli/disdrodb_download_metadata_archive.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""Routine to download the DISDRODB Metadata Data Archive."""
|
|
18
|
+
|
|
18
19
|
import sys
|
|
19
20
|
from pathlib import Path
|
|
20
21
|
|
|
disdrodb/cli/disdrodb_download_station.py
CHANGED
|
@@ -17,7 +17,6 @@
|
|
|
17
17
|
"""Routine to download station data from the DISDRODB Decentralized Data Archive."""
|
|
18
18
|
|
|
19
19
|
import sys
|
|
20
|
-
from typing import Optional
|
|
21
20
|
|
|
22
21
|
import click
|
|
23
22
|
|
|
@@ -41,8 +40,8 @@ def disdrodb_download_station(
|
|
|
41
40
|
data_source: str,
|
|
42
41
|
campaign_name: str,
|
|
43
42
|
station_name: str,
|
|
44
|
-
data_archive_dir:
|
|
45
|
-
metadata_archive_dir:
|
|
43
|
+
data_archive_dir: str | None = None,
|
|
44
|
+
metadata_archive_dir: str | None = None,
|
|
46
45
|
force: bool = False,
|
|
47
46
|
):
|
|
48
47
|
"""Download raw data of a single station from the DISDRODB Decentralized Data Archive.
|
|
disdrodb/cli/disdrodb_initialize_station.py
CHANGED
|
@@ -15,8 +15,8 @@
|
|
|
15
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""Script to initialize the DISDRODB station directory structure."""
|
|
18
|
+
|
|
18
19
|
import sys
|
|
19
|
-
from typing import Optional
|
|
20
20
|
|
|
21
21
|
import click
|
|
22
22
|
|
|
@@ -43,8 +43,8 @@ def disdrodb_initialize_station(
|
|
|
43
43
|
campaign_name: str,
|
|
44
44
|
station_name: str,
|
|
45
45
|
# DISDRODB root directories
|
|
46
|
-
data_archive_dir:
|
|
47
|
-
metadata_archive_dir:
|
|
46
|
+
data_archive_dir: str | None = None,
|
|
47
|
+
metadata_archive_dir: str | None = None,
|
|
48
48
|
):
|
|
49
49
|
"""Initialize the DISDRODB directory structure for a new station.
|
|
50
50
|
|
|
disdrodb/cli/disdrodb_open_data_archive.py
CHANGED
|
@@ -17,7 +17,6 @@
|
|
|
17
17
|
"""Routine to open the DISDRODB Data Archive."""
|
|
18
18
|
|
|
19
19
|
import sys
|
|
20
|
-
from typing import Optional
|
|
21
20
|
|
|
22
21
|
import click
|
|
23
22
|
|
|
@@ -32,7 +31,7 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
|
|
|
32
31
|
@click.command()
|
|
33
32
|
@click_data_archive_dir_option
|
|
34
33
|
def disdrodb_open_data_archive(
|
|
35
|
-
data_archive_dir:
|
|
34
|
+
data_archive_dir: str | None = None,
|
|
36
35
|
):
|
|
37
36
|
"""Open the DISDRODB Data Archive directory in the system file explorer.
|
|
38
37
|
|
|
disdrodb/cli/disdrodb_open_logs_directory.py
CHANGED
|
@@ -17,7 +17,6 @@
|
|
|
17
17
|
"""Routine to open the DISDRODB Data Archive logs directory."""
|
|
18
18
|
|
|
19
19
|
import sys
|
|
20
|
-
from typing import Optional
|
|
21
20
|
|
|
22
21
|
import click
|
|
23
22
|
|
|
@@ -36,8 +35,8 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
|
|
|
36
35
|
def disdrodb_open_logs_directory(
|
|
37
36
|
data_source: str,
|
|
38
37
|
campaign_name: str,
|
|
39
|
-
station_name:
|
|
40
|
-
data_archive_dir:
|
|
38
|
+
station_name: str | None = None,
|
|
39
|
+
data_archive_dir: str | None = None,
|
|
41
40
|
):
|
|
42
41
|
"""Open the DISDRODB Data Archive station logs directory in the system file explorer.
|
|
43
42
|
|
|
disdrodb/cli/disdrodb_open_metadata_archive.py
CHANGED
|
@@ -17,7 +17,6 @@
|
|
|
17
17
|
"""Routine to open the DISDRODB Metadata Data Archive."""
|
|
18
18
|
|
|
19
19
|
import sys
|
|
20
|
-
from typing import Optional
|
|
21
20
|
|
|
22
21
|
import click
|
|
23
22
|
|
|
@@ -32,7 +31,7 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
|
|
|
32
31
|
@click.command()
|
|
33
32
|
@click_metadata_archive_dir_option
|
|
34
33
|
def disdrodb_open_metadata_archive(
|
|
35
|
-
metadata_archive_dir:
|
|
34
|
+
metadata_archive_dir: str | None = None,
|
|
36
35
|
):
|
|
37
36
|
"""Open the DISDRODB Metadata Archive directory in the system file explorer.
|
|
38
37
|
|