disdrodb 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. disdrodb/__init__.py +2 -0
  2. disdrodb/_config.py +1 -0
  3. disdrodb/_version.py +2 -2
  4. disdrodb/accessor/__init__.py +1 -0
  5. disdrodb/accessor/methods.py +1 -0
  6. disdrodb/api/checks.py +2 -4
  7. disdrodb/api/io.py +224 -24
  8. disdrodb/api/path.py +2 -4
  9. disdrodb/cli/disdrodb_check_metadata_archive.py +1 -0
  10. disdrodb/cli/disdrodb_check_products_options.py +1 -0
  11. disdrodb/cli/disdrodb_create_summary.py +6 -6
  12. disdrodb/cli/disdrodb_create_summary_station.py +2 -2
  13. disdrodb/cli/disdrodb_data_archive_directory.py +1 -0
  14. disdrodb/cli/disdrodb_download_archive.py +5 -6
  15. disdrodb/cli/disdrodb_download_metadata_archive.py +1 -0
  16. disdrodb/cli/disdrodb_download_station.py +2 -3
  17. disdrodb/cli/disdrodb_initialize_station.py +3 -3
  18. disdrodb/cli/disdrodb_metadata_archive_directory.py +1 -0
  19. disdrodb/cli/disdrodb_open_data_archive.py +1 -2
  20. disdrodb/cli/disdrodb_open_logs_directory.py +2 -3
  21. disdrodb/cli/disdrodb_open_metadata_archive.py +1 -2
  22. disdrodb/cli/disdrodb_open_metadata_directory.py +2 -3
  23. disdrodb/cli/disdrodb_open_product_directory.py +1 -2
  24. disdrodb/cli/disdrodb_open_readers_directory.py +1 -0
  25. disdrodb/cli/disdrodb_run.py +6 -6
  26. disdrodb/cli/disdrodb_run_l0.py +6 -6
  27. disdrodb/cli/disdrodb_run_l0_station.py +3 -3
  28. disdrodb/cli/disdrodb_run_l0a.py +6 -6
  29. disdrodb/cli/disdrodb_run_l0a_station.py +3 -3
  30. disdrodb/cli/disdrodb_run_l0b.py +6 -6
  31. disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
  32. disdrodb/cli/disdrodb_run_l0c.py +6 -6
  33. disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
  34. disdrodb/cli/disdrodb_run_l1.py +6 -6
  35. disdrodb/cli/disdrodb_run_l1_station.py +3 -3
  36. disdrodb/cli/disdrodb_run_l2e.py +6 -6
  37. disdrodb/cli/disdrodb_run_l2e_station.py +3 -3
  38. disdrodb/cli/disdrodb_run_l2m.py +6 -6
  39. disdrodb/cli/disdrodb_run_l2m_station.py +3 -3
  40. disdrodb/cli/disdrodb_run_station.py +3 -3
  41. disdrodb/cli/disdrodb_upload_archive.py +6 -7
  42. disdrodb/cli/disdrodb_upload_station.py +3 -4
  43. disdrodb/configs.py +7 -8
  44. disdrodb/constants.py +1 -0
  45. disdrodb/data_transfer/download_data.py +8 -8
  46. disdrodb/data_transfer/upload_data.py +6 -8
  47. disdrodb/data_transfer/zenodo.py +1 -1
  48. disdrodb/fall_velocity/__init__.py +1 -0
  49. disdrodb/fall_velocity/graupel.py +1 -0
  50. disdrodb/fall_velocity/hail.py +1 -0
  51. disdrodb/fall_velocity/rain.py +1 -0
  52. disdrodb/issue/checks.py +1 -0
  53. disdrodb/issue/reader.py +1 -0
  54. disdrodb/issue/writer.py +1 -2
  55. disdrodb/l0/__init__.py +1 -0
  56. disdrodb/l0/check_configs.py +21 -23
  57. disdrodb/l0/check_standards.py +0 -1
  58. disdrodb/l0/configs/LPM/l0a_encodings.yml +17 -17
  59. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +55 -55
  60. disdrodb/l0/configs/LPM/l0b_encodings.yml +17 -17
  61. disdrodb/l0/configs/LPM/raw_data_format.yml +17 -17
  62. disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +2 -2
  63. disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +2 -2
  64. disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +2 -2
  65. disdrodb/l0/configs/LPM_V0/raw_data_format.yml +2 -2
  66. disdrodb/l0/l0_reader.py +1 -0
  67. disdrodb/l0/l0a_processing.py +5 -5
  68. disdrodb/l0/l0b_nc_processing.py +1 -2
  69. disdrodb/l0/l0b_processing.py +1 -13
  70. disdrodb/l0/l0c_processing.py +2 -1
  71. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -0
  72. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +17 -17
  73. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +17 -17
  74. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +17 -17
  75. disdrodb/l0/readers/LPM/GERMANY/DWD.py +55 -52
  76. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +18 -17
  77. disdrodb/l0/readers/LPM/ITALY/GID_LPM_AQ.py +277 -0
  78. disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +18 -17
  79. disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +18 -17
  80. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +18 -18
  81. disdrodb/l0/readers/LPM/KIT/CHWALA.py +18 -17
  82. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +17 -17
  83. disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +18 -17
  84. disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +18 -17
  85. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +18 -17
  86. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +18 -17
  87. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +18 -17
  88. disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +18 -17
  89. disdrodb/l0/readers/LPM/USA/CHARLESTON.py +18 -17
  90. disdrodb/l0/readers/LPM/USA/DEVEX.py +255 -0
  91. disdrodb/l0/readers/LPM_V0/BELGIUM/ULIEGE.py +2 -2
  92. disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +3 -2
  93. disdrodb/l0/readers/ODM470/OCEAN/OCEANRAIN.py +1 -0
  94. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +1 -0
  95. disdrodb/l0/readers/PARSIVEL/CHINA/CHONGQING.py +1 -0
  96. disdrodb/l0/readers/PARSIVEL/EPFL/ARCTIC_2021.py +1 -0
  97. disdrodb/l0/readers/PARSIVEL/EPFL/COMMON_2011.py +1 -0
  98. disdrodb/l0/readers/PARSIVEL/EPFL/DAVOS_2009_2011.py +1 -0
  99. disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_2009.py +1 -0
  100. disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2008.py +1 -0
  101. disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2010.py +1 -0
  102. disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2011.py +1 -0
  103. disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2012.py +1 -0
  104. disdrodb/l0/readers/PARSIVEL/EPFL/GENEPI_2007.py +1 -0
  105. disdrodb/l0/readers/PARSIVEL/EPFL/GRAND_ST_BERNARD_2007.py +1 -0
  106. disdrodb/l0/readers/PARSIVEL/EPFL/GRAND_ST_BERNARD_2007_2.py +1 -0
  107. disdrodb/l0/readers/PARSIVEL/EPFL/HPICONET_2010.py +1 -0
  108. disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP2.py +1 -0
  109. disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP3.py +1 -0
  110. disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP4.py +1 -0
  111. disdrodb/l0/readers/PARSIVEL/EPFL/PARADISO_2014.py +1 -0
  112. disdrodb/l0/readers/PARSIVEL/EPFL/PARSIVEL_2007.py +1 -0
  113. disdrodb/l0/readers/PARSIVEL/EPFL/RACLETS_2019.py +1 -0
  114. disdrodb/l0/readers/PARSIVEL/EPFL/RACLETS_2019_WJF.py +1 -0
  115. disdrodb/l0/readers/PARSIVEL/EPFL/RIETHOLZBACH_2011.py +1 -0
  116. disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2017.py +1 -0
  117. disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2019.py +1 -0
  118. disdrodb/l0/readers/PARSIVEL/EPFL/UNIL_2022.py +1 -0
  119. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +1 -0
  120. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +1 -0
  121. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +1 -0
  122. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +1 -0
  123. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_nc.py +1 -0
  124. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +1 -0
  125. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +1 -0
  126. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  127. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +1 -0
  128. disdrodb/l0/readers/PARSIVEL2/MEXICO/OH_IIUNAM_nc.py +1 -0
  129. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  130. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +1 -0
  131. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +1 -0
  132. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P1.py +1 -0
  133. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +1 -0
  134. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +1 -0
  135. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +1 -0
  136. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +1 -0
  137. disdrodb/l0/readers/PARSIVEL2/USA/CW3E.py +1 -0
  138. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +1 -0
  139. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +1 -0
  140. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +1 -0
  141. disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +1 -0
  142. disdrodb/l0/readers/SWS250/BELGIUM/KMI.py +1 -0
  143. disdrodb/l0/readers/template_reader_raw_netcdf_data.py +1 -0
  144. disdrodb/l0/readers/template_reader_raw_text_data.py +1 -0
  145. disdrodb/l0/template_tools.py +6 -8
  146. disdrodb/l1/__init__.py +1 -0
  147. disdrodb/l1/classification.py +1 -0
  148. disdrodb/l1/resampling.py +5 -0
  149. disdrodb/l1_env/routines.py +1 -0
  150. disdrodb/l2/__init__.py +1 -0
  151. disdrodb/l2/empirical_dsd.py +1 -0
  152. disdrodb/l2/processing.py +1 -0
  153. disdrodb/metadata/checks.py +9 -10
  154. disdrodb/metadata/download.py +1 -0
  155. disdrodb/metadata/geolocation.py +2 -1
  156. disdrodb/metadata/info.py +2 -2
  157. disdrodb/metadata/search.py +0 -1
  158. disdrodb/physics/atmosphere.py +1 -0
  159. disdrodb/physics/water.py +1 -0
  160. disdrodb/physics/wrappers.py +1 -0
  161. disdrodb/psd/__init__.py +0 -1
  162. disdrodb/psd/fitting.py +1 -0
  163. disdrodb/psd/models.py +1 -0
  164. disdrodb/routines/__init__.py +1 -0
  165. disdrodb/routines/l0.py +13 -9
  166. disdrodb/routines/l1.py +17 -12
  167. disdrodb/routines/l2.py +4 -5
  168. disdrodb/routines/options.py +1 -0
  169. disdrodb/routines/options_validation.py +12 -12
  170. disdrodb/routines/wrappers.py +33 -33
  171. disdrodb/scattering/__init__.py +0 -1
  172. disdrodb/scattering/permittivity.py +1 -0
  173. disdrodb/scattering/routines.py +3 -3
  174. disdrodb/summary/routines.py +12 -5
  175. disdrodb/utils/archiving.py +2 -1
  176. disdrodb/utils/attrs.py +3 -2
  177. disdrodb/utils/compression.py +1 -2
  178. disdrodb/utils/coords.py +45 -0
  179. disdrodb/utils/dask.py +5 -2
  180. disdrodb/utils/dataframe.py +4 -3
  181. disdrodb/utils/decorators.py +2 -1
  182. disdrodb/utils/directories.py +2 -2
  183. disdrodb/utils/encoding.py +2 -1
  184. disdrodb/utils/manipulations.py +1 -0
  185. disdrodb/utils/pydantic.py +1 -0
  186. disdrodb/utils/routines.py +1 -0
  187. disdrodb/utils/time.py +3 -2
  188. disdrodb/utils/warnings.py +1 -0
  189. disdrodb/utils/writer.py +4 -0
  190. disdrodb/utils/xarray.py +1 -0
  191. disdrodb/viz/plots.py +1 -0
  192. {disdrodb-0.3.0.dist-info → disdrodb-0.4.0.dist-info}/METADATA +4 -3
  193. disdrodb-0.4.0.dist-info/RECORD +361 -0
  194. disdrodb-0.3.0.dist-info/RECORD +0 -358
  195. {disdrodb-0.3.0.dist-info → disdrodb-0.4.0.dist-info}/WHEEL +0 -0
  196. {disdrodb-0.3.0.dist-info → disdrodb-0.4.0.dist-info}/entry_points.txt +0 -0
  197. {disdrodb-0.3.0.dist-info → disdrodb-0.4.0.dist-info}/licenses/LICENSE +0 -0
  198. {disdrodb-0.3.0.dist-info → disdrodb-0.4.0.dist-info}/top_level.txt +0 -0
disdrodb/__init__.py CHANGED
@@ -20,6 +20,8 @@ import contextlib
20
20
  import os
21
21
  from importlib.metadata import PackageNotFoundError, version
22
22
 
23
+ os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
24
+
23
25
  import disdrodb.accessor # noqa
24
26
  from disdrodb._config import config # noqa
25
27
  from disdrodb.api.configs import available_sensor_names
disdrodb/_config.py CHANGED
@@ -18,6 +18,7 @@
18
18
 
19
19
  See https://donfig.readthedocs.io/en/latest/configuration.html for more info.
20
20
  """
21
+
21
22
  from donfig import Config
22
23
 
23
24
  from disdrodb.configs import read_configs
disdrodb/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.3.0'
32
- __version_tuple__ = version_tuple = (0, 3, 0)
31
+ __version__ = version = '0.4.0'
32
+ __version_tuple__ = version_tuple = (0, 4, 0)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -15,6 +15,7 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """This directory defines DISDRODB xarray accessors."""
18
+
18
19
  from .methods import DISDRODB_DataArray_Accessor, DISDRODB_Dataset_Accessor
19
20
 
20
21
  __all__ = ["DISDRODB_DataArray_Accessor", "DISDRODB_Dataset_Accessor"]
@@ -15,6 +15,7 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """This module defines DISDRODB xarray accessors."""
18
+
18
19
  import xarray as xr
19
20
 
20
21
 
disdrodb/api/checks.py CHANGED
@@ -15,12 +15,12 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """DISDRODB Checks Functions."""
18
+
18
19
  import datetime
19
20
  import difflib
20
21
  import logging
21
22
  import os
22
23
  import re
23
- import sys
24
24
  import warnings
25
25
 
26
26
  import numpy as np
@@ -565,9 +565,7 @@ def check_filepaths(filepaths):
565
565
 
566
566
  def get_current_utc_time():
567
567
  """Get current UTC time."""
568
- if sys.version_info >= (3, 11):
569
- return datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
570
- return datetime.datetime.utcnow()
568
+ return datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
571
569
 
572
570
 
573
571
  def check_start_end_time(start_time, end_time):
disdrodb/api/io.py CHANGED
@@ -15,21 +15,23 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """Routines to list and open DISDRODB products."""
18
+
18
19
  import datetime
20
+ import functools
19
21
  import os
20
22
  import subprocess
21
23
  import sys
22
24
  from pathlib import Path
23
- from typing import Optional
24
25
 
25
26
  import numpy as np
26
27
 
27
28
  from disdrodb.api.checks import (
28
29
  check_filepaths,
29
30
  check_start_end_time,
31
+ check_time,
30
32
  get_current_utc_time,
31
33
  )
32
- from disdrodb.api.info import get_start_end_time_from_filepaths
34
+ from disdrodb.api.info import get_start_end_time_from_filepaths, group_filepaths
33
35
  from disdrodb.api.path import (
34
36
  define_campaign_dir,
35
37
  define_data_dir,
@@ -130,8 +132,8 @@ def find_files(
130
132
  station_name,
131
133
  product,
132
134
  debugging_mode: bool = False,
133
- data_archive_dir: Optional[str] = None,
134
- metadata_archive_dir: Optional[str] = None,
135
+ data_archive_dir: str | None = None,
136
+ metadata_archive_dir: str | None = None,
135
137
  glob_pattern=None,
136
138
  start_time=None,
137
139
  end_time=None,
@@ -289,6 +291,42 @@ def _open_raw_files(filepaths, data_source, campaign_name, station_name, metadat
289
291
  return ds
290
292
 
291
293
 
294
+ def list_coordinates_names(ds):
295
+ """List the names referenced by the 'coordinates', 'bounds' and 'grid_mapping' attributes of a non-CF-decoded xarray.Dataset."""
296
+ coords = set()
297
+ for v in ds.variables:
298
+ attrs = ds[v].attrs
299
+ # auxiliary coordinates
300
+ if "coordinates" in attrs:
301
+ coords |= set(attrs["coordinates"].split())
302
+ # bounds variables
303
+ if "bounds" in attrs:
304
+ coords.add(attrs["bounds"])
305
+ # grid mapping
306
+ if "grid_mapping" in attrs:
307
+ coords.add(attrs["grid_mapping"])
308
+ return coords
309
+
310
+
311
+ def subset_variables(ds, variables):
312
+ """Subset variables while keeping coordinates."""
313
+ # Ensure list
314
+ variables = list(variables)
315
+
316
+ # Always keep dimension variables
317
+ dim_vars = list(ds.dims)
318
+
319
+ # Variables referenced by CF relationships
320
+ coords = list_coordinates_names(ds)
321
+
322
+ # Union of everything we must keep
323
+ keep = set(variables) | set(dim_vars) | coords
324
+
325
+ # Only keep variables that exist
326
+ keep = [v for v in keep if v in list(ds.variables)]
327
+ return ds[keep]
328
+
329
+
292
330
  def filter_dataset_by_time(ds, start_time=None, end_time=None):
293
331
  """Subset an xarray.Dataset by time, robust to duplicated/non-monotonic indices.
294
332
 
@@ -318,6 +356,84 @@ def filter_dataset_by_time(ds, start_time=None, end_time=None):
318
356
  return ds.isel(time=np.where(mask)[0])
319
357
 
320
358
 
359
+ def open_parquet_files(
360
+ filepaths,
361
+ variables=None,
362
+ start_time=None,
363
+ end_time=None,
364
+ time_col="time",
365
+ use_threads=True,
366
+ ):
367
+ """Open Parquet files."""
368
+ import pyarrow.dataset as ds
369
+
370
+ # Open dataset
371
+ dataset = ds.dataset(
372
+ filepaths,
373
+ format="parquet",
374
+ )
375
+
376
+ # Define filters
377
+ filters = []
378
+ if start_time is not None:
379
+ start_time = check_time(start_time)
380
+ filters.append(ds.field(time_col) >= start_time)
381
+
382
+ if end_time is not None:
383
+ end_time = check_time(end_time)
384
+ filters.append(ds.field(time_col) <= end_time)
385
+
386
+ # Combine filters if any exist
387
+ filter_expr = None
388
+ if filters:
389
+ filter_expr = filters[0]
390
+ for f in filters[1:]:
391
+ filter_expr = filter_expr & f
392
+
393
+ # Read table and convert to pandas
394
+ df = dataset.to_table(
395
+ columns=variables,
396
+ filter=filter_expr,
397
+ use_threads=use_threads,
398
+ ).to_pandas()
399
+ return df
400
+
401
+
402
+ def ensure_safe_open_mfdataset(function):
403
+ """Decorator to ensure safe xarray open_mfdataset.
404
+
405
+ parallel argument is changed to False if:
406
+ - dask threading or single-threaded is active
407
+ - distributed multiprocessing with more than 1 thread per process
408
+
409
+ parallel argument is allowed to be True only if:
410
+ - distributed multiprocessing with only 1 thread per process
411
+ """
412
+ import dask
413
+
414
+ from disdrodb.utils.dask import check_parallel_validity
415
+
416
+ @functools.wraps(function)
417
+ def wrapper(*args, **kwargs):
418
+ # Check if it must be a delayed function
419
+ parallel = kwargs.get("parallel", False)
420
+ parallel = check_parallel_validity(parallel)
421
+ kwargs["parallel"] = parallel
422
+
423
+ # If parallel is still True at this stage, it means we are using
424
+ # multiprocessing or dask.distributed with single thread
425
+ if parallel:
426
+ return function(*args, **kwargs)
427
+
428
+ # Call function with single threading
429
+ with dask.config.set(scheduler="single-threaded"): # "synchronous"
430
+ result = function(*args, **kwargs)
431
+ return result
432
+
433
+ return wrapper
434
+
435
+
436
+ @ensure_safe_open_mfdataset
321
437
  def open_netcdf_files(
322
438
  filepaths,
323
439
  chunks=-1,
@@ -326,11 +442,33 @@ def open_netcdf_files(
326
442
  variables=None,
327
443
  parallel=False,
328
444
  compute=True,
445
+ engine="netcdf4",
329
446
  **open_kwargs,
330
447
  ):
331
448
  """Open DISDRODB netCDF files using xarray.
332
449
 
333
- Using combine="nested" and join="outer" ensure that duplicated timesteps are not overwritten!
450
+ Using data_vars="minimal", coords="minimal", compat="override"
451
+ --> will only concatenate those variables with the time dimension,
452
+ --> will skip any checking for variables that don't have a time dimension
453
+ (simply pick the variable from the first file).
454
+ https://github.com/pydata/xarray/issues/1385#issuecomment-1958761334
455
+
456
+ Using combine="nested" and join="outer" ensure that duplicated timesteps
457
+ are not overwritten!
458
+
459
+ When decode_cf=False
460
+ --> lat,lon are data_vars and get concatenated without any checking or reading
461
+ When decode_cf=True
462
+ --> lat, lon are promoted to coords, then get checked for equality across all files
463
+
464
+ For L0B product, if sample_interval variable is present and varies with time,
465
+ this function concatenates the variable over time without problems.
466
+ For L0C product, if sample_interval changes across listed files,
467
+ only sample_interval of first file is reported.
468
+ --> open_dataset takes care of providing only filepaths of files with the same sample interval.
469
+ In L1 and L2 processing, only filepaths of files with same sample interval
470
+ must be passed to this function.
471
+
334
472
  """
335
473
  import xarray as xr
336
474
 
@@ -341,35 +479,64 @@ def open_netcdf_files(
341
479
  variables = np.unique(variables).tolist()
342
480
 
343
481
  # Define preprocessing function for parallel opening
344
- preprocess = (lambda ds: ds[variables]) if parallel and variables is not None else None
482
+ if parallel and variables is not None:
483
+
484
+ def preprocess(ds):
485
+ return subset_variables(ds, variables)
486
+
487
+ else:
488
+ preprocess = None
345
489
 
346
490
  # Open netcdf
491
+ xr.set_options(use_new_combine_kwarg_defaults=True)
347
492
  ds = xr.open_mfdataset(
348
493
  filepaths,
349
494
  chunks=chunks,
350
- data_vars="all",
351
495
  combine="nested",
352
- join="outer",
353
496
  concat_dim="time",
354
- engine="netcdf4",
355
- parallel=parallel,
356
- preprocess=preprocess,
357
- compat="no_conflicts",
497
+ data_vars="minimal", # ["sample_interval"], "all" would concat all across time
498
+ coords="minimal",
499
+ join="outer", # "exact"
500
+ compat="override", # "no_conflicts" slows down
358
501
  combine_attrs="override",
359
- coords="different", # maybe minimal? would remove lon/lat/alt?
502
+ preprocess=preprocess, # only if parallel=True
503
+ engine=engine,
504
+ parallel=parallel,
505
+ decode_cf=False, # assume encoding do not vary across files (e.g. "time" units)
506
+ decode_coords=False, # no effect if decode_cf=False
360
507
  decode_timedelta=False,
361
508
  cache=False,
362
509
  autoclose=True,
363
510
  **open_kwargs,
364
511
  )
365
- # - Subset variables
512
+
513
+ # Decode CF
514
+ # - Set to coordinates the variables
515
+ # - latitude/longitude/altitude
516
+ # - sample_interval
517
+ # - diameter/velocity bin width/upper/lower
518
+ ds = xr.decode_cf(ds, decode_times=True, decode_coords=True, decode_timedelta=False)
519
+
520
+ # Subset variables
521
+ # --> After decoding CF, when coordinates are properly set
522
+ # --> Otherwise, coordinate variables would be removed unless listed in variables
366
523
  if variables is not None and preprocess is None:
367
524
  variables = [var for var in variables if var in ds]
368
525
  ds = ds[variables]
369
- # - Subset time
526
+
527
+ # Subset time
370
528
  if start_time is not None or end_time is not None:
371
529
  ds = filter_dataset_by_time(ds, start_time=start_time, end_time=end_time)
372
- # - If compute=True, load in memory and close connections to files
530
+
531
+ # Ensure coordinates are already loaded in memory
532
+ for coord in list(ds.coords):
533
+ ds[coord] = ds[coord].load()
534
+
535
+ # Update time coverage attributes
536
+ ds.attrs["time_coverage_start"] = str(ds.disdrodb.start_time)
537
+ ds.attrs["time_coverage_end"] = str(ds.disdrodb.end_time)
538
+
539
+ # If compute=True, load in memory and close connections to files
373
540
  if compute:
374
541
  dataset = ds.compute()
375
542
  ds.close()
@@ -387,8 +554,8 @@ def open_dataset(
387
554
  product,
388
555
  product_kwargs=None,
389
556
  debugging_mode: bool = False,
390
- data_archive_dir: Optional[str] = None,
391
- metadata_archive_dir: Optional[str] = None,
557
+ data_archive_dir: str | None = None,
558
+ metadata_archive_dir: str | None = None,
392
559
  chunks=-1,
393
560
  parallel=False,
394
561
  compute=False,
@@ -430,7 +597,7 @@ def open_dataset(
430
597
  xarray.Dataset
431
598
 
432
599
  """
433
- from disdrodb.l0.l0a_processing import read_l0a_dataframe
600
+ import xarray as xr
434
601
 
435
602
  # Extract product kwargs from open_kwargs
436
603
  product_kwargs = extract_product_kwargs(open_kwargs, product=product)
@@ -464,9 +631,46 @@ def open_dataset(
464
631
 
465
632
  # Open L0A Parquet files
466
633
  if product == "L0A":
467
- return read_l0a_dataframe(filepaths)
634
+ df = open_parquet_files(
635
+ filepaths=filepaths,
636
+ variables=variables,
637
+ start_time=start_time,
638
+ end_time=end_time,
639
+ use_threads=parallel,
640
+ )
641
+ return df
468
642
 
469
643
  # Open DISDRODB netCDF files using xarray
644
+ # - Special handling for L0C product with possible multiple sample intervals
645
+ if product == "L0C":
646
+ dict_sample_intervals = group_filepaths(filepaths, groups="sample_interval")
647
+ if len(dict_sample_intervals) > 1:
648
+ # Open separately each sample interval
649
+ list_ds = [
650
+ open_netcdf_files(
651
+ filepaths=filepaths,
652
+ chunks=chunks,
653
+ start_time=start_time,
654
+ end_time=end_time,
655
+ variables=variables,
656
+ parallel=parallel,
657
+ compute=compute,
658
+ **open_kwargs,
659
+ )
660
+ for filepaths in dict_sample_intervals.values()
661
+ ]
662
+ # Expand sample_interval coordinate for each dataset
663
+ list_ds = [ds.assign_coords(sample_interval=ds.sample_interval.expand_dims(time=ds.time)) for ds in list_ds]
664
+ # Concatenate along time dimension and sort by time
665
+ ds = xr.concat(list_ds, dim="time")
666
+ ds.attrs["measurement_interval"] = list(dict_sample_intervals)
667
+ ds = ds.sortby("time")
668
+ # Update time coverage attributes
669
+ ds.attrs["time_coverage_start"] = str(ds.disdrodb.start_time)
670
+ ds.attrs["time_coverage_end"] = str(ds.disdrodb.end_time)
671
+ return ds
672
+
673
+ # Otherwise, open all files together
470
674
  ds = open_netcdf_files(
471
675
  filepaths=filepaths,
472
676
  chunks=chunks,
@@ -477,10 +681,6 @@ def open_dataset(
477
681
  compute=compute,
478
682
  **open_kwargs,
479
683
  )
480
-
481
- # Ensure coordinates in memory
482
- # for coord in list(ds.coords):
483
- # ds[coord] = ds[coord].compute()
484
684
  return ds
485
685
 
486
686
 
disdrodb/api/path.py CHANGED
@@ -15,13 +15,13 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """Define paths within the DISDRODB infrastructure."""
18
+
18
19
  import os
19
20
 
20
21
  from disdrodb.configs import get_data_archive_dir, get_metadata_archive_dir
21
22
  from disdrodb.constants import ARCHIVE_VERSION
22
23
  from disdrodb.utils.directories import check_directory_exists
23
24
  from disdrodb.utils.time import (
24
- ensure_sample_interval_in_seconds,
25
25
  get_file_start_end_time,
26
26
  seconds_to_temporal_resolution,
27
27
  )
@@ -803,10 +803,8 @@ def define_l0b_filename(ds, campaign_name: str, station_name: str) -> str:
803
803
  return filename
804
804
 
805
805
 
806
- def define_l0c_filename(ds, campaign_name: str, station_name: str) -> str:
806
+ def define_l0c_filename(ds, campaign_name: str, station_name: str, sample_interval: str) -> str:
807
807
  """Define L0C file name."""
808
- # TODO: add sample_interval as function argument
809
- sample_interval = int(ensure_sample_interval_in_seconds(ds["sample_interval"]).data.item())
810
808
  temporal_resolution = define_temporal_resolution(sample_interval, rolling=False)
811
809
  starting_time, ending_time = get_file_start_end_time(ds)
812
810
  starting_time = starting_time.strftime("%Y%m%d%H%M%S")
@@ -15,6 +15,7 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """Wrapper to check DISDRODB Metadata Archive Compliance from terminal."""
18
+
18
19
  import sys
19
20
 
20
21
  import click
@@ -15,6 +15,7 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """Script to check the validity of the DISDRODB products configuration files."""
18
+
18
19
  import sys
19
20
 
20
21
  import click
@@ -15,8 +15,8 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """Script to create summary figures and tables for a DISDRODB stationn."""
18
+
18
19
  import sys
19
- from typing import Optional
20
20
 
21
21
  import click
22
22
 
@@ -49,15 +49,15 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
49
49
  )
50
50
  def disdrodb_create_summary(
51
51
  # Stations options
52
- data_sources: Optional[str] = None,
53
- campaign_names: Optional[str] = None,
54
- station_names: Optional[str] = None,
52
+ data_sources: str | None = None,
53
+ campaign_names: str | None = None,
54
+ station_names: str | None = None,
55
55
  # Processing options:
56
56
  parallel=False,
57
57
  temporal_resolution="1MIN",
58
58
  # DISDRODB root directories
59
- data_archive_dir: Optional[str] = None,
60
- metadata_archive_dir: Optional[str] = None,
59
+ data_archive_dir: str | None = None,
60
+ metadata_archive_dir: str | None = None,
61
61
  ):
62
62
  """Create summary figures and tables for DISDRODB stations.
63
63
 
@@ -15,8 +15,8 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """Script to create summary figures and tables for a DISDRODB station."""
18
+
18
19
  import sys
19
- from typing import Optional
20
20
 
21
21
  import click
22
22
 
@@ -53,7 +53,7 @@ def disdrodb_create_summary_station(
53
53
  parallel=False,
54
54
  temporal_resolution="1MIN",
55
55
  # DISDRODB root directories
56
- data_archive_dir: Optional[str] = None,
56
+ data_archive_dir: str | None = None,
57
57
  ):
58
58
  """Create summary figures and tables for a specific DISDRODB station.
59
59
 
@@ -15,6 +15,7 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """Routine to print the DISDRODB Data Archive directory."""
18
+
18
19
  import sys
19
20
 
20
21
  import click
@@ -17,7 +17,6 @@
17
17
  """Wrapper to download stations from the DISDRODB Decentralized Data Archive."""
18
18
 
19
19
  import sys
20
- from typing import Optional
21
20
 
22
21
  import click
23
22
 
@@ -38,11 +37,11 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
38
37
  @click_metadata_archive_dir_option
39
38
  @click_download_options
40
39
  def disdrodb_download_archive(
41
- data_sources: Optional[str] = None,
42
- campaign_names: Optional[str] = None,
43
- station_names: Optional[str] = None,
44
- data_archive_dir: Optional[str] = None,
45
- metadata_archive_dir: Optional[str] = None,
40
+ data_sources: str | None = None,
41
+ campaign_names: str | None = None,
42
+ station_names: str | None = None,
43
+ data_archive_dir: str | None = None,
44
+ metadata_archive_dir: str | None = None,
46
45
  force: bool = False,
47
46
  ):
48
47
  """Download raw data for multiple DISDRODB stations from the DISDRODB Decentralized Data Archive.
@@ -15,6 +15,7 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """Routine to download the DISDRODB Metadata Data Archive."""
18
+
18
19
  import sys
19
20
  from pathlib import Path
20
21
 
@@ -17,7 +17,6 @@
17
17
  """Routine to download station data from the DISDRODB Decentralized Data Archive."""
18
18
 
19
19
  import sys
20
- from typing import Optional
21
20
 
22
21
  import click
23
22
 
@@ -41,8 +40,8 @@ def disdrodb_download_station(
41
40
  data_source: str,
42
41
  campaign_name: str,
43
42
  station_name: str,
44
- data_archive_dir: Optional[str] = None,
45
- metadata_archive_dir: Optional[str] = None,
43
+ data_archive_dir: str | None = None,
44
+ metadata_archive_dir: str | None = None,
46
45
  force: bool = False,
47
46
  ):
48
47
  """Download raw data of a single station from the DISDRODB Decentralized Data Archive.
@@ -15,8 +15,8 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """Script to initialize the DISDRODB station directory structure."""
18
+
18
19
  import sys
19
- from typing import Optional
20
20
 
21
21
  import click
22
22
 
@@ -43,8 +43,8 @@ def disdrodb_initialize_station(
43
43
  campaign_name: str,
44
44
  station_name: str,
45
45
  # DISDRODB root directories
46
- data_archive_dir: Optional[str] = None,
47
- metadata_archive_dir: Optional[str] = None,
46
+ data_archive_dir: str | None = None,
47
+ metadata_archive_dir: str | None = None,
48
48
  ):
49
49
  """Initialize the DISDRODB directory structure for a new station.
50
50
 
@@ -15,6 +15,7 @@
15
15
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
16
  # -----------------------------------------------------------------------------.
17
17
  """Routine to print the DISDRODB Data Archive directory."""
18
+
18
19
  import sys
19
20
 
20
21
  import click
@@ -17,7 +17,6 @@
17
17
  """Routine to open the DISDRODB Data Archive."""
18
18
 
19
19
  import sys
20
- from typing import Optional
21
20
 
22
21
  import click
23
22
 
@@ -32,7 +31,7 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
32
31
  @click.command()
33
32
  @click_data_archive_dir_option
34
33
  def disdrodb_open_data_archive(
35
- data_archive_dir: Optional[str] = None,
34
+ data_archive_dir: str | None = None,
36
35
  ):
37
36
  """Open the DISDRODB Data Archive directory in the system file explorer.
38
37
 
@@ -17,7 +17,6 @@
17
17
  """Routine to open the DISDRODB Data Archive logs directory."""
18
18
 
19
19
  import sys
20
- from typing import Optional
21
20
 
22
21
  import click
23
22
 
@@ -36,8 +35,8 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
36
35
  def disdrodb_open_logs_directory(
37
36
  data_source: str,
38
37
  campaign_name: str,
39
- station_name: Optional[str] = None,
40
- data_archive_dir: Optional[str] = None,
38
+ station_name: str | None = None,
39
+ data_archive_dir: str | None = None,
41
40
  ):
42
41
  """Open the DISDRODB Data Archive station logs directory in the system file explorer.
43
42
 
@@ -17,7 +17,6 @@
17
17
  """Routine to open the DISDRODB Metadata Data Archive."""
18
18
 
19
19
  import sys
20
- from typing import Optional
21
20
 
22
21
  import click
23
22
 
@@ -32,7 +31,7 @@ sys.tracebacklimit = 0 # avoid full traceback error if occur
32
31
  @click.command()
33
32
  @click_metadata_archive_dir_option
34
33
  def disdrodb_open_metadata_archive(
35
- metadata_archive_dir: Optional[str] = None,
34
+ metadata_archive_dir: str | None = None,
36
35
  ):
37
36
  """Open the DISDRODB Metadata Archive directory in the system file explorer.
38
37