disdrodb 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -3
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/io.py +14 -17
  7. disdrodb/api/path.py +42 -77
  8. disdrodb/api/search.py +89 -23
  9. disdrodb/cli/disdrodb_create_summary.py +11 -1
  10. disdrodb/cli/disdrodb_create_summary_station.py +10 -0
  11. disdrodb/cli/disdrodb_run_l0.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  15. disdrodb/cli/disdrodb_run_l1.py +1 -1
  16. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  17. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  18. disdrodb/configs.py +30 -83
  19. disdrodb/constants.py +4 -3
  20. disdrodb/data_transfer/download_data.py +4 -2
  21. disdrodb/docs.py +2 -2
  22. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  29. disdrodb/etc/products/L1/global.yaml +7 -1
  30. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  31. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +1 -1
  33. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +1 -1
  35. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +8 -0
  38. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  39. disdrodb/etc/products/L2M/global.yaml +11 -3
  40. disdrodb/l0/check_configs.py +49 -16
  41. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  42. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  43. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  44. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  47. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  48. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  49. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  50. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  51. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  52. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  53. disdrodb/l0/l0_reader.py +2 -2
  54. disdrodb/l0/l0b_processing.py +70 -15
  55. disdrodb/l0/l0c_processing.py +7 -3
  56. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -1
  57. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  58. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  59. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  60. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  61. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  62. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  63. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  64. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  65. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  66. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  67. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  68. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  69. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/LPVEX.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  71. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +8 -17
  72. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  73. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  74. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  75. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  76. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  77. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  78. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  79. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  80. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  81. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  83. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  84. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  85. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  86. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  87. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +232 -0
  88. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  89. disdrodb/l0/readers/PARSIVEL2/{NASA/LPVEX.py → SPAIN/GRANADA.py} +46 -35
  90. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  91. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +10 -28
  92. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  93. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  94. disdrodb/l1/beard_model.py +31 -129
  95. disdrodb/l1/fall_velocity.py +136 -83
  96. disdrodb/l1/filters.py +25 -28
  97. disdrodb/l1/processing.py +16 -17
  98. disdrodb/l1/resampling.py +101 -38
  99. disdrodb/l1_env/routines.py +46 -17
  100. disdrodb/l2/empirical_dsd.py +6 -0
  101. disdrodb/l2/processing.py +6 -5
  102. disdrodb/metadata/geolocation.py +0 -2
  103. disdrodb/metadata/search.py +3 -4
  104. disdrodb/psd/fitting.py +16 -13
  105. disdrodb/routines/l0.py +2 -2
  106. disdrodb/routines/l1.py +173 -60
  107. disdrodb/routines/l2.py +148 -284
  108. disdrodb/routines/options.py +345 -0
  109. disdrodb/routines/wrappers.py +14 -1
  110. disdrodb/scattering/axis_ratio.py +90 -84
  111. disdrodb/scattering/permittivity.py +6 -0
  112. disdrodb/summary/routines.py +735 -670
  113. disdrodb/utils/archiving.py +51 -44
  114. disdrodb/utils/attrs.py +3 -1
  115. disdrodb/utils/dask.py +4 -4
  116. disdrodb/utils/dict.py +33 -0
  117. disdrodb/utils/encoding.py +6 -1
  118. disdrodb/utils/routines.py +9 -8
  119. disdrodb/utils/time.py +11 -3
  120. disdrodb/viz/__init__.py +0 -13
  121. disdrodb/viz/plots.py +231 -1
  122. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/METADATA +2 -1
  123. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/RECORD +135 -103
  124. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  125. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  126. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  127. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/IFLOODS.py +0 -0
  128. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +0 -0
  129. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/PIERS.py +0 -0
  130. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/GCPEX.py +0 -0
  131. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  132. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/WHEEL +0 -0
  133. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/entry_points.txt +0 -0
  134. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/licenses/LICENSE +0 -0
  135. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/top_level.txt +0 -0
@@ -23,10 +23,7 @@ import pandas as pd
  from disdrodb.api.info import get_start_end_time_from_filepaths
  from disdrodb.api.io import open_netcdf_files
  from disdrodb.utils.event import group_timesteps_into_event
- from disdrodb.utils.time import (
-     ensure_sorted_by_time,
-     ensure_timedelta_seconds,
- )
+ from disdrodb.utils.time import ensure_sorted_by_time, temporal_resolution_to_seconds

  ####---------------------------------------------------------------------------------
  #### Time blocks
@@ -140,6 +137,7 @@ def identify_events(
      neighbor_time_interval : str
          The time interval around a given a timestep defining the neighborhood.
          Only timesteps that fall within this time interval before or after a timestep are considered neighbors.
+         The neighbor_time_interval must be at least equal to the dataset sampling interval!
      neighbor_min_size : int, optional
          The minimum number of neighboring timesteps required within `neighbor_time_interval` for a
          timestep to be considered non-isolated. Isolated timesteps are removed !
@@ -171,6 +169,12 @@ def identify_events(
      # Define candidate timesteps to group into events
      idx_valid = ds["N"].to_numpy() > min_drops
      timesteps = ds["time"].to_numpy()[idx_valid]
+     if "sample_interval" in ds:
+         sample_interval = ds["sample_interval"].compute().item()
+         if temporal_resolution_to_seconds(neighbor_time_interval) < sample_interval:
+             msg = "'neighbor_time_interval' must be at least equal to the dataset sample interval ({sample_interval} s)"
+             raise ValueError(msg)
+
      # Define event list
      event_list = group_timesteps_into_event(
          timesteps=timesteps,
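Note: in the hunk above `msg` is a plain string, so the `{sample_interval}` placeholder is never interpolated into the error message; an f-string would render it. A minimal sketch of the same guard with that fixed (the helper name `check_neighbor_time_interval` is ours, not the package's):

```python
import xarray as xr

from disdrodb.utils.time import temporal_resolution_to_seconds


def check_neighbor_time_interval(ds: xr.Dataset, neighbor_time_interval: str) -> None:
    """Reject neighborhood windows shorter than the dataset sampling interval."""
    if "sample_interval" in ds:
        sample_interval = ds["sample_interval"].compute().item()
        if temporal_resolution_to_seconds(neighbor_time_interval) < sample_interval:
            raise ValueError(
                f"'neighbor_time_interval' must be at least equal to "
                f"the dataset sample interval ({sample_interval} s)",
            )
```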
@@ -326,29 +330,32 @@ def _map_files_to_blocks(files_start_time, files_end_time, filepaths, block_starts, block_ends):
      return results


- def get_files_partitions(list_partitions, filepaths, sample_interval, accumulation_interval, rolling):  # noqa: ARG001
+ def group_files_by_temporal_partitions(
+     temporal_partitions,
+     filepaths,
+     block_starts_offset=0,
+     block_ends_offset=0,
+ ):
      """
      Provide information about the required files for each event.

-     For each event in `list_partitions`, this function identifies the file paths from `filepaths` that
-     overlap with the event period, adjusted by the `accumulation_interval`. The event period is
-     extended backward or forward based on the `rolling` parameter.
+     For each time block in `temporal_partitions`, the function identifies the `filepaths` that
+     overlap such time period. The time blocks of `temporal_partitions` can be adjusted using
+     block_starts_offset and block_ends_offset e.g. for resampling applications.

      Parameters
      ----------
-     list_partitions : list of dict
-         List of events, where each event is a dictionary containing at least 'start_time' and 'end_time'
+     temporal_partitions : list of dict
+         List of time blocks, where each time blocks is a dictionary containing at least 'start_time' and 'end_time'
          keys with `numpy.datetime64` values.
      filepaths : list of str
          List of file paths corresponding to data files.
-     sample_interval : numpy.timedelta64 or int
-         The sample interval of the input dataset.
-     accumulation_interval : numpy.timedelta64 or int
-         Time interval to adjust the event period for accumulation. If an integer is provided, it is
-         assumed to be in seconds.
-     rolling : bool
-         If True, adjust the event period backward by `accumulation_interval` (rolling backward).
-         If False, adjust forward (aggregate forward).
+     block_starts_offset: int
+         Optional offset (in seconds) to add to time blocks starts.
+         Provide negative offset to go back in time.
+     block_ends_offset: int
+         Optional offset (in seconds) to add to time blocks ends.
+         Provide negative offset to go back in time.

      Returns
      -------
@@ -359,54 +366,54 @@ def get_files_partitions(list_partitions, filepaths, sample_interval, accumulation_interval, rolling):
          - 'filepaths': List of file paths overlapping with the adjusted event period.

      """
-     if len(filepaths) == 0 or len(list_partitions) == 0:
+     if len(filepaths) == 0 or len(temporal_partitions) == 0:
          return []

-     # Ensure sample_interval and accumulation_interval is numpy.timedelta64
-     accumulation_interval = ensure_timedelta_seconds(accumulation_interval)
-     sample_interval = ensure_timedelta_seconds(sample_interval)
-
-     # Define offset on event_end_time
-     offset = accumulation_interval if sample_interval != accumulation_interval else ensure_timedelta_seconds(0)
-
      # Retrieve file start_time and end_time
      files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)

      # Retrieve partitions blocks start and end time arrays
-     block_starts = np.array([p["start_time"] for p in list_partitions]).astype("M8[s]")
-     block_ends = np.array([p["end_time"] for p in list_partitions]).astype("M8[s]")
+     block_starts = np.array([p["start_time"] for p in temporal_partitions]).astype("M8[s]")
+     block_ends = np.array([p["end_time"] for p in temporal_partitions]).astype("M8[s]")

-     # Add optional offset for resampling
-     # TODO: expanding partition time should be done only at L1 stage when resampling
-     # In disdrodb, the time reported is time at the start of the accumulation period !
-     # If sensors report time at the end of measurement interval, we might being reporting time
-     # with an inaccuracy equals to the sensor measurement interval.
-     # We could correct for that at L0C stage already !
-     block_ends = block_ends + offset
+     # Add optional offset to blocks' starts/ends (e.g. for resampling)
+     block_starts = block_starts + block_starts_offset
+     block_ends = block_ends + block_ends_offset

      # Map filepaths to corresponding time blocks
      list_event_info = _map_files_to_blocks(files_start_time, files_end_time, filepaths, block_starts, block_ends)
      return list_event_info


- def get_files_per_time_block(filepaths, freq="day", tolerance_seconds=120):
+ def group_files_by_time_block(filepaths, freq="day", tolerance_seconds=120):
      """
-     Organize files by the days they cover based on their start and end times.
+     Organize files by time blocks based on their start and end times.
+
+     If tolerance_seconds is specified, it adds some tolerance to files start and end_time.
+     This means that files starting/ending next to the time blocks boundaries will be included in both
+     time blocks. This can be useful to deal with imprecise time within files.

      Parameters
      ----------
      filepaths : list of str
          List of file paths to be processed.
+     freq: str
+         Frequency of the time block. The default frequency is 'day'.
+     tolerance_seconds: int
+         Tolerance in seconds to subtract/add to files start time and end time.

      Returns
      -------
-     dict
-         Dictionary where keys are days (as strings) and values are lists of file paths
-         that cover those days.
+     list of dict
+         A list where each element is a dictionary containing:
+         - 'start_time': Adjusted start time of the event (`datetime.datetime64`).
+         - 'end_time': Adjusted end time of the event (`datetime.datetime64`).
+         - 'filepaths': List of file paths overlapping with the adjusted event period.

      Notes
      -----
-     This function adds a tolerance of 60 seconds to account for imprecise time logging by the sensors.
+     In the DISDRODB L0C processing chain, a tolerance of 120 seconds is used to account
+     for the possible imprecise/drifting time logged by the sensors before it is corrected.
      """
      # Empty filepaths list return a dictionary
      if len(filepaths) == 0:
@@ -421,13 +428,13 @@ def get_files_per_time_block(filepaths, freq="day", tolerance_seconds=120):
      files_end_time = files_end_time + np.array(tolerance_seconds, dtype="m8[s]")

      # Identify candidate blocks
-     list_partitions = identify_time_partitions(
+     temporal_partitions = identify_time_partitions(
          start_times=files_start_time,
          end_times=files_end_time,
          freq=freq,
      )
-     block_starts = np.array([b["start_time"] for b in list_partitions]).astype("M8[s]")
-     block_ends = np.array([b["end_time"] for b in list_partitions]).astype("M8[s]")
+     block_starts = np.array([b["start_time"] for b in temporal_partitions]).astype("M8[s]")
+     block_ends = np.array([b["end_time"] for b in temporal_partitions]).astype("M8[s]")

      # Map filepaths to corresponding time blocks
      list_event_info = _map_files_to_blocks(files_start_time, files_end_time, filepaths, block_starts, block_ends)
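A usage sketch of the renamed partition helper (the module path and the file name are illustrative; the offsets are plain integer seconds added to the `M8[s]` boundary arrays, so negative values shift a boundary back in time):

```python
import numpy as np

from disdrodb.utils.archiving import group_files_by_temporal_partitions  # module path assumed

temporal_partitions = [
    {
        "start_time": np.datetime64("2024-01-01T00:00:00"),
        "end_time": np.datetime64("2024-01-01T23:59:59"),
    },
]
filepaths = ["L0C.1MIN.2024-01-01.nc"]  # hypothetical file name

blocks = group_files_by_temporal_partitions(
    temporal_partitions,
    filepaths,
    block_ends_offset=600,  # extend each block 10 min forward, e.g. for resampling
)
# Each element: {'start_time': ..., 'end_time': ..., 'filepaths': [...]}
```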
disdrodb/utils/attrs.py CHANGED
@@ -31,7 +31,7 @@ def get_attrs_dict():
      """Get attributes dictionary for DISDRODB product variables and coordinates."""
      import disdrodb

-     configs_path = os.path.join(disdrodb.__root_path__, "disdrodb", "etc", "configs")
+     configs_path = os.path.join(disdrodb.package_dir, "etc", "configs")
      attrs_dict = read_yaml(os.path.join(configs_path, "attributes.yaml"))
      return attrs_dict

@@ -95,6 +95,8 @@ def update_disdrodb_attrs(ds, product: str):
      # ----------------------------------------------
      # Add time_coverage_start and time_coverage_end
      if "time" in ds.dims:
+         ds["time"] = ds["time"].dt.floor("s")  # ensure no sub-second values
+         ds["time"] = ds["time"].astype("datetime64[s]")
          attrs["time_coverage_start"] = str(ds["time"].data[0])
          attrs["time_coverage_end"] = str(ds["time"].data[-1])

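Note: the flooring added here (and mirrored in `set_encodings` below) truncates timestamps to whole seconds before they reach the `time_coverage_*` attributes. A standalone illustration of the flooring step:

```python
import pandas as pd

# Sub-second residue is dropped, not rounded up
times = pd.to_datetime(["2024-01-01 00:00:00.749", "2024-01-01 00:01:00.251"])
print(times.floor("s"))
# DatetimeIndex(['2024-01-01 00:00:00', '2024-01-01 00:01:00'], dtype='datetime64[ns]', freq=None)
```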
disdrodb/utils/dask.py CHANGED
@@ -134,16 +134,16 @@ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
      """
      from dask.distributed import get_client

+     if not parallel:
+         # Non-parallel mode: just return results directly
+         return list_tasks
+
      # Ensure logs_dir exists
      os.makedirs(logs_dir, exist_ok=True)

      # Define file name where to log failed dask tasks
      failed_log_path = os.path.join(logs_dir, "FAILED_DASK_TASKS.log")

-     if not parallel:
-         # Non-parallel mode: just return results directly
-         return list_tasks
-
      # Ensure we have a Dask client
      try:
          client = get_client()
disdrodb/utils/dict.py ADDED
@@ -0,0 +1,33 @@
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ # -----------------------------------------------------------------------------.
+ """This module contains functions for manipulating dictionaries."""
+
+
+ def extract_product_kwargs(kwargs, product):
+     """Infer product kwargs dictionary."""
+     from disdrodb.api.checks import check_product
+     from disdrodb.constants import PRODUCTS_ARGUMENTS
+
+     check_product(product)
+     product_kwargs_keys = set(PRODUCTS_ARGUMENTS.get(product, []))
+     return extract_dictionary(kwargs, keys=product_kwargs_keys)
+
+
+ def extract_dictionary(dictionary, keys):
+     """Extract a subset of keys from the dictionary, removing them from the input dictionary."""
+     return {k: dictionary.pop(k) for k in keys if k in dictionary}
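Both helpers are small enough to exercise directly. Note that `extract_dictionary` mutates its input: the extracted keys are popped out, which is what lets `extract_product_kwargs` split product-specific options away from the remaining kwargs. A quick demonstration with generic keys:

```python
from disdrodb.utils.dict import extract_dictionary

kwargs = {"a": 1, "b": 2, "c": 3}
subset = extract_dictionary(kwargs, keys=["a", "c"])
print(subset)  # {'a': 1, 'c': 3}
print(kwargs)  # {'b': 2}  <- extracted keys were removed from the input
```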
disdrodb/utils/encoding.py CHANGED
@@ -19,6 +19,7 @@
  """DISDRODB netCDF4 encoding utilities."""
  import os

+ import numpy as np
  import xarray as xr

  from disdrodb.utils.yaml import read_yaml
@@ -30,7 +31,7 @@ def get_encodings_dict():
      """Get encoding dictionary for DISDRODB product variables and coordinates."""
      import disdrodb

-     configs_path = os.path.join(disdrodb.__root_path__, "disdrodb", "etc", "configs")
+     configs_path = os.path.join(disdrodb.package_dir, "etc", "configs")
      encodings_dict = read_yaml(os.path.join(configs_path, "encodings.yaml"))
      return encodings_dict

@@ -66,6 +67,8 @@ def set_encodings(ds: xr.Dataset, encodings_dict: dict) -> xr.Dataset:

      # Set time encoding
      if "time" in ds:
+         ds["time"] = ds["time"].dt.floor("s")  # ensure no sub-second values
+         ds["time"] = ds["time"].astype("datetime64[s]")
          ds["time"].encoding.update(get_time_encoding())

      # Set the variable encodings
@@ -140,6 +143,8 @@ def get_time_encoding() -> dict:
          Time encoding.
      """
      encoding = {}
+     encoding["dtype"] = "int64"  # if float trailing sub-seconds values
+     encoding["fillvalue"] = np.iinfo(np.int64).max
      encoding["units"] = EPOCH
      encoding["calendar"] = "proleptic_gregorian"
      return encoding
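The `dtype: int64` line is the substantive change: encoding times as floating-point tick counts can leave spurious trailing sub-second values, because a double cannot represent every large integer exactly. A self-contained illustration of that rounding:

```python
import numpy as np

ns = 1_717_245_296_123_456_789          # an instant as nanoseconds since epoch
roundtrip = np.int64(np.float64(ns))    # force the value through a float64 representation
print(roundtrip)                        # 1717245296123456768
print(roundtrip - ns)                   # -21 -> a 21 ns error appears out of nowhere
```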
disdrodb/utils/routines.py CHANGED
@@ -22,24 +22,28 @@ import shutil
  import tempfile

  from disdrodb.api.io import find_files
- from disdrodb.api.path import define_file_folder_path, define_temporal_resolution
+ from disdrodb.api.path import define_file_folder_path
  from disdrodb.utils.logger import (
      close_logger,
      create_logger_file,
      log_error,
      log_info,
  )
+ from disdrodb.utils.time import get_sampling_information


- def is_possible_product(accumulation_interval, sample_interval, rolling):
+ def is_possible_product(temporal_resolution, sample_interval):
      """Assess if production is possible given the requested accumulation interval and source sample_interval."""
+     # Retrieve accumulation_interval and rolling option
+     accumulation_interval, rolling = get_sampling_information(temporal_resolution)
+
      # Avoid rolling product generation at source sample interval
      if rolling and accumulation_interval == sample_interval:
          return False
      # Avoid product generation if the accumulation_interval is less than the sample interval
      if accumulation_interval < sample_interval:
          return False
-     # Avoid producti generation if accumulation_interval is not multiple of sample_interval
+     # Avoid product generation if accumulation_interval is not multiple of sample_interval
      return accumulation_interval % sample_interval == 0


@@ -67,11 +71,8 @@ def try_get_required_filepaths(
      # If no files available, print informative message
      except Exception as e:
          temporal_resolution = ""
-         if "sample_interval" in product_kwargs and "rolling" in product_kwargs:
-             temporal_resolution = define_temporal_resolution(
-                 seconds=product_kwargs["sample_interval"],
-                 rolling=product_kwargs["rolling"],
-             )
+         if "temporal_resolution" in product_kwargs:
+             temporal_resolution = product_kwargs["temporal_resolution"]
          print(str(e))
          msg = (
              f"{product} processing of {data_source} {campaign_name} {station_name} "
disdrodb/utils/time.py CHANGED
@@ -62,7 +62,7 @@ def seconds_to_temporal_resolution(seconds):
      return temporal_resolution


- def get_resampling_information(temporal_resolution):
+ def get_sampling_information(temporal_resolution):
      """
      Extract resampling information from the temporal_resolution string.

@@ -127,7 +127,7 @@ def temporal_resolution_to_seconds(temporal_resolution):
      seconds
          Duration in seconds.
      """
-     seconds, _ = get_resampling_information(temporal_resolution)
+     seconds, _ = get_sampling_information(temporal_resolution)
      return seconds

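The rename keeps the contract: parse a temporal-resolution string into an `(accumulation_seconds, rolling)` pair, with `temporal_resolution_to_seconds` remaining a thin wrapper over it. A sketch, again assuming the `'10MIN'`-style strings:

```python
from disdrodb.utils.time import get_sampling_information, temporal_resolution_to_seconds

seconds, rolling = get_sampling_information("10MIN")   # expected: (600, False)
assert temporal_resolution_to_seconds("10MIN") == seconds
```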
@@ -235,6 +235,8 @@ def regularize_dataset(
      time_dim: str = "time",
      method: Optional[str] = None,
      fill_value=None,
+     start_time=None,
+     end_time=None,
  ):
      """Regularize a dataset across time dimension with uniform resolution.

@@ -265,7 +267,13 @@ def regularize_dataset(
      """
      attrs = xr_obj.attrs.copy()
      xr_obj = _check_time_sorted(xr_obj, time_dim=time_dim)
-     start_time, end_time = get_dataset_start_end_time(xr_obj, time_dim=time_dim)
+
+     # Define start time and end_time
+     start, end = get_dataset_start_end_time(xr_obj, time_dim=time_dim)
+     if start_time is None:
+         start_time = start
+     if end_time is None:
+         end_time = end

      # Define new time index
      new_time_index = pd.date_range(
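With the new `start_time`/`end_time` arguments, the regularized index can be pinned to boundaries wider than the data, e.g. to pad a partial day to a full-day grid. A hedged usage sketch (only `start_time`/`end_time` are visible in the hunk; the `freq` argument name is an assumption about the rest of the signature):

```python
import numpy as np
import pandas as pd
import xarray as xr

from disdrodb.utils.time import regularize_dataset

# A toy dataset starting late in the day, at 60 s resolution
times = pd.date_range("2024-01-01 10:00", periods=5, freq="60s")
ds = xr.Dataset({"n": ("time", np.arange(5))}, coords={"time": times})

ds_day = regularize_dataset(
    ds,
    freq="60s",  # argument name assumed, not shown in this hunk
    start_time=np.datetime64("2024-01-01T00:00:00"),
    end_time=np.datetime64("2024-01-01T23:59:00"),
)
```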
disdrodb/viz/__init__.py CHANGED
@@ -15,16 +15,3 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  # -----------------------------------------------------------------------------.
  """DISDRODB Visualization Module."""
- from disdrodb.viz.plots import (
-     compute_dense_lines,
-     max_blend_images,
-     plot_nd,
-     to_rgba,
- )
-
- __all__ = [
-     "compute_dense_lines",
-     "max_blend_images",
-     "plot_nd",
-     "to_rgba",
- ]
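Since the re-exports and `__all__` were dropped, code that imported the plotting helpers from the subpackage root must now import from the submodule:

```python
# disdrodb 0.1.4:
# from disdrodb.viz import compute_dense_lines, max_blend_images, plot_nd, to_rgba

# disdrodb 0.2.0:
from disdrodb.viz.plots import compute_dense_lines, max_blend_images, plot_nd, to_rgba
```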
disdrodb/viz/plots.py CHANGED
@@ -20,6 +20,29 @@ import numpy as np
  import psutil
  import xarray as xr
  from matplotlib.colors import LogNorm, Normalize
+ from matplotlib.gridspec import GridSpec
+
+ from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
+ from disdrodb.l2.empirical_dsd import get_drop_average_velocity
+
+ ####-------------------------------------------------------------------------------------------------------
+ #### N(D) visualizations
+
+
+ def _single_plot_nd_distribution(drop_number_concentration, diameter, diameter_bin_width):
+     fig, ax = plt.subplots(1, 1)
+     ax.bar(
+         diameter,
+         drop_number_concentration,
+         width=diameter_bin_width,
+         edgecolor="darkgray",
+         color="lightgray",
+         label="Data",
+     )
+     ax.set_title("Drop number concentration (N(D))")
+     ax.set_xlabel("Drop diameter (mm)")
+     ax.set_ylabel("N(D) [m-3 mm-1]")
+     return ax


  def plot_nd(ds, var="drop_number_concentration", cmap=None, norm=None):
@@ -27,8 +50,15 @@ def plot_nd(ds, var="drop_number_concentration", cmap=None, norm=None):
      # Check inputs
      if var not in ds:
          raise ValueError(f"{var} is not a xarray Dataset variable!")
+
      # Check only time and diameter dimensions are specified
-     # TODO: DIAMETER_DIMENSION, "time"
+     if "time" not in ds.dims:
+         ax = _single_plot_nd_distribution(
+             drop_number_concentration=ds[var],
+             diameter=ds["diameter_bin_center"],
+             diameter_bin_width=ds["diameter_bin_width"],
+         )
+         return ax

      # Select N(D)
      ds_var = ds[[var]].compute()
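The fallback replaces the old `TODO`: when the input has no `time` dimension left, `plot_nd` now draws a bar chart of the size distribution instead of attempting a 2D plot. A usage sketch (`ds` stands for any DISDRODB dataset carrying `drop_number_concentration`):

```python
import matplotlib.pyplot as plt

from disdrodb.viz.plots import plot_nd

p = plot_nd(ds)                # time x diameter pcolormesh, as before
ax = plot_nd(ds.isel(time=0))  # 'time' dim dropped -> single-timestep bar chart
plt.show()
```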
@@ -53,6 +83,206 @@ def plot_nd(ds, var="drop_number_concentration", cmap=None, norm=None):
      return p


+ ####-------------------------------------------------------------------------------------------------------
+ #### Spectra visualizations
+
+
+ def _check_has_diameter_and_velocity_dims(da):
+     if DIAMETER_DIMENSION not in da.dims or VELOCITY_DIMENSION not in da.dims:
+         raise ValueError(f"The DataArray must have both '{DIAMETER_DIMENSION}' and '{VELOCITY_DIMENSION}' dimensions.")
+     return da
+
+
+ def _get_spectrum_variable(xr_obj, variable):
+     if not isinstance(xr_obj, (xr.Dataset, xr.DataArray)):
+         raise TypeError("Expecting xarray object as input.")
+     if isinstance(xr_obj, xr.Dataset):
+         if variable not in xr_obj:
+             raise ValueError(f"The dataset do not include {variable=}.")
+         xr_obj = xr_obj[variable]
+     xr_obj = _check_has_diameter_and_velocity_dims(xr_obj)
+     return xr_obj
+
+
+ def plot_spectrum(
+     xr_obj,
+     variable="raw_drop_number",
+     ax=None,
+     cmap=None,
+     norm=None,
+     extend="max",
+     add_colorbar=True,
+     cbar_kwargs=None,
+     title="Drop Spectrum",
+     **plot_kwargs,
+ ):
+     """Plot the spectrum.
+
+     Parameters
+     ----------
+     xr_obj : xarray.Dataset or xarray.DataArray
+         Input xarray object. If Dataset, the variable to plot must be specified.
+         If DataArray, it must have both diameter and velocity dimensions.
+     variable : str
+         Name of the variable to plot if xr_obj is a Dataset.
+     ax : matplotlib.axes.Axes, optional
+         Axes to plot on. If None, uses current axes or creates a new one.
+     cmap : Colormap, optional
+         Colormap to use. If None, uses 'Spectral_r' with 'under' set to 'none'.
+     norm : matplotlib.colors.Normalize, optional
+         Normalization for colormap. If None, uses LogNorm with vmin=1.
+     extend : {'neither', 'both', 'min', 'max'}, optional
+         Whether to draw arrows on the colorbar to indicate out-of-range values.
+         Default is 'max'.
+     add_colorbar : bool, optional
+         Whether to add a colorbar. Default is True.
+     cbar_kwargs : dict, optional
+         Additional keyword arguments for colorbar. If None, uses {'label': 'Number of particles'}.
+     title : str, optional
+         Title of the plot. Default is 'Drop Spectrum'.
+     **plot_kwargs : dict
+         Additional keyword arguments passed to xarray's plot.pcolormesh method.
+
+     Notes
+     -----
+     - If the input DataArray has a time dimension, it is summed over time before plotting
+       unless FacetGrid options (e.g., col, row) are specified in plot_kwargs.
+     - If FacetGrid options are used, the plot will create a grid of subplots for each time slice.
+       To create a FacetGrid plot, use:
+
+       ds.isel(time=slice(0, 9)).disdrodb.plot_spectrum(col="time", col_wrap=3)
+
+     """
+     # Retrieve spectrum
+     drop_number = _get_spectrum_variable(xr_obj, variable)
+
+     # Check if FacetGrid
+     is_facetgrid = "col" in plot_kwargs or "row" in plot_kwargs
+
+     # Sum over time dimension if still present
+     # - Unless FacetGrid options in plot_kwargs
+     if "time" in drop_number.dims and not is_facetgrid:
+         drop_number = drop_number.sum(dim="time")
+
+     # Define default cbar_kwargs if not specified
+     if cbar_kwargs is None:
+         cbar_kwargs = {"label": "Number of particles"}
+
+     # Define cmap and norm
+     if cmap is None:
+         cmap = plt.get_cmap("Spectral_r").copy()
+         cmap.set_under("none")
+
+     if norm is None:
+         norm = LogNorm(vmin=1, vmax=None) if drop_number.sum() > 0 else None
+
+     # Remove cbar_kwargs if add_colorbar=False
+     if not add_colorbar:
+         cbar_kwargs = None
+
+     # Plot
+     p = drop_number.plot.pcolormesh(
+         ax=ax,
+         x=DIAMETER_DIMENSION,
+         y=VELOCITY_DIMENSION,
+         cmap=cmap,
+         extend=extend,
+         norm=norm,
+         add_colorbar=add_colorbar,
+         cbar_kwargs=cbar_kwargs,
+         **plot_kwargs,
+     )
+     if not is_facetgrid:
+         p.axes.set_xlabel("Diamenter [mm]")
+         p.axes.set_ylabel("Fall velocity [m/s]")
+         p.axes.set_title(title)
+     else:
+         p.set_axis_labels("Diameter [mm]", "Fall velocity [m/s]")
+
+     return p
+
+
+ def plot_raw_and_filtered_spectra(
+     ds,
+     cmap=None,
+     norm=None,
+     extend="max",
+     add_theoretical_average_velocity=True,
+     add_measured_average_velocity=True,
+     figsize=(8, 4),
+     dpi=300,
+ ):
+     """Plot raw and filtered drop spectrum."""
+     # Retrieve spectrum arrays
+     drop_number = _get_spectrum_variable(ds, variable="drop_number")
+     if "time" in drop_number.dims:
+         drop_number = drop_number.sum(dim="time")
+     drop_number = drop_number.compute()
+
+     raw_drop_number = _get_spectrum_variable(ds, variable="raw_drop_number")
+     if "time" in raw_drop_number.dims:
+         raw_drop_number = raw_drop_number.sum(dim="time")
+     raw_drop_number = raw_drop_number.compute()
+
+     # Compute theoretical and measured average velocity if asked
+     if add_theoretical_average_velocity:
+         theoretical_average_velocity = ds["fall_velocity"]
+         if "time" in theoretical_average_velocity.dims:
+             theoretical_average_velocity = theoretical_average_velocity.mean(dim="time")
+     if add_measured_average_velocity:
+         measured_average_velocity = get_drop_average_velocity(drop_number)
+
+     # Define norm if not specified
+     if norm is None:
+         norm = LogNorm(1, raw_drop_number.max())
+
+     # Initialize figure
+     fig = plt.figure(figsize=figsize, dpi=dpi)
+     gs = GridSpec(1, 2, width_ratios=[1, 1.15], wspace=0.05)  # More space for ax2
+     ax1 = fig.add_subplot(gs[0])
+     ax2 = fig.add_subplot(gs[1])
+
+     # Plot raw_drop_number
+     plot_spectrum(raw_drop_number, ax=ax1, cmap=cmap, norm=norm, extend=extend, add_colorbar=False, title="")
+
+     # Add velocities if asked
+     if add_theoretical_average_velocity:
+         theoretical_average_velocity.plot(ax=ax1, c="k", linestyle="dashed")
+     if add_measured_average_velocity:
+         measured_average_velocity.plot(ax=ax1, c="k", linestyle="dotted")
+
+     # Improve plot appearance
+     ax1.set_xlabel("Diamenter [mm]")
+     ax1.set_ylabel("Fall velocity [m/s]")
+     ax1.set_title("Raw Spectrum")
+
+     # Plot drop_number
+     plot_spectrum(drop_number, ax=ax2, cmap=cmap, norm=norm, extend=extend, add_colorbar=True, title="")
+
+     # Add velocities if asked
+     if add_theoretical_average_velocity:
+         theoretical_average_velocity.plot(ax=ax2, c="k", linestyle="dashed", label="Theoretical velocity")
+     if add_measured_average_velocity:
+         measured_average_velocity.plot(ax=ax2, c="k", linestyle="dotted", label="Measured average velocity")
+
+     # Improve plot appearance
+     ax2.set_yticks([])
+     ax2.set_yticklabels([])
+     ax2.set_xlabel("Diamenter [mm]")
+     ax2.set_ylabel("")
+     ax2.set_title("Filtered Spectrum")
+
+     # Add legend
+     if add_theoretical_average_velocity or add_measured_average_velocity:
+         ax2.legend(loc="lower right", frameon=False)
+
+     return fig
+
+
+ ####-------------------------------------------------------------------------------------------------------
+ #### DenseLines
+
+
  def normalize_array(arr, method="max"):
      """Normalize a NumPy array according to the chosen method.

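Typical entry points for the new spectra plots, following the FacetGrid example embedded in the `plot_spectrum` docstring (`ds` stands for a DISDRODB dataset carrying `raw_drop_number`, `drop_number` and `fall_velocity`):

```python
import matplotlib.pyplot as plt

from disdrodb.viz.plots import plot_raw_and_filtered_spectra, plot_spectrum

p = plot_spectrum(ds, variable="raw_drop_number")  # summed over time by default
grid = ds.isel(time=slice(0, 9)).disdrodb.plot_spectrum(col="time", col_wrap=3)  # FacetGrid
fig = plot_raw_and_filtered_spectra(ds)  # side-by-side raw vs filtered spectra
plt.show()
```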
{disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: disdrodb
- Version: 0.1.4
+ Version: 0.2.0
  Summary: disdrodb provides tools to download, standardize, share and analyze global disdrometer data.
  Author: Gionata Ghiggi
  Project-URL: homepage, https://github.com/ltelab/disdrodb
@@ -33,6 +33,7 @@ Requires-Dist: numpy
  Requires-Dist: scipy
  Requires-Dist: dask[distributed]
  Requires-Dist: xarray
+ Requires-Dist: bottleneck
  Requires-Dist: matplotlib
  Provides-Extra: dev
  Requires-Dist: jupyter; extra == "dev"