disdrodb 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (125)
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +22 -4
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/io.py +20 -18
  6. disdrodb/api/path.py +42 -77
  7. disdrodb/api/search.py +89 -23
  8. disdrodb/cli/disdrodb_create_summary.py +1 -1
  9. disdrodb/cli/disdrodb_run_l0.py +1 -1
  10. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  11. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  13. disdrodb/cli/disdrodb_run_l1.py +1 -1
  14. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  15. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  16. disdrodb/configs.py +30 -83
  17. disdrodb/constants.py +4 -3
  18. disdrodb/data_transfer/download_data.py +4 -2
  19. disdrodb/docs.py +2 -2
  20. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  21. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  22. disdrodb/etc/products/L1/LPM_V0/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/global.yaml +6 -0
  29. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  30. disdrodb/etc/products/L2E/global.yaml +1 -1
  31. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/global.yaml +1 -1
  33. disdrodb/issue/checks.py +2 -2
  34. disdrodb/l0/check_configs.py +1 -1
  35. disdrodb/l0/configs/LPM/l0a_encodings.yml +0 -1
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +0 -4
  37. disdrodb/l0/configs/LPM/l0b_encodings.yml +9 -9
  38. disdrodb/l0/configs/LPM/raw_data_format.yml +11 -11
  39. disdrodb/l0/configs/LPM_V0/bins_diameter.yml +103 -0
  40. disdrodb/l0/configs/LPM_V0/bins_velocity.yml +103 -0
  41. disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +45 -0
  42. disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +180 -0
  43. disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +410 -0
  44. disdrodb/l0/configs/LPM_V0/raw_data_format.yml +474 -0
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +8 -8
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +9 -9
  48. disdrodb/l0/l0_reader.py +2 -2
  49. disdrodb/l0/l0a_processing.py +6 -2
  50. disdrodb/l0/l0b_processing.py +26 -19
  51. disdrodb/l0/l0c_processing.py +17 -3
  52. disdrodb/l0/manuals/LPM_V0.pdf +0 -0
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +15 -7
  54. disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +279 -0
  55. disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +276 -0
  56. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  57. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +103 -0
  58. disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +216 -0
  59. disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +208 -0
  60. disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +219 -0
  61. disdrodb/l0/readers/LPM/USA/CHARLESTON.py +229 -0
  62. disdrodb/l0/readers/{LPM → LPM_V0}/BELGIUM/ULIEGE.py +33 -49
  63. disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +240 -0
  64. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  65. disdrodb/l0/readers/{PARSIVEL2 → PARSIVEL}/NASA/LPVEX.py +16 -28
  66. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +1 -1
  67. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +3 -3
  68. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  69. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/JAPAN/PRECIP.py +155 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +14 -7
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +8 -3
  73. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +28 -5
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +1 -1
  75. disdrodb/l0/readers/PARSIVEL2/{GPM/GCPEX.py → NORWAY/UIB.py} +54 -29
  76. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/{PANGASA.py → PAGASA.py} +6 -3
  77. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +1 -1
  78. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  79. disdrodb/l0/readers/{PARSIVEL/GPM/PIERS.py → PARSIVEL2/USA/CSU.py} +62 -29
  80. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +51 -24
  81. disdrodb/l0/readers/{PARSIVEL/GPM/IFLOODS.py → RD80/BRAZIL/ATTO_RD80.py} +50 -34
  82. disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py +1 -1
  83. disdrodb/l1/beard_model.py +45 -1
  84. disdrodb/l1/fall_velocity.py +1 -6
  85. disdrodb/l1/filters.py +2 -0
  86. disdrodb/l1/processing.py +6 -5
  87. disdrodb/l1/resampling.py +101 -38
  88. disdrodb/l2/empirical_dsd.py +12 -8
  89. disdrodb/l2/processing.py +4 -3
  90. disdrodb/metadata/search.py +3 -4
  91. disdrodb/routines/l0.py +4 -4
  92. disdrodb/routines/l1.py +173 -60
  93. disdrodb/routines/l2.py +121 -269
  94. disdrodb/routines/options.py +347 -0
  95. disdrodb/routines/wrappers.py +9 -1
  96. disdrodb/scattering/axis_ratio.py +3 -0
  97. disdrodb/scattering/routines.py +1 -1
  98. disdrodb/summary/routines.py +765 -724
  99. disdrodb/utils/archiving.py +51 -44
  100. disdrodb/utils/attrs.py +1 -1
  101. disdrodb/utils/compression.py +4 -2
  102. disdrodb/utils/dask.py +35 -15
  103. disdrodb/utils/dict.py +33 -0
  104. disdrodb/utils/encoding.py +1 -1
  105. disdrodb/utils/manipulations.py +7 -1
  106. disdrodb/utils/routines.py +9 -8
  107. disdrodb/utils/time.py +9 -1
  108. disdrodb/viz/__init__.py +0 -13
  109. disdrodb/viz/plots.py +209 -0
  110. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/METADATA +1 -1
  111. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/RECORD +124 -95
  112. disdrodb/l0/readers/PARSIVEL/GPM/LPVEX.py +0 -85
  113. /disdrodb/etc/products/L2M/{GAMMA_GS_ND_MAE.yaml → MODELS/GAMMA_GS_ND_MAE.yaml} +0 -0
  114. /disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +0 -0
  115. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_LOG_ND_MAE.yaml → MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml} +0 -0
  116. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_ND_MAE.yaml → MODELS/LOGNORMAL_GS_ND_MAE.yaml} +0 -0
  117. /disdrodb/etc/products/L2M/{LOGNORMAL_ML.yaml → MODELS/LOGNORMAL_ML.yaml} +0 -0
  118. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  119. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  120. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  121. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  122. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/entry_points.txt +0 -0
  124. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/licenses/LICENSE +0 -0
  125. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/top_level.txt +0 -0
disdrodb/utils/archiving.py CHANGED
@@ -23,10 +23,7 @@ import pandas as pd
  from disdrodb.api.info import get_start_end_time_from_filepaths
  from disdrodb.api.io import open_netcdf_files
  from disdrodb.utils.event import group_timesteps_into_event
- from disdrodb.utils.time import (
- ensure_sorted_by_time,
- ensure_timedelta_seconds,
- )
+ from disdrodb.utils.time import ensure_sorted_by_time, temporal_resolution_to_seconds

  ####---------------------------------------------------------------------------------
  #### Time blocks
@@ -140,6 +137,7 @@ def identify_events(
  neighbor_time_interval : str
  The time interval around a given a timestep defining the neighborhood.
  Only timesteps that fall within this time interval before or after a timestep are considered neighbors.
+ The neighbor_time_interval must be at least equal to the dataset sampling interval!
  neighbor_min_size : int, optional
  The minimum number of neighboring timesteps required within `neighbor_time_interval` for a
  timestep to be considered non-isolated. Isolated timesteps are removed !
@@ -171,6 +169,12 @@ def identify_events(
  # Define candidate timesteps to group into events
  idx_valid = ds["N"].to_numpy() > min_drops
  timesteps = ds["time"].to_numpy()[idx_valid]
+ if "sample_interval" in ds:
+ sample_interval = ds["sample_interval"].compute().item()
+ if temporal_resolution_to_seconds(neighbor_time_interval) < sample_interval:
+ msg = "'neighbor_time_interval' must be at least equal to the dataset sample interval ({sample_interval} s)"
+ raise ValueError(msg)
+
  # Define event list
  event_list = group_timesteps_into_event(
  timesteps=timesteps,
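For reference, the check added above rejects neighborhood windows shorter than the dataset sampling interval. A minimal standalone sketch of that validation, using pandas to parse the interval string (a hypothetical stand-in; the actual temporal_resolution_to_seconds helper in disdrodb.utils.time may expect a different string format):

    import pandas as pd

    def interval_to_seconds(interval):
        # Hypothetical stand-in for temporal_resolution_to_seconds()
        return pd.Timedelta(interval).total_seconds()

    sample_interval = 60  # dataset sampling interval (seconds)
    neighbor_time_interval = "5min"
    assert interval_to_seconds(neighbor_time_interval) >= sample_interval  # 300 s >= 60 s, OK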
@@ -326,29 +330,32 @@ def _map_files_to_blocks(files_start_time, files_end_time, filepaths, block_star
  return results


- def get_files_partitions(list_partitions, filepaths, sample_interval, accumulation_interval, rolling): # noqa: ARG001
+ def group_files_by_temporal_partitions(
+ temporal_partitions,
+ filepaths,
+ block_starts_offset=0,
+ block_ends_offset=0,
+ ):
  """
  Provide information about the required files for each event.

- For each event in `list_partitions`, this function identifies the file paths from `filepaths` that
- overlap with the event period, adjusted by the `accumulation_interval`. The event period is
- extended backward or forward based on the `rolling` parameter.
+ For each time block in `temporal_partitions`, the function identifies the `filepaths` that
+ overlap such time period. The time blocks of `temporal_partitions` can be adjusted using
+ block_starts_offset and block_ends_offset e.g. for resampling applications.

  Parameters
  ----------
- list_partitions : list of dict
- List of events, where each event is a dictionary containing at least 'start_time' and 'end_time'
+ temporal_partitions : list of dict
+ List of time blocks, where each time blocks is a dictionary containing at least 'start_time' and 'end_time'
  keys with `numpy.datetime64` values.
  filepaths : list of str
  List of file paths corresponding to data files.
- sample_interval : numpy.timedelta64 or int
- The sample interval of the input dataset.
- accumulation_interval : numpy.timedelta64 or int
- Time interval to adjust the event period for accumulation. If an integer is provided, it is
- assumed to be in seconds.
- rolling : bool
- If True, adjust the event period backward by `accumulation_interval` (rolling backward).
- If False, adjust forward (aggregate forward).
+ block_starts_offset: int
+ Optional offset (in seconds) to add to time blocks starts.
+ Provide negative offset to go back in time.
+ block_ends_offset: int
+ Optional offset (in seconds) to add to time blocks ends.
+ Provide negative offset to go back in time.

  Returns
  -------
@@ -359,54 +366,54 @@ def get_files_partitions(list_partitions, filepaths, sample_interval, accumulati
  - 'filepaths': List of file paths overlapping with the adjusted event period.

  """
- if len(filepaths) == 0 or len(list_partitions) == 0:
+ if len(filepaths) == 0 or len(temporal_partitions) == 0:
  return []

- # Ensure sample_interval and accumulation_interval is numpy.timedelta64
- accumulation_interval = ensure_timedelta_seconds(accumulation_interval)
- sample_interval = ensure_timedelta_seconds(sample_interval)
-
- # Define offset on event_end_time
- offset = accumulation_interval if sample_interval != accumulation_interval else ensure_timedelta_seconds(0)
-
  # Retrieve file start_time and end_time
  files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)

  # Retrieve partitions blocks start and end time arrays
- block_starts = np.array([p["start_time"] for p in list_partitions]).astype("M8[s]")
- block_ends = np.array([p["end_time"] for p in list_partitions]).astype("M8[s]")
+ block_starts = np.array([p["start_time"] for p in temporal_partitions]).astype("M8[s]")
+ block_ends = np.array([p["end_time"] for p in temporal_partitions]).astype("M8[s]")

- # Add optional offset for resampling
- # TODO: expanding partition time should be done only at L1 stage when resampling
- # In disdrodb, the time reported is time at the start of the accumulation period !
- # If sensors report time at the end of measurement interval, we might being reporting time
- # with an inaccuracy equals to the sensor measurement interval.
- # We could correct for that at L0C stage already !
- block_ends = block_ends + offset
+ # Add optional offset to blocks' starts/ends (e.g. for resampling)
+ block_starts = block_starts + block_starts_offset
+ block_ends = block_ends + block_ends_offset

  # Map filepaths to corresponding time blocks
  list_event_info = _map_files_to_blocks(files_start_time, files_end_time, filepaths, block_starts, block_ends)
  return list_event_info


- def get_files_per_time_block(filepaths, freq="day", tolerance_seconds=120):
+ def group_files_by_time_block(filepaths, freq="day", tolerance_seconds=120):
  """
- Organize files by the days they cover based on their start and end times.
+ Organize files by time blocks based on their start and end times.
+
+ If tolerance_seconds is specified, it adds some tolerance to files start and end_time.
+ This means that files starting/ending next to the time blocks boundaries will be included in both
+ time blocks. This can be useful to deal with imprecise time within files.

  Parameters
  ----------
  filepaths : list of str
  List of file paths to be processed.
+ freq: str
+ Frequency of the time block. The default frequency is 'day'.
+ tolerance_seconds: int
+ Tolerance in seconds to subtract/add to files start time and end time.

  Returns
  -------
- dict
- Dictionary where keys are days (as strings) and values are lists of file paths
- that cover those days.
+ list of dict
+ A list where each element is a dictionary containing:
+ - 'start_time': Adjusted start time of the event (`datetime.datetime64`).
+ - 'end_time': Adjusted end time of the event (`datetime.datetime64`).
+ - 'filepaths': List of file paths overlapping with the adjusted event period.

  Notes
  -----
- This function adds a tolerance of 60 seconds to account for imprecise time logging by the sensors.
+ In the DISDRODB L0C processing chain, a tolerance of 120 seconds is used to account
+ for the possible imprecise/drifting time logged by the sensors before it is corrected.
  """
  # Empty filepaths list return a dictionary
  if len(filepaths) == 0:
@@ -421,13 +428,13 @@ def get_files_per_time_block(filepaths, freq="day", tolerance_seconds=120):
  files_end_time = files_end_time + np.array(tolerance_seconds, dtype="m8[s]")

  # Identify candidate blocks
- list_partitions = identify_time_partitions(
+ temporal_partitions = identify_time_partitions(
  start_times=files_start_time,
  end_times=files_end_time,
  freq=freq,
  )
- block_starts = np.array([b["start_time"] for b in list_partitions]).astype("M8[s]")
- block_ends = np.array([b["end_time"] for b in list_partitions]).astype("M8[s]")
+ block_starts = np.array([b["start_time"] for b in temporal_partitions]).astype("M8[s]")
+ block_ends = np.array([b["end_time"] for b in temporal_partitions]).astype("M8[s]")

  # Map filepaths to corresponding time blocks
  list_event_info = _map_files_to_blocks(files_start_time, files_end_time, filepaths, block_starts, block_ends)
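The offset and tolerance arithmetic used by these functions relies on NumPy datetime64/timedelta64 semantics: integer offsets are interpreted in the unit of the "M8[s]" arrays. A small self-contained illustration (timestamps are invented):

    import numpy as np

    block_starts = np.array(["2024-06-01T00:00:00"], dtype="M8[s]")
    block_ends = np.array(["2024-06-02T00:00:00"], dtype="M8[s]")

    # block_ends_offset=600 extends each block end by 10 minutes
    block_ends = block_ends + 600

    # tolerance_seconds is applied to file times via an explicit timedelta64
    files_end_time = np.array(["2024-06-01T23:59:10"], dtype="M8[s]")
    files_end_time = files_end_time + np.array(120, dtype="m8[s]")

    print(block_ends[0], files_end_time[0])  # 2024-06-02T00:10:00 2024-06-02T00:01:10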
disdrodb/utils/attrs.py CHANGED
@@ -31,7 +31,7 @@ def get_attrs_dict():
  """Get attributes dictionary for DISDRODB product variables and coordinates."""
  import disdrodb

- configs_path = os.path.join(disdrodb.__root_path__, "disdrodb", "etc", "configs")
+ configs_path = os.path.join(disdrodb.package_dir, "etc", "configs")
  attrs_dict = read_yaml(os.path.join(configs_path, "attributes.yaml"))
  return attrs_dict

disdrodb/utils/compression.py CHANGED
@@ -82,7 +82,7 @@ def unzip_file_on_terminal(filepath: str, dest_path: str) -> str:
  subprocess.run(cmd, check=True)


- def _zip_dir(dir_path: str) -> str:
+ def _zip_dir(dir_path: str, dst_dir=None) -> str:
  """Zip a directory into a file located in the same directory.

  Parameters
@@ -95,7 +95,9 @@ def _zip_dir(dir_path: str) -> str:
  str
  Path of the zip archive.
  """
- output_path_without_extension = os.path.join(tempfile.gettempdir(), os.path.basename(dir_path))
+ if dst_dir is None:
+ dst_dir = tempfile.gettempdir()
+ output_path_without_extension = os.path.join(dst_dir, os.path.basename(dir_path))
  output_path = output_path_without_extension + ".zip"
  shutil.make_archive(output_path_without_extension, "zip", dir_path)
  return output_path
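The new dst_dir parameter only redirects where the archive is written; the zipping itself is plain shutil.make_archive. A self-contained sketch of the same pattern (standard library only):

    import os
    import shutil
    import tempfile

    def zip_dir(dir_path, dst_dir=None):
        # Mirrors the patched _zip_dir logic: default destination is the temp directory
        if dst_dir is None:
            dst_dir = tempfile.gettempdir()
        base = os.path.join(dst_dir, os.path.basename(dir_path))
        shutil.make_archive(base, "zip", dir_path)
        return base + ".zip"

    src_dir = tempfile.mkdtemp()
    open(os.path.join(src_dir, "example.txt"), "w").close()
    print(zip_dir(src_dir))  # e.g. /tmp/tmpXXXXXXXX.zip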
disdrodb/utils/dask.py CHANGED
@@ -113,7 +113,13 @@ def close_dask_cluster(cluster, client):
  logger.setLevel(original_level)


- def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
+ def _batch_iterable(iterable, n):
+ """Yield successive n-sized chunks from iterable."""
+ for i in range(0, len(iterable), n):
+ yield iterable[i : i + n]
+
+
+ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str, max_tasks_per_batch=5_000):
  """
  Execute Dask tasks and skip failed ones.

@@ -125,6 +131,9 @@ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
  Whether to execute in parallel with Dask or not.
  logs_dir : str
  Directory to store FAILED_TASKS.log.
+ max_tasks_per_batch : int or None, optional
+ Maximum number of tasks to submit to `client.compute()` at once.
+ The default is 5000. Dask struggle if more than 10_000 tasks are submitted.

  Returns
  -------
@@ -134,34 +143,45 @@ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
  """
  from dask.distributed import get_client

+ if not parallel:
+ # Non-parallel mode: just return results directly
+ return list_tasks
+
  # Ensure logs_dir exists
  os.makedirs(logs_dir, exist_ok=True)

  # Define file name where to log failed dask tasks
  failed_log_path = os.path.join(logs_dir, "FAILED_DASK_TASKS.log")

- if not parallel:
- # Non-parallel mode: just return results directly
- return list_tasks
-
  # Ensure we have a Dask client
  try:
  client = get_client()
  except ValueError:
  raise ValueError("No Dask Distributed Client found.")

- # Compute tasks (all concurrently)
- # - Runs tasks == num_workers * threads_per_worker (which is 1 for DISDRODB)
- # - If errors occurs in some, skip it
- futures = client.compute(list_tasks)
- results = client.gather(futures, errors="skip")
+ all_results = []
+ failed_futures = []
+
+ # Batch execution
+ task_batches = list(_batch_iterable(list_tasks, max_tasks_per_batch)) if max_tasks_per_batch else [list_tasks]
+
+ for batch in task_batches:
+ # Compute tasks (all concurrently)
+ # - Runs tasks == num_workers * threads_per_worker (which is 1 for DISDRODB)
+ # - If errors occurs in some, skip it
+ futures = client.compute(batch)
+ results = client.gather(futures, errors="skip")
+
+ # Identify and collect failed futures
+ batch_failed = [f for f in futures if f.status != "finished"]
+ failed_futures.extend(batch_failed)

- # Collect failed futures
- failed_futures = [f for f in futures if f.status != "finished"] # "error"
+ # Collect results from successful tasks
+ all_results.extend(results)

  # If no tasks failed, return results
  if not failed_futures:
- return results
+ return all_results

  # Otherwise define log file listing failed tasks
  with open(failed_log_path, "w") as f:
@@ -170,5 +190,5 @@ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
  f.write(f"ERROR - DASK TASK FAILURE - Task {fut.key} failed: {err}\n")

  # Append to list of log filepaths (results) the dask failing log
- results.append(failed_log_path)
- return results
+ all_results.append(failed_log_path)
+ return all_results
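The batching helper is plain Python and easy to sanity-check in isolation (batch size chosen for the example):

    def batch_iterable(iterable, n):
        # Same logic as the new _batch_iterable() helper
        for i in range(0, len(iterable), n):
            yield iterable[i : i + n]

    tasks = list(range(12))
    print(list(batch_iterable(tasks, 5)))
    # [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11]]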
disdrodb/utils/dict.py ADDED
@@ -0,0 +1,33 @@
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ # -----------------------------------------------------------------------------.
+ """This module contains functions for manipulating dictionaries."""
+
+
+ def extract_product_kwargs(kwargs, product):
+ """Infer product kwargs dictionary."""
+ from disdrodb.api.checks import check_product
+ from disdrodb.constants import PRODUCTS_ARGUMENTS
+
+ check_product(product)
+ product_kwargs_keys = set(PRODUCTS_ARGUMENTS.get(product, []))
+ return extract_dictionary(kwargs, keys=product_kwargs_keys)
+
+
+ def extract_dictionary(dictionary, keys):
+ """Extract a subset of keys from the dictionary, removing them from the input dictionary."""
+ return {k: dictionary.pop(k) for k in keys if k in dictionary}
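Note that extract_dictionary mutates its input: the extracted keys are popped from the caller's kwargs. A small runnable illustration (the key names are arbitrary):

    def extract_dictionary(dictionary, keys):
        # Same logic as the new helper
        return {k: dictionary.pop(k) for k in keys if k in dictionary}

    kwargs = {"temporal_resolution": "10MIN", "model_name": "GAMMA_ML", "parallel": True}
    product_kwargs = extract_dictionary(kwargs, keys=["temporal_resolution", "model_name"])
    print(product_kwargs)  # {'temporal_resolution': '10MIN', 'model_name': 'GAMMA_ML'}
    print(kwargs)          # {'parallel': True}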
disdrodb/utils/encoding.py CHANGED
@@ -31,7 +31,7 @@ def get_encodings_dict():
  """Get encoding dictionary for DISDRODB product variables and coordinates."""
  import disdrodb

- configs_path = os.path.join(disdrodb.__root_path__, "disdrodb", "etc", "configs")
+ configs_path = os.path.join(disdrodb.package_dir, "etc", "configs")
  encodings_dict = read_yaml(os.path.join(configs_path, "encodings.yaml"))
  return encodings_dict

disdrodb/utils/manipulations.py CHANGED
@@ -26,7 +26,13 @@ from disdrodb.utils.xarray import unstack_datarray_dimension

  def get_diameter_bin_edges(ds):
  """Retrieve diameter bin edges."""
- bin_edges = np.append(ds["diameter_bin_lower"].compute().data, ds["diameter_bin_upper"].compute().data[-1])
+ bin_edges = np.append(ds["diameter_bin_lower"].to_numpy(), ds["diameter_bin_upper"].to_numpy()[-1])
+ return bin_edges
+
+
+ def get_velocity_bin_edges(ds):
+ """Retrieve velocity bin edges."""
+ bin_edges = np.append(ds["velocity_bin_lower"].to_numpy(), ds["velocity_bin_upper"].to_numpy()[-1])
  return bin_edges
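A minimal sketch of the bin-edge construction on a synthetic dataset (bin values and the dimension name are invented, not an actual sensor configuration):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        coords={
            "diameter_bin_lower": ("diameter_bin_center", [0.0, 0.2, 0.4]),
            "diameter_bin_upper": ("diameter_bin_center", [0.2, 0.4, 0.6]),
        },
    )
    # Edges = all lower bounds plus the last upper bound
    bin_edges = np.append(ds["diameter_bin_lower"].to_numpy(), ds["diameter_bin_upper"].to_numpy()[-1])
    print(bin_edges)  # [0.  0.2 0.4 0.6]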
disdrodb/utils/routines.py CHANGED
@@ -22,24 +22,28 @@ import shutil
  import tempfile

  from disdrodb.api.io import find_files
- from disdrodb.api.path import define_file_folder_path, define_temporal_resolution
+ from disdrodb.api.path import define_file_folder_path
  from disdrodb.utils.logger import (
  close_logger,
  create_logger_file,
  log_error,
  log_info,
  )
+ from disdrodb.utils.time import get_sampling_information


- def is_possible_product(accumulation_interval, sample_interval, rolling):
+ def is_possible_product(temporal_resolution, sample_interval):
  """Assess if production is possible given the requested accumulation interval and source sample_interval."""
+ # Retrieve accumulation_interval and rolling option
+ accumulation_interval, rolling = get_sampling_information(temporal_resolution)
+
  # Avoid rolling product generation at source sample interval
  if rolling and accumulation_interval == sample_interval:
  return False
  # Avoid product generation if the accumulation_interval is less than the sample interval
  if accumulation_interval < sample_interval:
  return False
- # Avoid producti generation if accumulation_interval is not multiple of sample_interval
+ # Avoid product generation if accumulation_interval is not multiple of sample_interval
  return accumulation_interval % sample_interval == 0
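The feasibility test boils down to integer arithmetic on the two intervals, e.g. a 10-minute accumulation from a 30-second native sampling is allowed because 600 % 30 == 0 (values below are illustrative):

    sample_interval = 30          # native sensor sampling (seconds)
    accumulation_interval = 600   # requested accumulation (seconds), e.g. 10MIN
    rolling = False

    possible = (
        not (rolling and accumulation_interval == sample_interval)
        and accumulation_interval >= sample_interval
        and accumulation_interval % sample_interval == 0
    )
    print(possible)  # True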
@@ -67,11 +71,8 @@ def try_get_required_filepaths(
  # If no files available, print informative message
  except Exception as e:
  temporal_resolution = ""
- if "sample_interval" in product_kwargs and "rolling" in product_kwargs:
- temporal_resolution = define_temporal_resolution(
- seconds=product_kwargs["sample_interval"],
- rolling=product_kwargs["rolling"],
- )
+ if "temporal_resolution" in product_kwargs:
+ temporal_resolution = product_kwargs["temporal_resolution"]
  print(str(e))
  msg = (
  f"{product} processing of {data_source} {campaign_name} {station_name} "
disdrodb/utils/time.py CHANGED
@@ -235,6 +235,8 @@ def regularize_dataset(
  time_dim: str = "time",
  method: Optional[str] = None,
  fill_value=None,
+ start_time=None,
+ end_time=None,
  ):
  """Regularize a dataset across time dimension with uniform resolution.

@@ -265,7 +267,13 @@ def regularize_dataset(
  """
  attrs = xr_obj.attrs.copy()
  xr_obj = _check_time_sorted(xr_obj, time_dim=time_dim)
- start_time, end_time = get_dataset_start_end_time(xr_obj, time_dim=time_dim)
+
+ # Define start time and end_time
+ start, end = get_dataset_start_end_time(xr_obj, time_dim=time_dim)
+ if start_time is None:
+ start_time = start
+ if end_time is None:
+ end_time = end

  # Define new time index
  new_time_index = pd.date_range(
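The new start_time/end_time arguments let callers pin the regularized time index to an externally chosen window instead of the dataset's own extent. A minimal standalone sketch of that reindexing pattern (the frequency and fill value are illustrative, not the function's defaults):

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Irregular input series
    times = pd.to_datetime(["2024-06-01 00:00", "2024-06-01 00:02", "2024-06-01 00:05"])
    ds = xr.Dataset({"n": ("time", [1, 2, 3])}, coords={"time": times})

    # Regularize onto a 1-minute index spanning an explicit window
    new_time_index = pd.date_range(start="2024-06-01 00:00", end="2024-06-01 00:06", freq="1min")
    ds_regular = ds.reindex(time=new_time_index, fill_value=np.nan)
    print(ds_regular["n"].to_numpy())  # [ 1. nan  2. nan nan  3. nan]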
disdrodb/viz/__init__.py CHANGED
@@ -15,16 +15,3 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  # -----------------------------------------------------------------------------.
  """DISDRODB Visualization Module."""
- from disdrodb.viz.plots import (
- compute_dense_lines,
- max_blend_images,
- plot_nd,
- to_rgba,
- )
-
- __all__ = [
- "compute_dense_lines",
- "max_blend_images",
- "plot_nd",
- "to_rgba",
- ]
disdrodb/viz/plots.py CHANGED
@@ -20,6 +20,13 @@ import numpy as np
  import psutil
  import xarray as xr
  from matplotlib.colors import LogNorm, Normalize
+ from matplotlib.gridspec import GridSpec
+
+ from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
+ from disdrodb.l2.empirical_dsd import get_drop_average_velocity
+
+ ####-------------------------------------------------------------------------------------------------------
+ #### N(D) visualizations


  def _single_plot_nd_distribution(drop_number_concentration, diameter, diameter_bin_width):
@@ -76,6 +83,208 @@ def plot_nd(ds, var="drop_number_concentration", cmap=None, norm=None):
  return p


+ ####-------------------------------------------------------------------------------------------------------
+ #### Spectra visualizations
+
+
+ def _check_has_diameter_and_velocity_dims(da):
+ if DIAMETER_DIMENSION not in da.dims or VELOCITY_DIMENSION not in da.dims:
+ raise ValueError(f"The DataArray must have both '{DIAMETER_DIMENSION}' and '{VELOCITY_DIMENSION}' dimensions.")
+ return da
+
+
+ def _get_spectrum_variable(xr_obj, variable):
+ if not isinstance(xr_obj, (xr.Dataset, xr.DataArray)):
+ raise TypeError("Expecting xarray object as input.")
+ if VELOCITY_DIMENSION not in xr_obj.dims:
+ raise ValueError("2D spectrum not available.")
+ if isinstance(xr_obj, xr.Dataset):
+ if variable not in xr_obj:
+ raise ValueError(f"The dataset do not include {variable=}.")
+ xr_obj = xr_obj[variable]
+ xr_obj = _check_has_diameter_and_velocity_dims(xr_obj)
+ return xr_obj
+
+
+ def plot_spectrum(
+ xr_obj,
+ variable="raw_drop_number",
+ ax=None,
+ cmap=None,
+ norm=None,
+ extend="max",
+ add_colorbar=True,
+ cbar_kwargs=None,
+ title="Drop Spectrum",
+ **plot_kwargs,
+ ):
+ """Plot the spectrum.
+
+ Parameters
+ ----------
+ xr_obj : xarray.Dataset or xarray.DataArray
+ Input xarray object. If Dataset, the variable to plot must be specified.
+ If DataArray, it must have both diameter and velocity dimensions.
+ variable : str
+ Name of the variable to plot if xr_obj is a Dataset.
+ ax : matplotlib.axes.Axes, optional
+ Axes to plot on. If None, uses current axes or creates a new one.
+ cmap : Colormap, optional
+ Colormap to use. If None, uses 'Spectral_r' with 'under' set to 'none'.
+ norm : matplotlib.colors.Normalize, optional
+ Normalization for colormap. If None, uses LogNorm with vmin=1.
+ extend : {'neither', 'both', 'min', 'max'}, optional
+ Whether to draw arrows on the colorbar to indicate out-of-range values.
+ Default is 'max'.
+ add_colorbar : bool, optional
+ Whether to add a colorbar. Default is True.
+ cbar_kwargs : dict, optional
+ Additional keyword arguments for colorbar. If None, uses {'label': 'Number of particles '}.
+ title : str, optional
+ Title of the plot. Default is 'Drop Spectrum'.
+ **plot_kwargs : dict
+ Additional keyword arguments passed to xarray's plot.pcolormesh method.
+
+ Notes
+ -----
+ - If the input DataArray has a time dimension, it is summed over time before plotting
+ unless FacetGrid options (e.g., col, row) are specified in plot_kwargs.
+ - If FacetGrid options are used, the plot will create a grid of subplots for each time slice.
+ To create a FacetGrid plot, use:
+
+ ds.isel(time=slice(0, 9)).disdrodb.plot_spectrum(col="time", col_wrap=3)
+
+ """
+ # Retrieve spectrum
+ drop_number = _get_spectrum_variable(xr_obj, variable)
+
+ # Check if FacetGrid
+ is_facetgrid = "col" in plot_kwargs or "row" in plot_kwargs
+
+ # Sum over time dimension if still present
+ # - Unless FacetGrid options in plot_kwargs
+ if "time" in drop_number.dims and not is_facetgrid:
+ drop_number = drop_number.sum(dim="time")
+
+ # Define default cbar_kwargs if not specified
+ if cbar_kwargs is None:
+ cbar_kwargs = {"label": "Number of particles"}
+
+ # Define cmap and norm
+ if cmap is None:
+ cmap = plt.get_cmap("Spectral_r").copy()
+ cmap.set_under("none")
+
+ if norm is None:
+ norm = LogNorm(vmin=1, vmax=None) if drop_number.sum() > 0 else None
+
+ # Remove cbar_kwargs if add_colorbar=False
+ if not add_colorbar:
+ cbar_kwargs = None
+
+ # Plot
+ p = drop_number.plot.pcolormesh(
+ ax=ax,
+ x=DIAMETER_DIMENSION,
+ y=VELOCITY_DIMENSION,
+ cmap=cmap,
+ extend=extend,
+ norm=norm,
+ add_colorbar=add_colorbar,
+ cbar_kwargs=cbar_kwargs,
+ **plot_kwargs,
+ )
+ if not is_facetgrid:
+ p.axes.set_xlabel("Diamenter [mm]")
+ p.axes.set_ylabel("Fall velocity [m/s]")
+ p.axes.set_title(title)
+ else:
+ p.set_axis_labels("Diameter [mm]", "Fall velocity [m/s]")
+
+ return p
+
+
+ def plot_raw_and_filtered_spectra(
+ ds,
+ cmap=None,
+ norm=None,
+ extend="max",
+ add_theoretical_average_velocity=True,
+ add_measured_average_velocity=True,
+ figsize=(8, 4),
+ dpi=300,
+ ):
+ """Plot raw and filtered drop spectrum."""
+ # Retrieve spectrum arrays
+ drop_number = _get_spectrum_variable(ds, variable="drop_number")
+ if "time" in drop_number.dims:
+ drop_number = drop_number.sum(dim="time")
+ drop_number = drop_number.compute()
+
+ raw_drop_number = _get_spectrum_variable(ds, variable="raw_drop_number")
+ if "time" in raw_drop_number.dims:
+ raw_drop_number = raw_drop_number.sum(dim="time")
+ raw_drop_number = raw_drop_number.compute()
+
+ # Compute theoretical and measured average velocity if asked
+ if add_theoretical_average_velocity:
+ theoretical_average_velocity = ds["fall_velocity"]
+ if "time" in theoretical_average_velocity.dims:
+ theoretical_average_velocity = theoretical_average_velocity.mean(dim="time")
+ if add_measured_average_velocity and VELOCITY_DIMENSION in drop_number.dims:
+ measured_average_velocity = get_drop_average_velocity(drop_number)
+
+ # Define norm if not specified
+ if norm is None:
+ norm = LogNorm(1, raw_drop_number.max())
+
+ # Initialize figure
+ fig = plt.figure(figsize=figsize, dpi=dpi)
+ gs = GridSpec(1, 2, width_ratios=[1, 1.15], wspace=0.05) # More space for ax2
+ ax1 = fig.add_subplot(gs[0])
+ ax2 = fig.add_subplot(gs[1])
+
+ # Plot raw_drop_number
+ plot_spectrum(raw_drop_number, ax=ax1, cmap=cmap, norm=norm, extend=extend, add_colorbar=False, title="")
+
+ # Add velocities if asked
+ if add_theoretical_average_velocity:
+ theoretical_average_velocity.plot(ax=ax1, c="k", linestyle="dashed")
+ if add_measured_average_velocity and VELOCITY_DIMENSION in drop_number.dims:
+ measured_average_velocity.plot(ax=ax1, c="k", linestyle="dotted")
+
+ # Improve plot appearance
+ ax1.set_xlabel("Diamenter [mm]")
+ ax1.set_ylabel("Fall velocity [m/s]")
+ ax1.set_title("Raw Spectrum")
+
+ # Plot drop_number
+ plot_spectrum(drop_number, ax=ax2, cmap=cmap, norm=norm, extend=extend, add_colorbar=True, title="")
+
+ # Add velocities if asked
+ if add_theoretical_average_velocity:
+ theoretical_average_velocity.plot(ax=ax2, c="k", linestyle="dashed", label="Theoretical velocity")
+ if add_measured_average_velocity and VELOCITY_DIMENSION in drop_number.dims:
+ measured_average_velocity.plot(ax=ax2, c="k", linestyle="dotted", label="Measured average velocity")
+
+ # Improve plot appearance
+ ax2.set_yticks([])
+ ax2.set_yticklabels([])
+ ax2.set_xlabel("Diamenter [mm]")
+ ax2.set_ylabel("")
+ ax2.set_title("Filtered Spectrum")
+
+ # Add legend
+ if add_theoretical_average_velocity or add_measured_average_velocity:
+ ax2.legend(loc="lower right", frameon=False)
+
+ return fig
+
+
+ ####-------------------------------------------------------------------------------------------------------
+ #### DenseLines
+
+
  def normalize_array(arr, method="max"):
  """Normalize a NumPy array according to the chosen method.
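As a usage sketch, assuming ds is an xarray.Dataset holding a DISDRODB product with a raw_drop_number spectrum (how the dataset is opened is omitted here):

    import matplotlib.pyplot as plt

    from disdrodb.viz.plots import plot_spectrum

    # Single aggregated spectrum (the time dimension is summed internally)
    plot_spectrum(ds, variable="raw_drop_number")
    plt.show()

    # FacetGrid over the first timesteps, as suggested in the docstring
    ds.isel(time=slice(0, 9)).disdrodb.plot_spectrum(col="time", col_wrap=3)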