disdrodb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. disdrodb/__init__.py +64 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +139 -9
  7. disdrodb/api/configs.py +4 -2
  8. disdrodb/api/info.py +10 -10
  9. disdrodb/api/io.py +237 -18
  10. disdrodb/api/path.py +81 -75
  11. disdrodb/api/search.py +6 -6
  12. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  13. disdrodb/cli/disdrodb_run_l0.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
  19. disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
  20. disdrodb/configs.py +149 -4
  21. disdrodb/constants.py +61 -0
  22. disdrodb/data_transfer/download_data.py +5 -5
  23. disdrodb/etc/configs/attributes.yaml +339 -0
  24. disdrodb/etc/configs/encodings.yaml +473 -0
  25. disdrodb/etc/products/L1/global.yaml +13 -0
  26. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  27. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  28. disdrodb/etc/products/L2E/global.yaml +22 -0
  29. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  30. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  31. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  33. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/global.yaml +26 -0
  35. disdrodb/l0/__init__.py +13 -0
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  37. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  38. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  39. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  40. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  41. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  42. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  43. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  44. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  45. disdrodb/l0/l0a_processing.py +30 -30
  46. disdrodb/l0/l0b_nc_processing.py +108 -2
  47. disdrodb/l0/l0b_processing.py +4 -4
  48. disdrodb/l0/l0c_processing.py +5 -13
  49. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  50. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  51. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  52. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  53. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  54. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  55. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  56. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  57. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  58. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  59. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  60. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  61. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  63. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  64. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  65. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  66. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  67. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  68. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  69. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  70. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  71. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  72. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +2 -0
  73. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  74. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  75. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  76. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → USA/C3WE.py} +65 -85
  77. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  78. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  79. disdrodb/l0/routines.py +105 -14
  80. disdrodb/l1/__init__.py +5 -0
  81. disdrodb/l1/filters.py +34 -20
  82. disdrodb/l1/processing.py +45 -44
  83. disdrodb/l1/resampling.py +77 -66
  84. disdrodb/l1/routines.py +35 -43
  85. disdrodb/l1_env/routines.py +18 -3
  86. disdrodb/l2/__init__.py +7 -0
  87. disdrodb/l2/empirical_dsd.py +58 -10
  88. disdrodb/l2/event.py +27 -120
  89. disdrodb/l2/processing.py +267 -116
  90. disdrodb/l2/routines.py +618 -254
  91. disdrodb/metadata/standards.py +3 -1
  92. disdrodb/psd/fitting.py +463 -144
  93. disdrodb/psd/models.py +8 -5
  94. disdrodb/routines.py +3 -3
  95. disdrodb/scattering/__init__.py +16 -4
  96. disdrodb/scattering/axis_ratio.py +56 -36
  97. disdrodb/scattering/permittivity.py +486 -0
  98. disdrodb/scattering/routines.py +701 -159
  99. disdrodb/summary/__init__.py +17 -0
  100. disdrodb/summary/routines.py +4120 -0
  101. disdrodb/utils/attrs.py +68 -125
  102. disdrodb/utils/compression.py +30 -1
  103. disdrodb/utils/dask.py +59 -8
  104. disdrodb/utils/dataframe.py +61 -7
  105. disdrodb/utils/directories.py +35 -15
  106. disdrodb/utils/encoding.py +33 -19
  107. disdrodb/utils/logger.py +13 -6
  108. disdrodb/utils/manipulations.py +71 -0
  109. disdrodb/utils/subsetting.py +214 -0
  110. disdrodb/utils/time.py +165 -19
  111. disdrodb/utils/writer.py +20 -7
  112. disdrodb/utils/xarray.py +2 -4
  113. disdrodb/viz/__init__.py +13 -0
  114. disdrodb/viz/plots.py +327 -0
  115. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
  116. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/RECORD +121 -88
  117. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
  118. disdrodb/l1/encoding_attrs.py +0 -642
  119. disdrodb/l2/processing_options.py +0 -213
  120. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  121. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
  122. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
  123. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
disdrodb/l2/event.py CHANGED
@@ -15,110 +15,17 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Functions for event definition."""
-import dask
+
 import numpy as np
 import pandas as pd
-import xarray as xr

 from disdrodb.api.info import get_start_end_time_from_filepaths
-from disdrodb.utils.time import acronym_to_seconds, ensure_sorted_by_time
-
-
-@dask.delayed
-def _delayed_open_dataset(filepath):
-    with dask.config.set(scheduler="synchronous"):
-        ds = xr.open_dataset(filepath, chunks={}, autoclose=True, decode_timedelta=False, cache=False)
-    return ds
-
-
-def identify_events(
-    filepaths,
-    parallel=False,
-    min_n_drops=5,
-    neighbor_min_size=2,
-    neighbor_time_interval="5MIN",
-    intra_event_max_time_gap="6H",
-    event_min_duration="5MIN",
-    event_min_size=3,
-):
-    """Return a list of rainy events.
-
-    Rainy timesteps are defined when N > min_n_drops.
-    Any rainy isolated timesteps (based on neighborhood criteria) is removed.
-    Then, consecutive rainy timesteps are grouped into the same event if the time gap between them does not
-    exceed `intra_event_max_time_gap`. Finally, events that do not meet minimum size or duration
-    requirements are filtered out.
-
-    Parameters
-    ----------
-    filepaths: list
-        List of L1C file paths.
-    parallel: bool
-        Whether to load the files in parallel.
-        Set parallel=True only in a multiprocessing environment.
-        The default is False.
-    neighbor_time_interval : str
-        The time interval around a given timestep defining the neighborhood.
-        Only timesteps that fall within this time interval before or after a timestep are considered neighbors.
-    neighbor_min_size : int, optional
-        The minimum number of neighboring timesteps required within `neighbor_time_interval` for a
-        timestep to be considered non-isolated. Isolated timesteps are removed !
-        - If `neighbor_min_size=0`, then no timestep is considered isolated and no filtering occurs.
-        - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
-        - If `neighbor_min_size=2`, the timestep must have at least two timesteps within `neighbor_time_interval`.
-        Defaults to 1.
-    intra_event_max_time_gap: str
-        The maximum time interval between two timesteps to be considered part of the same event.
-        This parameters is used to group timesteps into events !
-    event_min_duration : str
-        The minimum duration an event must span. Events shorter than this duration are discarded.
-    event_min_size : int, optional
-        The minimum number of valid timesteps required for an event. Defaults to 1.
-
-    Returns
-    -------
-    list of dict
-        A list of events, where each event is represented as a dictionary with keys:
-        - "start_time": np.datetime64, start time of the event
-        - "end_time": np.datetime64, end time of the event
-        - "duration": np.timedelta64, duration of the event
-        - "n_timesteps": int, number of valid timesteps in the event
-    """
-    # Open datasets in parallel
-    if parallel:
-        list_ds = dask.compute([_delayed_open_dataset(filepath) for filepath in filepaths])[0]
-    else:
-        list_ds = [xr.open_dataset(filepath, chunks={}, cache=False, decode_timedelta=False) for filepath in filepaths]
-    # Filter dataset for requested variables
-    variables = ["time", "N"]
-    list_ds = [ds[variables] for ds in list_ds]
-    # Concat datasets
-    ds = xr.concat(list_ds, dim="time", compat="no_conflicts", combine_attrs="override")
-    # Read in memory the variable needed
-    ds = ds.compute()
-    # Close file on disk
-    _ = [ds.close() for ds in list_ds]
-    del list_ds
-    # Sort dataset by time
-    ds = ensure_sorted_by_time(ds)
-    # Define candidate timesteps to group into events
-    idx_valid = ds["N"].data > min_n_drops
-    timesteps = ds["time"].data[idx_valid]
-    # Define event list
-    event_list = group_timesteps_into_event(
-        timesteps=timesteps,
-        neighbor_min_size=neighbor_min_size,
-        neighbor_time_interval=neighbor_time_interval,
-        intra_event_max_time_gap=intra_event_max_time_gap,
-        event_min_duration=event_min_duration,
-        event_min_size=event_min_size,
-    )
-    return event_list
+from disdrodb.utils.time import ensure_timedelta_seconds_interval, temporal_resolution_to_seconds


 def group_timesteps_into_event(
     timesteps,
-    intra_event_max_time_gap,
+    event_max_time_gap,
     event_min_size=0,
     event_min_duration="0S",
     neighbor_min_size=0,
@@ -130,7 +37,7 @@ def group_timesteps_into_event(
     This function groups valid candidate timesteps into events by considering how they cluster
     in time. Any isolated timesteps (based on neighborhood criteria) are first removed. Then,
     consecutive timesteps are grouped into the same event if the time gap between them does not
-    exceed `intra_event_max_time_gap`. Finally, events that do not meet minimum size or duration
+    exceed `event_max_time_gap`. Finally, events that do not meet minimum size or duration
     requirements are filtered out.

     Please note that neighbor_min_size and neighbor_time_interval are very sensitive to the
@@ -150,7 +57,7 @@ def group_timesteps_into_event(
         - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
         - If `neighbor_min_size=2`, the timestep must have at least two timesteps within `neighbor_time_interval`.
         Defaults to 1.
-    intra_event_max_time_gap: str
+    event_max_time_gap: str
         The maximum time interval between two timesteps to be considered part of the same event.
         This parameters is used to group timesteps into events !
     event_min_duration : str
@@ -168,9 +75,9 @@ def group_timesteps_into_event(
         - "n_timesteps": int, number of valid timesteps in the event
     """
     # Retrieve datetime arguments
-    neighbor_time_interval = pd.Timedelta(acronym_to_seconds(neighbor_time_interval), unit="seconds")
-    intra_event_max_time_gap = pd.Timedelta(acronym_to_seconds(intra_event_max_time_gap), unit="seconds")
-    event_min_duration = pd.Timedelta(acronym_to_seconds(event_min_duration), unit="seconds")
+    neighbor_time_interval = pd.Timedelta(temporal_resolution_to_seconds(neighbor_time_interval), unit="seconds")
+    event_max_time_gap = pd.Timedelta(temporal_resolution_to_seconds(event_max_time_gap), unit="seconds")
+    event_min_duration = pd.Timedelta(temporal_resolution_to_seconds(event_min_duration), unit="seconds")

     # Remove isolated timesteps
     timesteps = remove_isolated_timesteps(
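Note on the change above: both the old acronym_to_seconds and the new temporal_resolution_to_seconds turn a temporal-resolution string such as "5MIN" or "6H" into seconds before wrapping it in a pandas Timedelta. A minimal sketch of that idea (to_seconds below is a hypothetical stand-in, not disdrodb's implementation, which may accept more formats):

    import pandas as pd

    def to_seconds(resolution):
        # Map a temporal-resolution string such as "30S", "5MIN" or "6H" to seconds.
        units = {"S": 1, "MIN": 60, "H": 3600, "D": 86400}
        for suffix in sorted(units, key=len, reverse=True):  # match "MIN" before single-letter units
            if resolution.endswith(suffix):
                return int(resolution[: -len(suffix)]) * units[suffix]
        raise ValueError(f"Unrecognized temporal resolution: {resolution!r}")

    event_max_time_gap = pd.Timedelta(to_seconds("6H"), unit="seconds")  # Timedelta('0 days 06:00:00')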
@@ -180,8 +87,8 @@ def group_timesteps_into_event(
     )

     # Group timesteps into events
-    # - If two timesteps are separated by less than intra_event_max_time_gap, are considered the same event
-    events = group_timesteps_into_events(timesteps, intra_event_max_time_gap)
+    # - If two timesteps are separated by less than event_max_time_gap, are considered the same event
+    events = group_timesteps_into_events(timesteps, event_max_time_gap)

     # Define list of event
     event_list = [
@@ -270,7 +177,7 @@ def remove_isolated_timesteps(timesteps, neighbor_min_size, neighbor_time_interv
     return non_isolated_timesteps


-def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
+def group_timesteps_into_events(timesteps, event_max_time_gap):
     """
     Group valid timesteps into events based on a maximum allowed dry interval.

@@ -278,7 +185,7 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
     ----------
     timesteps : array-like of np.datetime64
         Sorted array of valid timesteps.
-    intra_event_max_time_gap : np.timedelta64
+    event_max_time_gap : np.timedelta64
         Maximum time interval allowed between consecutive valid timesteps for them
         to be considered part of the same event.

@@ -297,9 +204,9 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
     # Compute differences between consecutive timesteps
     diffs = np.diff(timesteps)

-    # Identify the indices where the gap is larger than intra_event_max_time_gap
+    # Identify the indices where the gap is larger than event_max_time_gap
     # These indices represent boundaries between events
-    break_indices = np.where(diffs > intra_event_max_time_gap)[0] + 1
+    break_indices = np.where(diffs > event_max_time_gap)[0] + 1

     # Split the timesteps at the identified break points
     events = np.split(timesteps, break_indices)
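The gap-based splitting in this hunk is plain NumPy and unchanged apart from the rename: consecutive differences mark an event boundary wherever they exceed the threshold. A self-contained sketch with made-up timesteps (illustrative only, not the disdrodb API):

    import numpy as np

    timesteps = np.array(
        ["2024-01-01T00:00", "2024-01-01T00:01", "2024-01-01T00:02", "2024-01-01T09:00", "2024-01-01T09:01"],
        dtype="datetime64[s]",
    )
    event_max_time_gap = np.timedelta64(6, "h")

    diffs = np.diff(timesteps)                                   # gaps between consecutive timesteps
    break_indices = np.where(diffs > event_max_time_gap)[0] + 1  # first index of each new event
    events = np.split(timesteps, break_indices)
    # events -> [the three 00:0x timesteps, the two 09:0x timesteps]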
@@ -311,7 +218,7 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
     # current_t = timesteps[i]
     # previous_t = timesteps[i - 1]

-    # if current_t - previous_t <= intra_event_max_time_gap:
+    # if current_t - previous_t <= event_max_time_gap:
     #     current_event.append(current_t)
     # else:
     #     events.append(current_event)
@@ -324,21 +231,23 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
 ####-----------------------------------------------------------------------------------.


-def get_events_info(list_events, filepaths, accumulation_interval, rolling):
+def get_files_partitions(list_partitions, filepaths, sample_interval, accumulation_interval, rolling):  # noqa: ARG001
     """
     Provide information about the required files for each event.

-    For each event in `list_events`, this function identifies the file paths from `filepaths` that
+    For each event in `list_partitions`, this function identifies the file paths from `filepaths` that
     overlap with the event period, adjusted by the `accumulation_interval`. The event period is
     extended backward or forward based on the `rolling` parameter.

     Parameters
     ----------
-    list_events : list of dict
+    list_partitions : list of dict
         List of events, where each event is a dictionary containing at least 'start_time' and 'end_time'
         keys with `numpy.datetime64` values.
     filepaths : list of str
         List of file paths corresponding to data files.
+    sample_interval : numpy.timedelta64 or int
+        The sample interval of the input dataset.
     accumulation_interval : numpy.timedelta64 or int
         Time interval to adjust the event period for accumulation. If an integer is provided, it is
         assumed to be in seconds.
@@ -355,25 +264,23 @@ def get_events_info(list_events, filepaths, accumulation_interval, rolling):
         - 'filepaths': List of file paths overlapping with the adjusted event period.

     """
-    # Ensure accumulation_interval is numpy.timedelta64
-    if not isinstance(accumulation_interval, np.timedelta64):
-        accumulation_interval = np.timedelta64(accumulation_interval, "s")
+    # Ensure sample_interval and accumulation_interval is numpy.timedelta64
+    accumulation_interval = ensure_timedelta_seconds_interval(accumulation_interval)
+    sample_interval = ensure_timedelta_seconds_interval(sample_interval)

     # Retrieve file start_time and end_time
     files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)

     # Retrieve information for each event
     event_info = []
-    for event_dict in list_events:
+    for event_dict in list_partitions:
         # Retrieve event time period
         event_start_time = event_dict["start_time"]
         event_end_time = event_dict["end_time"]

-        # Add buffer to account for accumulation interval
-        if rolling:  # backward
-            event_start_time = event_start_time - np.array(accumulation_interval, dtype="m8[s]")
-        else:  # aggregate forward
-            event_end_time = event_end_time + np.array(accumulation_interval, dtype="m8[s]")
+        # Adapt event_end_time if accumulation interval different from sample interval
+        if sample_interval != accumulation_interval:
+            event_end_time = event_end_time + accumulation_interval

         # Derive event filepaths
         overlaps = (files_start_time <= event_end_time) & (files_end_time >= event_start_time)
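The overlap test in the final context line is the standard interval-intersection check: a file spanning [file_start, file_end] overlaps an event spanning [event_start, event_end] exactly when file_start <= event_end and file_end >= event_start. A self-contained sketch with hypothetical file times (the real function obtains them from get_start_end_time_from_filepaths):

    import numpy as np

    filepaths = ["station_0000.nc", "station_0600.nc"]  # hypothetical L1C files
    files_start_time = np.array(["2024-01-01T00:00", "2024-01-01T06:00"], dtype="datetime64[s]")
    files_end_time = np.array(["2024-01-01T05:59", "2024-01-01T11:59"], dtype="datetime64[s]")

    event_start_time = np.datetime64("2024-01-01T05:00")
    event_end_time = np.datetime64("2024-01-01T07:00")

    # Boolean mask: True where a file's time span intersects the event window
    overlaps = (files_start_time <= event_end_time) & (files_end_time >= event_start_time)
    event_filepaths = [fp for fp, keep in zip(filepaths, overlaps) if keep]
    # -> both files are selected, since each intersects [05:00, 07:00]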