disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. disdrodb/__init__.py +68 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +177 -24
  7. disdrodb/api/configs.py +3 -3
  8. disdrodb/api/info.py +13 -13
  9. disdrodb/api/io.py +281 -22
  10. disdrodb/api/path.py +184 -195
  11. disdrodb/api/search.py +18 -9
  12. disdrodb/cli/disdrodb_create_summary.py +103 -0
  13. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  14. disdrodb/cli/disdrodb_run_l0.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
  19. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  20. disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
  21. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  22. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  23. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  24. disdrodb/configs.py +149 -4
  25. disdrodb/constants.py +61 -0
  26. disdrodb/data_transfer/download_data.py +127 -11
  27. disdrodb/etc/configs/attributes.yaml +339 -0
  28. disdrodb/etc/configs/encodings.yaml +473 -0
  29. disdrodb/etc/products/L1/global.yaml +13 -0
  30. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  31. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +22 -0
  33. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  34. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  35. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  38. disdrodb/etc/products/L2M/global.yaml +26 -0
  39. disdrodb/issue/writer.py +2 -0
  40. disdrodb/l0/__init__.py +13 -0
  41. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  42. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  43. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  44. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  45. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  46. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  48. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  49. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  50. disdrodb/l0/l0a_processing.py +37 -32
  51. disdrodb/l0/l0b_nc_processing.py +118 -8
  52. disdrodb/l0/l0b_processing.py +30 -65
  53. disdrodb/l0/l0c_processing.py +369 -259
  54. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
  55. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  56. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  58. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  59. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  63. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  66. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  67. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  69. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  71. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  72. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  73. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
  74. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  75. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  76. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  79. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  80. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  81. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  83. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
  84. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  85. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  86. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  87. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  88. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  89. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  90. disdrodb/l1/__init__.py +5 -0
  91. disdrodb/l1/fall_velocity.py +46 -0
  92. disdrodb/l1/filters.py +34 -20
  93. disdrodb/l1/processing.py +46 -45
  94. disdrodb/l1/resampling.py +77 -66
  95. disdrodb/l1_env/routines.py +18 -3
  96. disdrodb/l2/__init__.py +7 -0
  97. disdrodb/l2/empirical_dsd.py +58 -10
  98. disdrodb/l2/processing.py +268 -117
  99. disdrodb/metadata/checks.py +132 -125
  100. disdrodb/metadata/standards.py +3 -1
  101. disdrodb/psd/fitting.py +631 -345
  102. disdrodb/psd/models.py +9 -6
  103. disdrodb/routines/__init__.py +54 -0
  104. disdrodb/{l0/routines.py → routines/l0.py} +316 -355
  105. disdrodb/{l1/routines.py → routines/l1.py} +76 -116
  106. disdrodb/routines/l2.py +1019 -0
  107. disdrodb/{routines.py → routines/wrappers.py} +98 -10
  108. disdrodb/scattering/__init__.py +16 -4
  109. disdrodb/scattering/axis_ratio.py +61 -37
  110. disdrodb/scattering/permittivity.py +504 -0
  111. disdrodb/scattering/routines.py +746 -184
  112. disdrodb/summary/__init__.py +17 -0
  113. disdrodb/summary/routines.py +4196 -0
  114. disdrodb/utils/archiving.py +434 -0
  115. disdrodb/utils/attrs.py +68 -125
  116. disdrodb/utils/cli.py +5 -5
  117. disdrodb/utils/compression.py +30 -1
  118. disdrodb/utils/dask.py +121 -9
  119. disdrodb/utils/dataframe.py +61 -7
  120. disdrodb/utils/decorators.py +31 -0
  121. disdrodb/utils/directories.py +35 -15
  122. disdrodb/utils/encoding.py +37 -19
  123. disdrodb/{l2 → utils}/event.py +15 -173
  124. disdrodb/utils/logger.py +14 -7
  125. disdrodb/utils/manipulations.py +81 -0
  126. disdrodb/utils/routines.py +166 -0
  127. disdrodb/utils/subsetting.py +214 -0
  128. disdrodb/utils/time.py +35 -177
  129. disdrodb/utils/writer.py +20 -7
  130. disdrodb/utils/xarray.py +5 -4
  131. disdrodb/viz/__init__.py +13 -0
  132. disdrodb/viz/plots.py +398 -0
  133. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
  134. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
  135. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
  136. disdrodb/l1/encoding_attrs.py +0 -642
  137. disdrodb/l2/processing_options.py +0 -213
  138. disdrodb/l2/routines.py +0 -868
  139. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  140. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
  141. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
  142. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
disdrodb/utils/archiving.py ADDED
@@ -0,0 +1,434 @@
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ """Utility functions for DISDRODB product archiving."""
+ import datetime
+
+ import numpy as np
+ import pandas as pd
+
+ from disdrodb.api.info import get_start_end_time_from_filepaths
+ from disdrodb.api.io import open_netcdf_files
+ from disdrodb.utils.event import group_timesteps_into_event
+ from disdrodb.utils.time import (
+     ensure_sorted_by_time,
+     ensure_timedelta_seconds,
+ )
+
+ ####---------------------------------------------------------------------------------
+ #### Time blocks
+
+
+ def check_freq(freq: str) -> str:
+     """Check the validity of the freq argument."""
+     valid_freq = ["none", "year", "season", "quarter", "month", "day", "hour"]
+     if not isinstance(freq, str):
+         raise TypeError("'freq' must be a string.")
+     if freq not in valid_freq:
+         raise ValueError(
+             f"Invalid 'freq' {freq!r}. Must be one of: {valid_freq}.",
+         )
+     return freq
+
+
+ def generate_time_blocks(
+     start_time: np.datetime64,
+     end_time: np.datetime64,
+     freq: str,
+     inclusive_end_time: bool = True,
+ ) -> np.ndarray:
+     """Generate time blocks between ``start_time`` and ``end_time`` for a given frequency.
+
+     Parameters
+     ----------
+     start_time : numpy.datetime64
+         Inclusive start of the overall time range.
+     end_time : numpy.datetime64
+         End of the overall time range. Inclusive by default (see the ``inclusive_end_time`` argument).
+     freq : str
+         Frequency specifier. Accepted values are:
+         - 'none' : return a single block [start_time, end_time]
+         - 'hour' : split into hourly blocks
+         - 'day' : split into daily blocks
+         - 'month' : split into calendar months
+         - 'quarter' : split into calendar quarters
+         - 'year' : split into calendar years
+         - 'season' : split into meteorological seasons (DJF, MAM, JJA, SON)
+     inclusive_end_time : bool
+         If False, a trailing block starting exactly at ``end_time`` is removed.
+         The default is True.
+
+     Returns
+     -------
+     numpy.ndarray
+         Array of shape (n, 2) with dtype datetime64[s], where each row is [block_start, block_end].
+
+     """
+     freq = check_freq(freq)
+     if freq == "none":
+         return np.array([[start_time, end_time]], dtype="datetime64[s]")
+
+     # Mapping from the custom freq values to pandas frequency codes
+     freq_map = {
+         "hour": "h",
+         "day": "d",
+         "month": "M",
+         "quarter": "Q",
+         "year": "Y",
+         "season": "Q-FEB",  # seasons DJF, MAM, JJA, SON
+     }
+
+     # Define periods
+     periods = pd.period_range(start=start_time, end=end_time, freq=freq_map[freq])
+
+     # Create time blocks
+     blocks = []
+     for period in periods:
+         start = period.start_time.to_datetime64().astype("datetime64[s]")
+         if freq == "quarter":
+             end = period.end_time.floor("s").to_datetime64().astype("datetime64[s]")
+         else:
+             end = period.end_time.to_datetime64().astype("datetime64[s]")
+         blocks.append([start, end])
+     blocks = np.array(blocks, dtype="datetime64[s]")
+
+     # Optionally drop a trailing block that starts exactly at end_time
+     if not inclusive_end_time and len(blocks) > 0 and blocks[-1, 0] == end_time:
+         blocks = blocks[:-1]
+     return blocks
+
+
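As a usage sketch (illustrative values, not part of the diff; assumes disdrodb >= 0.1.4 is installed so the new module is importable):

    import numpy as np
    from disdrodb.utils.archiving import generate_time_blocks

    # Split mid-January to mid-March 2023 into calendar months
    blocks = generate_time_blocks(
        start_time=np.datetime64("2023-01-15T00:00:00"),
        end_time=np.datetime64("2023-03-10T00:00:00"),
        freq="month",
    )
    print(blocks.shape)  # (3, 2): one [block_start, block_end] row per month touched
    print(blocks[0])     # ['2023-01-01T00:00:00' '2023-01-31T23:59:59']

Note that block bounds snap to calendar boundaries, so the first block starts before the requested start_time.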
+ ####----------------------------------------------------------------------------
+ #### Event/Time partitioning
+ def identify_events(
+     filepaths,
+     parallel=False,
+     min_drops=5,
+     neighbor_min_size=2,
+     neighbor_time_interval="5MIN",
+     event_max_time_gap="6H",
+     event_min_duration="5MIN",
+     event_min_size=3,
+ ):
+     """Return a list of rainy events.
+
+     Rainy timesteps are defined as those where N > min_drops.
+     Isolated rainy timesteps (based on the neighborhood criteria) are removed.
+     Then, consecutive rainy timesteps are grouped into the same event if the time gap between them does not
+     exceed `event_max_time_gap`. Finally, events that do not meet the minimum size or duration
+     requirements are filtered out.
+
+     Parameters
+     ----------
+     filepaths : list
+         List of L1C file paths.
+     parallel : bool
+         Whether to load the files in parallel.
+         Set parallel=True only in a multiprocessing environment.
+         The default is False.
+     min_drops : int
+         Minimum drop count N for a timestep to be considered rainy. The default is 5.
+     neighbor_time_interval : str
+         The time interval around a given timestep that defines its neighborhood.
+         Only timesteps that fall within this time interval before or after a timestep are considered neighbors.
+     neighbor_min_size : int, optional
+         The minimum number of neighboring timesteps required within `neighbor_time_interval` for a
+         timestep to be considered non-isolated. Isolated timesteps are removed.
+         - If `neighbor_min_size=0`, no timestep is considered isolated and no filtering occurs.
+         - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
+         - If `neighbor_min_size=2`, the timestep must have at least two neighbors within `neighbor_time_interval`.
+         The default is 2.
+     event_max_time_gap : str
+         The maximum time gap between two timesteps for them to be considered part of the same event.
+         This parameter is used to group timesteps into events.
+     event_min_duration : str
+         The minimum duration an event must span. Events shorter than this duration are discarded.
+     event_min_size : int, optional
+         The minimum number of valid timesteps required for an event. The default is 3.
+
+     Returns
+     -------
+     list of dict
+         A list of events, where each event is represented as a dictionary with keys:
+         - "start_time": np.datetime64, start time of the event
+         - "end_time": np.datetime64, end time of the event
+         - "duration": np.timedelta64, duration of the event
+         - "n_timesteps": int, number of valid timesteps in the event
+     """
+     # Open datasets (in parallel if requested)
+     ds = open_netcdf_files(filepaths, variables=["time", "N"], parallel=parallel, compute=True)
+     # Sort dataset by time
+     ds = ensure_sorted_by_time(ds)
+     # Define candidate timesteps to group into events
+     idx_valid = ds["N"].to_numpy() > min_drops
+     timesteps = ds["time"].to_numpy()[idx_valid]
+     # Define event list
+     event_list = group_timesteps_into_event(
+         timesteps=timesteps,
+         neighbor_min_size=neighbor_min_size,
+         neighbor_time_interval=neighbor_time_interval,
+         event_max_time_gap=event_max_time_gap,
+         event_min_duration=event_min_duration,
+         event_min_size=event_min_size,
+     )
+     del ds
+     return event_list
+
+
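A hedged call sketch for the event detection (the file list below is hypothetical; identify_events loads real L1C files to inspect drop counts):

    from disdrodb.utils.archiving import identify_events

    filepaths = [...]  # hypothetical list of L1C netCDF file paths

    events = identify_events(
        filepaths,
        min_drops=5,                    # a timestep is rainy when N > 5
        neighbor_time_interval="5MIN",  # neighborhood used to drop isolated timesteps
        event_max_time_gap="6H",        # gaps longer than 6 hours split events
    )
    for event in events:
        print(event["start_time"], event["end_time"], event["n_timesteps"])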
+ def identify_time_partitions(start_times, end_times, freq: str) -> list[dict]:
+     """Identify the set of time blocks covered by files.
+
+     The result is a minimal, sorted, and unique set of time partitions.
+     `start_times` and `end_times` can be derived using `get_start_end_time_from_filepaths`.
+
+     Parameters
+     ----------
+     start_times : numpy.ndarray of datetime64[s]
+         Array of inclusive start times for each file.
+     end_times : numpy.ndarray of datetime64[s]
+         Array of inclusive end times for each file.
+     freq : {'none', 'hour', 'day', 'month', 'quarter', 'season', 'year'}
+         Frequency determining the granularity of candidate blocks.
+         See `generate_time_blocks` for more details.
+
+     Returns
+     -------
+     list of dict
+         A list of dictionaries, each containing:
+
+         - `start_time` (numpy.datetime64[s])
+           Inclusive start of a time block.
+         - `end_time` (numpy.datetime64[s])
+           Inclusive end of a time block.
+
+         Only those blocks that overlap at least one file's interval are returned.
+         The list is sorted by `start_time` and contains no duplicate blocks.
+     """
+     # Define the overall time coverage of the files
+     start_time, end_time = start_times.min(), end_times.max()
+
+     # Compute candidate time blocks
+     blocks = generate_time_blocks(start_time, end_time, freq=freq)
+
+     # Select only the time blocks overlapping at least one file
+     mask = (blocks[:, 0][:, None] <= end_times) & (blocks[:, 1][:, None] >= start_times)
+     blocks = blocks[mask.any(axis=1)]
+
+     # Ensure sorted unique time blocks
+     order = np.argsort(blocks[:, 0])
+     blocks = np.unique(blocks[order], axis=0)
+
+     # Convert to a list of dictionaries
+     list_time_blocks = [{"start_time": start_time, "end_time": end_time} for start_time, end_time in blocks]
+     return list_time_blocks
+
+
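A self-contained sketch of the overlap selection (illustrative times; assumes the module is importable):

    import numpy as np
    from disdrodb.utils.archiving import identify_time_partitions

    # Two files: one in January, one in March (nothing in February)
    start_times = np.array(["2023-01-10T00:00:00", "2023-03-05T12:00:00"], dtype="datetime64[s]")
    end_times = np.array(["2023-01-10T23:59:59", "2023-03-06T11:59:59"], dtype="datetime64[s]")

    partitions = identify_time_partitions(start_times, end_times, freq="month")
    for p in partitions:  # February is skipped: no file overlaps it
        print(p["start_time"], "->", p["end_time"])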
+ def define_temporal_partitions(filepaths, strategy, parallel, strategy_options):
+     """Define temporal file processing partitions.
+
+     Parameters
+     ----------
+     filepaths : list
+         List of file paths to be processed.
+
+     strategy : str
+         Which partitioning strategy to apply:
+
+         - ``'time_block'`` defines fixed time intervals (e.g. monthly) covering the input files.
+         - ``'event'`` detects clusters of precipitation ("events").
+
+     parallel : bool
+         If True, parallel data loading is used to identify events.
+
+     strategy_options : dict
+         Dictionary with strategy-specific parameters.
+
+         If ``strategy == 'time_block'``, supported options are:
+
+         - ``freq``: Time unit for blocks. One of {'year', 'season', 'month', 'day'}.
+
+         See `identify_time_partitions` for more information.
+
+         If ``strategy == 'event'``, supported options are:
+
+         - ``min_drops`` : int
+             Minimum number of drops to consider a timestep.
+         - ``neighbor_min_size`` : int
+             Minimum cluster size for merging neighboring events.
+         - ``neighbor_time_interval`` : str
+             Time window (e.g. "5MIN") to merge adjacent clusters.
+         - ``event_max_time_gap`` : str
+             Maximum allowed gap (e.g. "6H") within a single event.
+         - ``event_min_duration`` : str
+             Minimum total duration (e.g. "5MIN") of an event.
+         - ``event_min_size`` : int
+             Minimum number of records in an event.
+
+         See `identify_events` for more information.
+
+     Returns
+     -------
+     list
+         A list of dictionaries, each containing:
+
+         - ``start_time`` (numpy.datetime64[s])
+           Inclusive start of an event or time block.
+         - ``end_time`` (numpy.datetime64[s])
+           Inclusive end of an event or time block.
+
+     Notes
+     -----
+     - The ``'event'`` strategy requires loading data into memory to identify clusters.
+     - The ``'time_block'`` strategy can operate on metadata alone, without full data loading.
+     - The ``'event'`` strategy implicitly selects which files to process.
+     - The ``'time_block'`` strategy performs no selection of the files to process.
+     """
+     if strategy not in ["time_block", "event"]:
+         raise ValueError(f"Unknown strategy: {strategy!r}. Must be 'time_block' or 'event'.")
+     if strategy == "event":
+         return identify_events(filepaths, parallel=parallel, **strategy_options)
+
+     start_times, end_times = get_start_end_time_from_filepaths(filepaths)
+     return identify_time_partitions(start_times=start_times, end_times=end_times, **strategy_options)
+
+
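A hedged sketch of the two strategies (file paths hypothetical; the 'event' branch opens the files, so it only runs against a real archive):

    from disdrodb.utils.archiving import define_temporal_partitions

    filepaths = [...]  # hypothetical list of L1C file paths

    # Metadata-only partitioning into monthly blocks
    monthly_blocks = define_temporal_partitions(
        filepaths,
        strategy="time_block",
        parallel=False,
        strategy_options={"freq": "month"},
    )

    # Data-driven event partitioning
    events = define_temporal_partitions(
        filepaths,
        strategy="event",
        parallel=False,
        strategy_options={"min_drops": 5, "event_max_time_gap": "6H"},
    )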
+ ####----------------------------------------------------------------------------
+ #### Filepaths partitioning
+
+
+ def _map_files_to_blocks(files_start_time, files_end_time, filepaths, block_starts, block_ends):
+     """Map each time block to the list of filepaths overlapping it."""
+     # Use broadcasting to create a boolean matrix indicating which files cover which time block
+     # Broadcasting: (n_files, n_blocks)
+     mask = (files_start_time[:, None] <= block_ends[None, :]) & (files_end_time[:, None] >= block_starts[None, :])
+     # Create a list with a dictionary for each block
+     filepaths = np.array(filepaths)
+     results = []
+     for i, (start, end) in enumerate(zip(block_starts, block_ends)):
+         indices = np.where(mask[:, i])[0]
+         if indices.size > 0:
+             results.append(
+                 {
+                     "start_time": start.astype(datetime.datetime),
+                     "end_time": end.astype(datetime.datetime),
+                     "filepaths": filepaths[indices].tolist(),
+                 },
+             )
+     return results
+
+
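The overlap test uses the standard interval-intersection rule: a file [fs, fe] overlaps a block [bs, be] iff fs <= be and fe >= bs. A self-contained check of the broadcasting (plain NumPy, no disdrodb required):

    import numpy as np

    files_start = np.array(["2023-01-01T00:00:00", "2023-01-03T00:00:00"], dtype="M8[s]")
    files_end = np.array(["2023-01-01T23:59:59", "2023-01-03T23:59:59"], dtype="M8[s]")
    block_starts = np.array(["2023-01-01T00:00:00"], dtype="M8[s]")
    block_ends = np.array(["2023-01-02T23:59:59"], dtype="M8[s]")

    # (n_files, n_blocks) overlap matrix, as in _map_files_to_blocks
    mask = (files_start[:, None] <= block_ends[None, :]) & (files_end[:, None] >= block_starts[None, :])
    print(mask[:, 0])  # [ True False]: only the first file falls inside the block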
+ def get_files_partitions(list_partitions, filepaths, sample_interval, accumulation_interval, rolling):  # noqa: ARG001
+     """
+     Provide information about the files required for each event.
+
+     For each event in `list_partitions`, this function identifies the file paths from `filepaths` that
+     overlap with the event period, adjusted by the `accumulation_interval`. The event period is
+     extended backward or forward based on the `rolling` parameter.
+
+     Parameters
+     ----------
+     list_partitions : list of dict
+         List of events, where each event is a dictionary containing at least 'start_time' and 'end_time'
+         keys with `numpy.datetime64` values.
+     filepaths : list of str
+         List of file paths corresponding to data files.
+     sample_interval : numpy.timedelta64 or int
+         The sample interval of the input dataset.
+     accumulation_interval : numpy.timedelta64 or int
+         Time interval used to adjust the event period for accumulation. If an integer is provided, it is
+         assumed to be in seconds.
+     rolling : bool
+         If True, adjust the event period backward by `accumulation_interval` (rolling backward).
+         If False, adjust forward (aggregate forward).
+
+     Returns
+     -------
+     list of dict
+         A list where each element is a dictionary containing:
+         - 'start_time': Adjusted start time of the event (`datetime.datetime`).
+         - 'end_time': Adjusted end time of the event (`datetime.datetime`).
+         - 'filepaths': List of file paths overlapping with the adjusted event period.
+
+     """
+     if len(filepaths) == 0 or len(list_partitions) == 0:
+         return []
+
+     # Ensure sample_interval and accumulation_interval are numpy.timedelta64
+     accumulation_interval = ensure_timedelta_seconds(accumulation_interval)
+     sample_interval = ensure_timedelta_seconds(sample_interval)
+
+     # Define the offset on the event end_time
+     offset = accumulation_interval if sample_interval != accumulation_interval else ensure_timedelta_seconds(0)
+
+     # Retrieve file start_time and end_time
+     files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)
+
+     # Retrieve partition blocks start and end time arrays
+     block_starts = np.array([p["start_time"] for p in list_partitions]).astype("M8[s]")
+     block_ends = np.array([p["end_time"] for p in list_partitions]).astype("M8[s]")
+
+     # Add the optional offset for resampling
+     # TODO: expanding the partition time should be done only at the L1 stage when resampling.
+     # In disdrodb, the reported time is the time at the start of the accumulation period.
+     # If sensors report time at the end of the measurement interval, we might be reporting time
+     # with an inaccuracy equal to the sensor measurement interval.
+     # We could correct for that at the L0C stage already.
+     block_ends = block_ends + offset
+
+     # Map filepaths to the corresponding time blocks
+     list_event_info = _map_files_to_blocks(files_start_time, files_end_time, filepaths, block_starts, block_ends)
+     return list_event_info
+
+
+ def get_files_per_time_block(filepaths, freq="day", tolerance_seconds=120):
+     """
+     Organize files into the time blocks they cover based on their start and end times.
+
+     Parameters
+     ----------
+     filepaths : list of str
+         List of file paths to be processed.
+     freq : str
+         Time unit of the blocks (see `generate_time_blocks`). The default is "day".
+     tolerance_seconds : int
+         Tolerance added on both sides of each file's time coverage to account for
+         imprecise time logging by the sensors. The default is 120 seconds.
+
+     Returns
+     -------
+     list of dict
+         A list where each element is a dictionary with the 'start_time' and 'end_time'
+         of a time block and the 'filepaths' covering it.
+
+     """
+     # Return an empty list if no filepaths are provided
+     if len(filepaths) == 0:
+         return []
+
+     # Retrieve file start_time and end_time
+     files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)
+
+     # Add a tolerance to account for imprecise time logging by the sensors
+     # - Example: a 23:59:30 timestep might be logged as 00:00 and end up in the next day's file
+     files_start_time = files_start_time - np.array(tolerance_seconds, dtype="m8[s]")
+     files_end_time = files_end_time + np.array(tolerance_seconds, dtype="m8[s]")
+
+     # Identify candidate blocks
+     list_partitions = identify_time_partitions(
+         start_times=files_start_time,
+         end_times=files_end_time,
+         freq=freq,
+     )
+     block_starts = np.array([b["start_time"] for b in list_partitions]).astype("M8[s]")
+     block_ends = np.array([b["end_time"] for b in list_partitions]).astype("M8[s]")
+
+     # Map filepaths to the corresponding time blocks
+     list_event_info = _map_files_to_blocks(files_start_time, files_end_time, filepaths, block_starts, block_ends)
+     return list_event_info
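The tolerance widening is plain datetime arithmetic; a self-contained check of its effect (NumPy only):

    import numpy as np

    files_start_time = np.array(["2023-01-01T23:59:30"], dtype="M8[s]")
    tolerance_seconds = 120

    # Widen the coverage so a 23:59:30 timestep logged as 00:00:00
    # still lands in the previous day's block
    widened = files_start_time - np.array(tolerance_seconds, dtype="m8[s]")
    print(widened[0])  # 2023-01-01T23:57:30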
disdrodb/utils/attrs.py CHANGED
@@ -18,15 +18,26 @@
  # -----------------------------------------------------------------------------.
  """DISDRODB netCDF4 attributes utilities."""
  import datetime
+ import os

- from disdrodb import ARCHIVE_VERSION, CONVENTIONS, SOFTWARE_VERSION
+ from disdrodb.constants import ARCHIVE_VERSION, CONVENTIONS, COORDINATES, SOFTWARE_VERSION
+ from disdrodb.utils.yaml import read_yaml


  ####---------------------------------------------------------------------.
- #### Variable attributes
+ #### Variable and coordinates attributes
+
+
+ def get_attrs_dict():
+     """Get attributes dictionary for DISDRODB product variables and coordinates."""
+     import disdrodb
+
+     configs_path = os.path.join(disdrodb.__root_path__, "disdrodb", "etc", "configs")
+     attrs_dict = read_yaml(os.path.join(configs_path, "attributes.yaml"))
+     return attrs_dict


  def set_attrs(ds, attrs_dict):
-     """Set attributes to the variables of the xr.Dataset."""
+     """Set attributes to the variables and coordinates of the xr.Dataset."""
      for var in attrs_dict:
          if var in ds:
              ds[var].attrs.update(attrs_dict[var])
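set_attrs is a thin helper; a self-contained sketch of its effect on a toy dataset (xarray only, outside the diff):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"N": ("time", np.array([3, 12]))},
        coords={"time": np.array(["2023-01-01T00:00:00", "2023-01-01T00:01:00"], dtype="M8[s]")},
    )
    attrs_dict = {"N": {"long_name": "Number of detected drops"}}

    for var in attrs_dict:  # same loop as set_attrs
        if var in ds:
            ds[var].attrs.update(attrs_dict[var])
    print(ds["N"].attrs["long_name"])  # Number of detected drops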
@@ -37,104 +48,13 @@ def set_attrs(ds, attrs_dict):
  #### Coordinates attributes


- def get_coords_attrs_dict():
-     """Return dictionary with DISDRODB coordinates attributes."""
-     attrs_dict = {}
-     # Define diameter attributes
-     attrs_dict["diameter_bin_center"] = {
-         "name": "diameter_bin_center",
-         "standard_name": "diameter_bin_center",
-         "long_name": "diameter_bin_center",
-         "units": "mm",
-         "description": "Bin center drop diameter value",
-     }
-     attrs_dict["diameter_bin_width"] = {
-         "name": "diameter_bin_width",
-         "standard_name": "diameter_bin_width",
-         "long_name": "diameter_bin_width",
-         "units": "mm",
-         "description": "Drop diameter bin width",
-     }
-     attrs_dict["diameter_bin_upper"] = {
-         "name": "diameter_bin_upper",
-         "standard_name": "diameter_bin_upper",
-         "long_name": "diameter_bin_upper",
-         "units": "mm",
-         "description": "Bin upper bound drop diameter value",
-     }
-     attrs_dict["velocity_bin_lower"] = {
-         "name": "velocity_bin_lower",
-         "standard_name": "velocity_bin_lower",
-         "long_name": "velocity_bin_lower",
-         "units": "mm",
-         "description": "Bin lower bound drop diameter value",
-     }
-     # Define velocity attributes
-     attrs_dict["velocity_bin_center"] = {
-         "name": "velocity_bin_center",
-         "standard_name": "velocity_bin_center",
-         "long_name": "velocity_bin_center",
-         "units": "m/s",
-         "description": "Bin center drop fall velocity value",
-     }
-     attrs_dict["velocity_bin_width"] = {
-         "name": "velocity_bin_width",
-         "standard_name": "velocity_bin_width",
-         "long_name": "velocity_bin_width",
-         "units": "m/s",
-         "description": "Drop fall velocity bin width",
-     }
-     attrs_dict["velocity_bin_upper"] = {
-         "name": "velocity_bin_upper",
-         "standard_name": "velocity_bin_upper",
-         "long_name": "velocity_bin_upper",
-         "units": "m/s",
-         "description": "Bin upper bound drop fall velocity value",
-     }
-     attrs_dict["velocity_bin_lower"] = {
-         "name": "velocity_bin_lower",
-         "standard_name": "velocity_bin_lower",
-         "long_name": "velocity_bin_lower",
-         "units": "m/s",
-         "description": "Bin lower bound drop fall velocity value",
-     }
-     # Define geolocation attributes
-     attrs_dict["latitude"] = {
-         "name": "latitude",
-         "standard_name": "latitude",
-         "long_name": "Latitude",
-         "units": "degrees_north",
-     }
-     attrs_dict["longitude"] = {
-         "name": "longitude",
-         "standard_name": "longitude",
-         "long_name": "Longitude",
-         "units": "degrees_east",
-     }
-     attrs_dict["altitude"] = {
-         "name": "altitude",
-         "standard_name": "altitude",
-         "long_name": "Altitude",
-         "units": "m",
-         "description": "Elevation above sea level",
-     }
-     # Define time attributes
-     attrs_dict["time"] = {
-         "name": "time",
-         "standard_name": "time",
-         "long_name": "time",
-         "description": "UTC Time",
-     }
-
-     return attrs_dict
-
-
  def set_coordinate_attributes(ds):
      """Set coordinates attributes."""
      # Get attributes dictionary
-     attrs_dict = get_coords_attrs_dict()
+     attrs_dict = get_attrs_dict()
+     coords_dict = {coord: attrs_dict[coord] for coord in COORDINATES if coord in attrs_dict}
      # Set attributes
-     ds = set_attrs(ds, attrs_dict)
+     ds = set_attrs(ds, coords_dict)
      return ds


@@ -142,14 +62,14 @@ def set_coordinate_attributes(ds):
  #### DISDRODB Global Attributes


- def set_disdrodb_attrs(ds, product: str):
+ def update_disdrodb_attrs(ds, product: str):
      """Add DISDRODB processing information to the netCDF global attributes.

      It assumes stations metadata are already added the dataset.

      Parameters
      ----------
-     ds : xarray.Dataset
+     ds : xarray dataset.
          Dataset
      product: str
          DISDRODB product.
@@ -159,30 +79,53 @@
      Returns
      -------
      xarray dataset
          Dataset.
      """
-     # Add dataset conventions
-     ds.attrs["Conventions"] = CONVENTIONS
-
-     # Add featureType
-     if "platform_type" in ds.attrs:
-         platform_type = ds.attrs["platform_type"]
-         if platform_type == "fixed":
-             ds.attrs["featureType"] = "timeSeries"
-         else:
-             ds.attrs["featureType"] = "trajectory"
+     attrs = ds.attrs.copy()
+
+     # ----------------------------------------------
+     # Drop metadata not relevant for DISDRODB products
+     keys_to_drop = [
+         "disdrodb_reader",
+         "disdrodb_data_url",
+         "raw_data_glob_pattern",
+         "raw_data_format",
+     ]
+     for key in keys_to_drop:
+         _ = attrs.pop(key, None)
+
+     # ----------------------------------------------
+     # Add time_coverage_start and time_coverage_end
+     if "time" in ds.dims:
+         attrs["time_coverage_start"] = str(ds["time"].data[0])
+         attrs["time_coverage_end"] = str(ds["time"].data[-1])

-     # Update DISDRODDB attributes
-     ds = update_disdrodb_attrs(ds=ds, product=product)
+     # ----------------------------------------------
+     # Set DISDRODB attributes
+     # - Add DISDRODB processing info
+     now = datetime.datetime.utcnow()
+     current_time = now.strftime("%Y-%m-%d %H:%M:%S")
+     attrs["disdrodb_processing_date"] = current_time
+     # - Add DISDRODB product and version
+     attrs["disdrodb_product_version"] = ARCHIVE_VERSION
+     attrs["disdrodb_software_version"] = SOFTWARE_VERSION
+     attrs["disdrodb_product"] = product
+
+     # ----------------------------------------------
+     # Finalize attributes dictionary
+     # - Sort attributes alphabetically
+     attrs = dict(sorted(attrs.items()))
+     # - Set attributes
+     ds.attrs = attrs
      return ds


- def update_disdrodb_attrs(ds, product: str):
+ def set_disdrodb_attrs(ds, product: str):
      """Add DISDRODB processing information to the netCDF global attributes.

      It assumes stations metadata are already added the dataset.

      Parameters
      ----------
-     ds : xarray dataset.
+     ds : xarray.Dataset
          Dataset
      product: str
          DISDRODB product.
@@ -192,17 +135,17 @@ def update_disdrodb_attrs(ds, product: str):
      Returns
      -------
      xarray dataset
          Dataset.
      """
-     # Add time_coverage_start and time_coverage_end
-     ds.attrs["time_coverage_start"] = str(ds["time"].data[0])
-     ds.attrs["time_coverage_end"] = str(ds["time"].data[-1])
+     # Add dataset conventions
+     ds.attrs["Conventions"] = CONVENTIONS

-     # DISDRODDB attributes
-     # - Add DISDRODB processing info
-     now = datetime.datetime.utcnow()
-     current_time = now.strftime("%Y-%m-%d %H:%M:%S")
-     ds.attrs["disdrodb_processing_date"] = current_time
-     # - Add DISDRODB product and version
-     ds.attrs["disdrodb_product_version"] = ARCHIVE_VERSION
-     ds.attrs["disdrodb_software_version"] = SOFTWARE_VERSION
-     ds.attrs["disdrodb_product"] = product
+     # Add featureType
+     if "platform_type" in ds.attrs:
+         platform_type = ds.attrs["platform_type"]
+         if platform_type == "fixed":
+             ds.attrs["featureType"] = "timeSeries"
+         else:
+             ds.attrs["featureType"] = "trajectory"
+
+     # Update DISDRODB attributes
+     ds = update_disdrodb_attrs(ds=ds, product=product)
      return ds
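To see what the reshuffled update_disdrodb_attrs now does to the global attributes, a self-contained sketch that mirrors its stamping and sorting logic on a plain dict (toy metadata, not the package API):

    import datetime

    attrs = {"station_name": "EXAMPLE", "disdrodb_reader": "SOME_READER"}  # toy metadata

    # Drop reader-specific metadata, as in update_disdrodb_attrs
    for key in ["disdrodb_reader", "disdrodb_data_url", "raw_data_glob_pattern", "raw_data_format"]:
        attrs.pop(key, None)

    # Stamp processing info and sort attributes alphabetically
    attrs["disdrodb_processing_date"] = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
    attrs["disdrodb_product"] = "L2E"
    attrs = dict(sorted(attrs.items()))
    print(list(attrs))  # ['disdrodb_processing_date', 'disdrodb_product', 'station_name']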