disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
- disdrodb/__init__.py +68 -34
- disdrodb/_config.py +5 -4
- disdrodb/_version.py +16 -3
- disdrodb/accessor/__init__.py +20 -0
- disdrodb/accessor/methods.py +125 -0
- disdrodb/api/checks.py +177 -24
- disdrodb/api/configs.py +3 -3
- disdrodb/api/info.py +13 -13
- disdrodb/api/io.py +281 -22
- disdrodb/api/path.py +184 -195
- disdrodb/api/search.py +18 -9
- disdrodb/cli/disdrodb_create_summary.py +103 -0
- disdrodb/cli/disdrodb_create_summary_station.py +91 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
- disdrodb/cli/disdrodb_run_l1_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
- disdrodb/configs.py +149 -4
- disdrodb/constants.py +61 -0
- disdrodb/data_transfer/download_data.py +127 -11
- disdrodb/etc/configs/attributes.yaml +339 -0
- disdrodb/etc/configs/encodings.yaml +473 -0
- disdrodb/etc/products/L1/global.yaml +13 -0
- disdrodb/etc/products/L2E/10MIN.yaml +12 -0
- disdrodb/etc/products/L2E/1MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +22 -0
- disdrodb/etc/products/L2M/10MIN.yaml +12 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +26 -0
- disdrodb/issue/writer.py +2 -0
- disdrodb/l0/__init__.py +13 -0
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
- disdrodb/l0/l0a_processing.py +37 -32
- disdrodb/l0/l0b_nc_processing.py +118 -8
- disdrodb/l0/l0b_processing.py +30 -65
- disdrodb/l0/l0c_processing.py +369 -259
- disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
- disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
- disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
- disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
- disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
- disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
- disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
- disdrodb/l1/__init__.py +5 -0
- disdrodb/l1/fall_velocity.py +46 -0
- disdrodb/l1/filters.py +34 -20
- disdrodb/l1/processing.py +46 -45
- disdrodb/l1/resampling.py +77 -66
- disdrodb/l1_env/routines.py +18 -3
- disdrodb/l2/__init__.py +7 -0
- disdrodb/l2/empirical_dsd.py +58 -10
- disdrodb/l2/processing.py +268 -117
- disdrodb/metadata/checks.py +132 -125
- disdrodb/metadata/standards.py +3 -1
- disdrodb/psd/fitting.py +631 -345
- disdrodb/psd/models.py +9 -6
- disdrodb/routines/__init__.py +54 -0
- disdrodb/{l0/routines.py → routines/l0.py} +316 -355
- disdrodb/{l1/routines.py → routines/l1.py} +76 -116
- disdrodb/routines/l2.py +1019 -0
- disdrodb/{routines.py → routines/wrappers.py} +98 -10
- disdrodb/scattering/__init__.py +16 -4
- disdrodb/scattering/axis_ratio.py +61 -37
- disdrodb/scattering/permittivity.py +504 -0
- disdrodb/scattering/routines.py +746 -184
- disdrodb/summary/__init__.py +17 -0
- disdrodb/summary/routines.py +4196 -0
- disdrodb/utils/archiving.py +434 -0
- disdrodb/utils/attrs.py +68 -125
- disdrodb/utils/cli.py +5 -5
- disdrodb/utils/compression.py +30 -1
- disdrodb/utils/dask.py +121 -9
- disdrodb/utils/dataframe.py +61 -7
- disdrodb/utils/decorators.py +31 -0
- disdrodb/utils/directories.py +35 -15
- disdrodb/utils/encoding.py +37 -19
- disdrodb/{l2 → utils}/event.py +15 -173
- disdrodb/utils/logger.py +14 -7
- disdrodb/utils/manipulations.py +81 -0
- disdrodb/utils/routines.py +166 -0
- disdrodb/utils/subsetting.py +214 -0
- disdrodb/utils/time.py +35 -177
- disdrodb/utils/writer.py +20 -7
- disdrodb/utils/xarray.py +5 -4
- disdrodb/viz/__init__.py +13 -0
- disdrodb/viz/plots.py +398 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
- disdrodb/l1/encoding_attrs.py +0 -642
- disdrodb/l2/processing_options.py +0 -213
- disdrodb/l2/routines.py +0 -868
- /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
disdrodb/{l2 → utils}/event.py RENAMED

@@ -15,110 +15,16 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Functions for event definition."""
-
+
 import numpy as np
 import pandas as pd
-import xarray as xr
-
-from disdrodb.api.info import get_start_end_time_from_filepaths
-from disdrodb.utils.time import acronym_to_seconds, ensure_sorted_by_time
-
-
-@dask.delayed
-def _delayed_open_dataset(filepath):
-    with dask.config.set(scheduler="synchronous"):
-        ds = xr.open_dataset(filepath, chunks={}, autoclose=True, decode_timedelta=False, cache=False)
-    return ds
-
-
-def identify_events(
-    filepaths,
-    parallel=False,
-    min_n_drops=5,
-    neighbor_min_size=2,
-    neighbor_time_interval="5MIN",
-    intra_event_max_time_gap="6H",
-    event_min_duration="5MIN",
-    event_min_size=3,
-):
-    """Return a list of rainy events.
-
-    Rainy timesteps are defined when N > min_n_drops.
-    Any rainy isolated timesteps (based on neighborhood criteria) are removed.
-    Then, consecutive rainy timesteps are grouped into the same event if the time gap between them does not
-    exceed `intra_event_max_time_gap`. Finally, events that do not meet minimum size or duration
-    requirements are filtered out.
-
-    Parameters
-    ----------
-    filepaths: list
-        List of L1C file paths.
-    parallel: bool
-        Whether to load the files in parallel.
-        Set parallel=True only in a multiprocessing environment.
-        The default is False.
-    neighbor_time_interval : str
-        The time interval around a given timestep defining the neighborhood.
-        Only timesteps that fall within this time interval before or after a timestep are considered neighbors.
-    neighbor_min_size : int, optional
-        The minimum number of neighboring timesteps required within `neighbor_time_interval` for a
-        timestep to be considered non-isolated. Isolated timesteps are removed!
-        - If `neighbor_min_size=0`, then no timestep is considered isolated and no filtering occurs.
-        - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
-        - If `neighbor_min_size=2`, the timestep must have at least two timesteps within `neighbor_time_interval`.
-        Defaults to 1.
-    intra_event_max_time_gap: str
-        The maximum time interval between two timesteps to be considered part of the same event.
-        This parameter is used to group timesteps into events!
-    event_min_duration : str
-        The minimum duration an event must span. Events shorter than this duration are discarded.
-    event_min_size : int, optional
-        The minimum number of valid timesteps required for an event. Defaults to 1.
 
-    Returns
-    -------
-    list of dict
-        A list of events, where each event is represented as a dictionary with keys:
-        - "start_time": np.datetime64, start time of the event
-        - "end_time": np.datetime64, end time of the event
-        - "duration": np.timedelta64, duration of the event
-        - "n_timesteps": int, number of valid timesteps in the event
-    """
-    # Open datasets in parallel
-    if parallel:
-        list_ds = dask.compute([_delayed_open_dataset(filepath) for filepath in filepaths])[0]
-    else:
-        list_ds = [xr.open_dataset(filepath, chunks={}, cache=False, decode_timedelta=False) for filepath in filepaths]
-    # Filter dataset for requested variables
-    variables = ["time", "N"]
-    list_ds = [ds[variables] for ds in list_ds]
-    # Concat datasets
-    ds = xr.concat(list_ds, dim="time", compat="no_conflicts", combine_attrs="override")
-    # Read in memory the variable needed
-    ds = ds.compute()
-    # Close file on disk
-    _ = [ds.close() for ds in list_ds]
-    del list_ds
-    # Sort dataset by time
-    ds = ensure_sorted_by_time(ds)
-    # Define candidate timesteps to group into events
-    idx_valid = ds["N"].data > min_n_drops
-    timesteps = ds["time"].data[idx_valid]
-    # Define event list
-    event_list = group_timesteps_into_event(
-        timesteps=timesteps,
-        neighbor_min_size=neighbor_min_size,
-        neighbor_time_interval=neighbor_time_interval,
-        intra_event_max_time_gap=intra_event_max_time_gap,
-        event_min_duration=event_min_duration,
-        event_min_size=event_min_size,
-    )
-    return event_list
+from disdrodb.utils.time import temporal_resolution_to_seconds
 
 
 def group_timesteps_into_event(
     timesteps,
-    intra_event_max_time_gap,
+    event_max_time_gap,
     event_min_size=0,
     event_min_duration="0S",
     neighbor_min_size=0,
@@ -130,7 +36,7 @@ def group_timesteps_into_event(
     This function groups valid candidate timesteps into events by considering how they cluster
     in time. Any isolated timesteps (based on neighborhood criteria) are first removed. Then,
     consecutive timesteps are grouped into the same event if the time gap between them does not
-    exceed `intra_event_max_time_gap`. Finally, events that do not meet minimum size or duration
+    exceed `event_max_time_gap`. Finally, events that do not meet minimum size or duration
     requirements are filtered out.
 
     Please note that neighbor_min_size and neighbor_time_interval are very sensitive to the
@@ -150,7 +56,7 @@ def group_timesteps_into_event(
         - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
         - If `neighbor_min_size=2`, the timestep must have at least two timesteps within `neighbor_time_interval`.
         Defaults to 1.
-    intra_event_max_time_gap: str
+    event_max_time_gap: str
         The maximum time interval between two timesteps to be considered part of the same event.
         This parameter is used to group timesteps into events!
     event_min_duration : str
@@ -168,9 +74,9 @@ def group_timesteps_into_event(
         - "n_timesteps": int, number of valid timesteps in the event
     """
     # Retrieve datetime arguments
-    neighbor_time_interval = pd.Timedelta(acronym_to_seconds(neighbor_time_interval), unit="seconds")
-    intra_event_max_time_gap = pd.Timedelta(acronym_to_seconds(intra_event_max_time_gap), unit="seconds")
-    event_min_duration = pd.Timedelta(acronym_to_seconds(event_min_duration), unit="seconds")
+    neighbor_time_interval = pd.Timedelta(temporal_resolution_to_seconds(neighbor_time_interval), unit="seconds")
+    event_max_time_gap = pd.Timedelta(temporal_resolution_to_seconds(event_max_time_gap), unit="seconds")
+    event_min_duration = pd.Timedelta(temporal_resolution_to_seconds(event_min_duration), unit="seconds")
 
     # Remove isolated timesteps
     timesteps = remove_isolated_timesteps(
@@ -180,8 +86,8 @@ def group_timesteps_into_event(
     )
 
     # Group timesteps into events
-    # - If two timesteps are separated by less than intra_event_max_time_gap, are considered the same event
-    events = group_timesteps_into_events(timesteps, intra_event_max_time_gap)
+    # - If two timesteps are separated by less than event_max_time_gap, are considered the same event
+    events = group_timesteps_into_events(timesteps, event_max_time_gap)
 
     # Define list of event
     event_list = [
@@ -270,7 +176,7 @@ def remove_isolated_timesteps(timesteps, neighbor_min_size, neighbor_time_interv
     return non_isolated_timesteps
 
 
-def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
+def group_timesteps_into_events(timesteps, event_max_time_gap):
     """
     Group valid timesteps into events based on a maximum allowed dry interval.
 
@@ -278,7 +184,7 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
     ----------
     timesteps : array-like of np.datetime64
         Sorted array of valid timesteps.
-    intra_event_max_time_gap : np.timedelta64
+    event_max_time_gap : np.timedelta64
         Maximum time interval allowed between consecutive valid timesteps for them
         to be considered part of the same event.
 
@@ -297,9 +203,9 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
     # Compute differences between consecutive timesteps
     diffs = np.diff(timesteps)
 
-    # Identify the indices where the gap is larger than intra_event_max_time_gap
+    # Identify the indices where the gap is larger than event_max_time_gap
     # These indices represent boundaries between events
-    break_indices = np.where(diffs > intra_event_max_time_gap)[0] + 1
+    break_indices = np.where(diffs > event_max_time_gap)[0] + 1
 
     # Split the timesteps at the identified break points
     events = np.split(timesteps, break_indices)
@@ -311,7 +217,7 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
     #     current_t = timesteps[i]
     #     previous_t = timesteps[i - 1]
 
-    #     if current_t - previous_t <= intra_event_max_time_gap:
+    #     if current_t - previous_t <= event_max_time_gap:
     #         current_event.append(current_t)
     #     else:
     #         events.append(current_event)
@@ -322,67 +228,3 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
 
 
 ####-----------------------------------------------------------------------------------.
-
-
-def get_events_info(list_events, filepaths, accumulation_interval, rolling):
-    """
-    Provide information about the required files for each event.
-
-    For each event in `list_events`, this function identifies the file paths from `filepaths` that
-    overlap with the event period, adjusted by the `accumulation_interval`. The event period is
-    extended backward or forward based on the `rolling` parameter.
-
-    Parameters
-    ----------
-    list_events : list of dict
-        List of events, where each event is a dictionary containing at least 'start_time' and 'end_time'
-        keys with `numpy.datetime64` values.
-    filepaths : list of str
-        List of file paths corresponding to data files.
-    accumulation_interval : numpy.timedelta64 or int
-        Time interval to adjust the event period for accumulation. If an integer is provided, it is
-        assumed to be in seconds.
-    rolling : bool
-        If True, adjust the event period backward by `accumulation_interval` (rolling backward).
-        If False, adjust forward (aggregate forward).
-
-    Returns
-    -------
-    list of dict
-        A list where each element is a dictionary containing:
-        - 'start_time': Adjusted start time of the event (`numpy.datetime64`).
-        - 'end_time': Adjusted end time of the event (`numpy.datetime64`).
-        - 'filepaths': List of file paths overlapping with the adjusted event period.
-
-    """
-    # Ensure accumulation_interval is numpy.timedelta64
-    if not isinstance(accumulation_interval, np.timedelta64):
-        accumulation_interval = np.timedelta64(accumulation_interval, "s")
-
-    # Retrieve file start_time and end_time
-    files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)
-
-    # Retrieve information for each event
-    event_info = []
-    for event_dict in list_events:
-        # Retrieve event time period
-        event_start_time = event_dict["start_time"]
-        event_end_time = event_dict["end_time"]
-
-        # Add buffer to account for accumulation interval
-        if rolling:  # backward
-            event_start_time = event_start_time - np.array(accumulation_interval, dtype="m8[s]")
-        else:  # aggregate forward
-            event_end_time = event_end_time + np.array(accumulation_interval, dtype="m8[s]")
-
-        # Derive event filepaths
-        overlaps = (files_start_time <= event_end_time) & (files_end_time >= event_start_time)
-        event_filepaths = np.array(filepaths)[overlaps].tolist()
-
-        # Create dictionary
-        if len(event_filepaths) > 0:
-            event_info.append(
-                {"start_time": event_start_time, "end_time": event_end_time, "filepaths": event_filepaths},
-            )
-
-    return event_info
disdrodb/utils/logger.py CHANGED

@@ -42,7 +42,7 @@ def create_logger_file(logs_dir, filename, parallel):
     format_type = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
     handler.setFormatter(logging.Formatter(format_type))
     logger.addHandler(handler)
-    logger.setLevel(logging.
+    logger.setLevel(logging.INFO)
 
     # Define logger filepath
     # - LogCaptureHandler of pytest does not have baseFilename attribute --> So set None
@@ -164,9 +164,16 @@ def _define_station_summary_log_file(list_logs, summary_filepath):
 
 
 def _define_station_problem_log_file(list_logs, problem_filepath):
-    #
-    list_keywords = ["ERROR"]
-
+    # Copy the log of files with errors
+    list_keywords = ["ERROR"]
+    # Exclude lines with the following patterns
+    list_patterns = [
+        # Caused by no data with L2E and L2M filtering
+        "No timesteps with rain rate",
+        "No timesteps with N",
+        "No timesteps with Nbins",
+    ]
+    # Compile patterns to search, escaping any special regex characters
     re_keyword = re.compile("|".join(list_keywords))
     # Compile patterns to ignore, escaping any special regex characters
     re_patterns = re.compile("|".join(map(re.escape, list_patterns))) if list_patterns else None
@@ -221,7 +228,7 @@ def create_product_logs(
 
     The logs directory structure is the following:
     /logs
-        - /files/<
+        - /files/<product_name>/<station> (same structure as data ... a log for each processed file)
        - /summary
            --> SUMMARY.<PRODUCT_ACRONYM>.<CAMPAIGN_NAME>.<STATION_NAME>.log
        - /problems
@@ -269,7 +276,7 @@ def create_product_logs(
         # Product options
         **product_kwargs,
     )
-    list_logs = list_files(logs_dir,
+    list_logs = list_files(logs_dir, recursive=True)
 
     # --------------------------------------------------------.
     # LogCaptureHandler of pytest does not have baseFilename attribute, so it returns None
@@ -332,5 +339,5 @@ def create_product_logs(
 
     # --------------------------------------------------------.
     # Remove /problem directory if empty!
-    if len(
+    if len(list_files(logs_problem_dir, glob_pattern="*.log")) == 0:
         os.rmdir(logs_problem_dir)
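The new exclusion patterns in `_define_station_problem_log_file` change which log lines count as problems: an ERROR line is reported only if it does not match one of the excluded messages. A standalone sketch of that keyword/exclusion interplay (the log lines are invented for illustration):

```python
import re

list_keywords = ["ERROR"]
list_patterns = ["No timesteps with rain rate", "No timesteps with N", "No timesteps with Nbins"]

re_keyword = re.compile("|".join(list_keywords))
re_patterns = re.compile("|".join(map(re.escape, list_patterns)))

lines = [
    "2025-01-01 - L2E - ERROR - Disk quota exceeded",                      # hypothetical log line
    "2025-01-01 - L2E - ERROR - No timesteps with rain rate > threshold",  # hypothetical log line
]

# Keep ERROR lines, but drop those matching an excluded pattern
problems = [s for s in lines if re_keyword.search(s) and not re_patterns.search(s)]
assert problems == lines[:1]  # only the disk-quota error is reported as a problem
```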
disdrodb/utils/manipulations.py ADDED

@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Include functions helping for DISDRODB product manipulations."""
+
+import numpy as np
+
+from disdrodb.constants import DIAMETER_DIMENSION
+from disdrodb.utils.xarray import unstack_datarray_dimension
+
+
+def get_diameter_bin_edges(ds):
+    """Retrieve diameter bin edges."""
+    bin_edges = np.append(ds["diameter_bin_lower"].compute().data, ds["diameter_bin_upper"].compute().data[-1])
+    return bin_edges
+
+
+def convert_from_decibel(x):
+    """Convert dB to unit."""
+    return np.power(10.0, 0.1 * x)  # x/10
+
+
+def convert_to_decibel(x):
+    """Convert unit to dB."""
+    return 10 * np.log10(x)
+
+
+def unstack_radar_variables(ds):
+    """Unstack radar variables."""
+    from disdrodb.scattering import RADAR_VARIABLES
+
+    for var in RADAR_VARIABLES:
+        if var in ds:
+            ds_unstack = unstack_datarray_dimension(ds[var], dim="frequency", prefix="", suffix="_")
+            ds.update(ds_unstack)
+            ds = ds.drop_vars(var)
+    if "frequency" in ds.dims:
+        ds = ds.drop_dims("frequency")
+    return ds
+
+
+def get_diameter_coords_dict_from_bin_edges(diameter_bin_edges):
+    """Get dictionary with all relevant diameter coordinates."""
+    if np.size(diameter_bin_edges) < 2:
+        raise ValueError("Expecting at least 2 values defining bin edges.")
+    diameter_bin_center = diameter_bin_edges[:-1] + np.diff(diameter_bin_edges) / 2
+    diameter_bin_width = np.diff(diameter_bin_edges)
+    diameter_bin_lower = diameter_bin_edges[:-1]
+    diameter_bin_upper = diameter_bin_edges[1:]
+    coords_dict = {
+        "diameter_bin_center": (DIAMETER_DIMENSION, diameter_bin_center),
+        "diameter_bin_width": (DIAMETER_DIMENSION, diameter_bin_width),
+        "diameter_bin_lower": (DIAMETER_DIMENSION, diameter_bin_lower),
+        "diameter_bin_upper": (DIAMETER_DIMENSION, diameter_bin_upper),
+    }
+    return coords_dict
+
+
+def resample_drop_number_concentration(drop_number_concentration, diameter_bin_edges, method="linear"):
+    """Resample drop number concentration N(D) DataArray to high resolution diameter bins."""
+    diameters_bin_center = diameter_bin_edges[:-1] + np.diff(diameter_bin_edges) / 2
+
+    da = drop_number_concentration.interp(coords={"diameter_bin_center": diameters_bin_center}, method=method)
+    coords_dict = get_diameter_coords_dict_from_bin_edges(diameter_bin_edges)
+    da = da.assign_coords(coords_dict)
+    return da
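A usage sketch of the new helpers, assuming disdrodb 0.1.4 is installed so that `disdrodb.utils.manipulations` is importable as added above:

```python
import numpy as np

from disdrodb.utils.manipulations import (
    convert_from_decibel,
    convert_to_decibel,
    get_diameter_coords_dict_from_bin_edges,
)

# Decibel round trip: 30 dBZ -> 1000 (linear units) -> 30 dBZ
assert np.isclose(convert_to_decibel(convert_from_decibel(30.0)), 30.0)

# Diameter coordinates for two bins with edges at 0.0, 0.5 and 1.0 mm
coords = get_diameter_coords_dict_from_bin_edges(np.array([0.0, 0.5, 1.0]))
# coords["diameter_bin_center"] -> (DIAMETER_DIMENSION, array([0.25, 0.75]))
# coords["diameter_bin_width"]  -> (DIAMETER_DIMENSION, array([0.5, 0.5]))
```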
disdrodb/utils/routines.py ADDED

@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Utilities for DISDRODB processing routines."""
+import os
+import shutil
+import tempfile
+
+from disdrodb.api.io import find_files
+from disdrodb.api.path import define_file_folder_path, define_temporal_resolution
+from disdrodb.utils.logger import (
+    close_logger,
+    create_logger_file,
+    log_error,
+    log_info,
+)
+
+
+def is_possible_product(accumulation_interval, sample_interval, rolling):
+    """Assess if production is possible given the requested accumulation interval and source sample_interval."""
+    # Avoid rolling product generation at source sample interval
+    if rolling and accumulation_interval == sample_interval:
+        return False
+    # Avoid product generation if the accumulation_interval is less than the sample interval
+    if accumulation_interval < sample_interval:
+        return False
+    # Avoid product generation if accumulation_interval is not multiple of sample_interval
+    return accumulation_interval % sample_interval == 0
+
+
+def try_get_required_filepaths(
+    product,
+    data_archive_dir,
+    data_source,
+    campaign_name,
+    station_name,
+    debugging_mode,
+    **product_kwargs,
+):
+    """Try to retrieve required filepaths for a product, or return None if unavailable."""
+    try:
+        filepaths = find_files(
+            data_archive_dir=data_archive_dir,
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+            product=product,
+            debugging_mode=debugging_mode,
+            **product_kwargs,
+        )
+        return filepaths
+    # If no files available, print informative message
+    except Exception as e:
+        temporal_resolution = ""
+        if "sample_interval" in product_kwargs and "rolling" in product_kwargs:
+            temporal_resolution = define_temporal_resolution(
+                seconds=product_kwargs["sample_interval"],
+                rolling=product_kwargs["rolling"],
+            )
+        print(str(e))
+        msg = (
+            f"{product} processing of {data_source} {campaign_name} {station_name} "
+            f"has not been launched because of missing {product} {temporal_resolution} data."
+        )
+        print(msg)
+        return None
+
+
+def run_product_generation(
+    product: str,
+    logs_dir: str,
+    logs_filename: str,
+    parallel: bool,
+    verbose: bool,
+    folder_partitioning: str,
+    core_func: callable,
+    core_func_kwargs: dict,
+    pass_logger=False,
+):
+    """
+    Generic wrapper for DISDRODB product generation.
+
+    Parameters
+    ----------
+    product : str
+        Product name (e.g., "L0A", "L0B", ...).
+    logs_dir : str
+        Logs directory.
+    logs_filename : str
+        Logs filename.
+    parallel : bool
+        Parallel flag (for logger).
+    verbose : bool
+        Verbose logging flag.
+    folder_partitioning : str
+        Partitioning scheme.
+    core_func : callable
+        Function with signature `core_func(logger)` that does the product-specific work.
+        Must return an xarray.Dataset or pandas.DataFrame (used to determine log subdir).
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Initialize log file
+        logger, tmp_logger_filepath = create_logger_file(
+            logs_dir=tmpdir,
+            filename=logs_filename,
+            parallel=parallel,
+        )
+
+        # Inform that product creation has started
+        log_info(logger, f"{product} processing of {logs_filename} has started.", verbose=verbose)
+
+        # Initialize object
+        obj = None  # if None, means the product creation failed
+
+        # Add logger to core_func_kwargs if specified
+        if pass_logger:
+            core_func_kwargs["logger"] = logger
+
+        # Try product creation
+        try:
+            # Run product creation
+            obj = core_func(**core_func_kwargs)
+
+            # Inform that product creation has ended
+            log_info(logger, f"{product} processing of {logs_filename} has ended.", verbose=verbose)
+
+        # Report error if the case
+        except Exception as e:
+            log_error(logger, f"{type(e).__name__}: {e}", verbose=verbose)
+
+        finally:
+            # Close logger
+            close_logger(logger)
+
+        # Move log file to final logs directory
+        success_flag = obj is not None
+        if success_flag:  # and "time" in obj and len(obj["time"]) > 0:
+            logs_dir = define_file_folder_path(obj, dir_path=logs_dir, folder_partitioning=folder_partitioning)
+            os.makedirs(logs_dir, exist_ok=True)
+            if tmp_logger_filepath is not None:  # (when running pytest, tmp_logger_filepath is None)
+                logger_filepath = os.path.join(logs_dir, os.path.basename(tmp_logger_filepath))
+                shutil.move(tmp_logger_filepath, logger_filepath)
+        else:
+            logger_filepath = None
+
+        # Free memory
+        del obj
+
+        # Return logger filepath
+        return logger_filepath
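A usage sketch of `is_possible_product`, assuming intervals are given in seconds as in the L2 routines (values are illustrative):

```python
from disdrodb.utils.routines import is_possible_product

# 60 s data can be aggregated to 300 s: 300 % 60 == 0
assert is_possible_product(accumulation_interval=300, sample_interval=60, rolling=False)

# Rolling products are skipped at the source sample interval
assert not is_possible_product(accumulation_interval=60, sample_interval=60, rolling=True)

# 90 s is not a multiple of the 60 s sample interval
assert not is_possible_product(accumulation_interval=90, sample_interval=60, rolling=False)
```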