disdrodb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +64 -34
- disdrodb/_config.py +5 -4
- disdrodb/_version.py +16 -3
- disdrodb/accessor/__init__.py +20 -0
- disdrodb/accessor/methods.py +125 -0
- disdrodb/api/checks.py +139 -9
- disdrodb/api/configs.py +4 -2
- disdrodb/api/info.py +10 -10
- disdrodb/api/io.py +237 -18
- disdrodb/api/path.py +81 -75
- disdrodb/api/search.py +6 -6
- disdrodb/cli/disdrodb_create_summary_station.py +91 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
- disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
- disdrodb/configs.py +149 -4
- disdrodb/constants.py +61 -0
- disdrodb/data_transfer/download_data.py +5 -5
- disdrodb/etc/configs/attributes.yaml +339 -0
- disdrodb/etc/configs/encodings.yaml +473 -0
- disdrodb/etc/products/L1/global.yaml +13 -0
- disdrodb/etc/products/L2E/10MIN.yaml +12 -0
- disdrodb/etc/products/L2E/1MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +22 -0
- disdrodb/etc/products/L2M/10MIN.yaml +12 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +26 -0
- disdrodb/l0/__init__.py +13 -0
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
- disdrodb/l0/l0a_processing.py +30 -30
- disdrodb/l0/l0b_nc_processing.py +108 -2
- disdrodb/l0/l0b_processing.py +4 -4
- disdrodb/l0/l0c_processing.py +5 -13
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
- disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
- disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +2 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
- disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → USA/C3WE.py} +65 -85
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
- disdrodb/l0/routines.py +105 -14
- disdrodb/l1/__init__.py +5 -0
- disdrodb/l1/filters.py +34 -20
- disdrodb/l1/processing.py +45 -44
- disdrodb/l1/resampling.py +77 -66
- disdrodb/l1/routines.py +35 -43
- disdrodb/l1_env/routines.py +18 -3
- disdrodb/l2/__init__.py +7 -0
- disdrodb/l2/empirical_dsd.py +58 -10
- disdrodb/l2/event.py +27 -120
- disdrodb/l2/processing.py +267 -116
- disdrodb/l2/routines.py +618 -254
- disdrodb/metadata/standards.py +3 -1
- disdrodb/psd/fitting.py +463 -144
- disdrodb/psd/models.py +8 -5
- disdrodb/routines.py +3 -3
- disdrodb/scattering/__init__.py +16 -4
- disdrodb/scattering/axis_ratio.py +56 -36
- disdrodb/scattering/permittivity.py +486 -0
- disdrodb/scattering/routines.py +701 -159
- disdrodb/summary/__init__.py +17 -0
- disdrodb/summary/routines.py +4120 -0
- disdrodb/utils/attrs.py +68 -125
- disdrodb/utils/compression.py +30 -1
- disdrodb/utils/dask.py +59 -8
- disdrodb/utils/dataframe.py +61 -7
- disdrodb/utils/directories.py +35 -15
- disdrodb/utils/encoding.py +33 -19
- disdrodb/utils/logger.py +13 -6
- disdrodb/utils/manipulations.py +71 -0
- disdrodb/utils/subsetting.py +214 -0
- disdrodb/utils/time.py +165 -19
- disdrodb/utils/writer.py +20 -7
- disdrodb/utils/xarray.py +2 -4
- disdrodb/viz/__init__.py +13 -0
- disdrodb/viz/plots.py +327 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/RECORD +121 -88
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
- disdrodb/l1/encoding_attrs.py +0 -642
- disdrodb/l2/processing_options.py +0 -213
- /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
disdrodb/l2/routines.py
CHANGED
@@ -16,40 +16,47 @@
 # -----------------------------------------------------------------------------.
 """Implements routines for DISDRODB L2 processing."""

+import copy
 import datetime
+import json
 import logging
 import os
+import shutil
 import time
 from typing import Optional

 import dask
 import numpy as np
 import pandas as pd
-import xarray as xr

-
-from disdrodb import is_pytmatrix_available
+from disdrodb.api.checks import check_station_inputs
 from disdrodb.api.create_directories import (
     create_logs_directory,
     create_product_directory,
 )
-from disdrodb.api.info import group_filepaths
-from disdrodb.api.io import find_files
+from disdrodb.api.info import get_start_end_time_from_filepaths, group_filepaths
+from disdrodb.api.io import find_files, open_netcdf_files
 from disdrodb.api.path import (
-
+    define_file_folder_path,
     define_l2e_filename,
     define_l2m_filename,
+    define_temporal_resolution,
 )
 from disdrodb.api.search import get_required_product
-from disdrodb.configs import
+from disdrodb.configs import (
+    get_data_archive_dir,
+    get_metadata_archive_dir,
+    get_model_options,
+    get_product_options,
+    get_product_temporal_resolutions,
+)
 from disdrodb.l1.resampling import resample_dataset
-from disdrodb.l2.event import
+from disdrodb.l2.event import get_files_partitions, group_timesteps_into_event
 from disdrodb.l2.processing import (
-    generate_l2_empirical,
-    generate_l2_model,
     generate_l2_radar,
+    generate_l2e,
+    generate_l2m,
 )
-from disdrodb.l2.processing_options import get_l2_processing_options
 from disdrodb.metadata import read_station_metadata
 from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel
 from disdrodb.utils.list import flatten_list
@@ -62,12 +69,382 @@ from disdrodb.utils.logger import (
     log_error,
     log_info,
 )
-from disdrodb.utils.time import
+from disdrodb.utils.time import (
+    ensure_sample_interval_in_seconds,
+    ensure_sorted_by_time,
+    generate_time_blocks,
+    get_resampling_information,
+)
 from disdrodb.utils.writer import write_product

 logger = logging.getLogger(__name__)


+####----------------------------------------------------------------------------.
+def identify_events(
+    filepaths,
+    parallel=False,
+    min_drops=5,
+    neighbor_min_size=2,
+    neighbor_time_interval="5MIN",
+    event_max_time_gap="6H",
+    event_min_duration="5MIN",
+    event_min_size=3,
+):
+    """Return a list of rainy events.
+
+    Rainy timesteps are defined when N > min_drops.
+    Any rainy isolated timesteps (based on neighborhood criteria) is removed.
+    Then, consecutive rainy timesteps are grouped into the same event if the time gap between them does not
+    exceed `event_max_time_gap`. Finally, events that do not meet minimum size or duration
+    requirements are filtered out.
+
+    Parameters
+    ----------
+    filepaths: list
+        List of L1C file paths.
+    parallel: bool
+        Whether to load the files in parallel.
+        Set parallel=True only in a multiprocessing environment.
+        The default is False.
+    neighbor_time_interval : str
+        The time interval around a given a timestep defining the neighborhood.
+        Only timesteps that fall within this time interval before or after a timestep are considered neighbors.
+    neighbor_min_size : int, optional
+        The minimum number of neighboring timesteps required within `neighbor_time_interval` for a
+        timestep to be considered non-isolated. Isolated timesteps are removed !
+        - If `neighbor_min_size=0, then no timestep is considered isolated and no filtering occurs.
+        - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
+        - If `neighbor_min_size=2`, the timestep must have at least two timesteps within `neighbor_time_interval`.
+        Defaults to 1.
+    event_max_time_gap: str
+        The maximum time interval between two timesteps to be considered part of the same event.
+        This parameters is used to group timesteps into events !
+    event_min_duration : str
+        The minimum duration an event must span. Events shorter than this duration are discarded.
+    event_min_size : int, optional
+        The minimum number of valid timesteps required for an event. Defaults to 1.
+
+    Returns
+    -------
+    list of dict
+        A list of events, where each event is represented as a dictionary with keys:
+        - "start_time": np.datetime64, start time of the event
+        - "end_time": np.datetime64, end time of the event
+        - "duration": np.timedelta64, duration of the event
+        - "n_timesteps": int, number of valid timesteps in the event
+    """
+    # Open datasets in parallel
+    ds = open_netcdf_files(filepaths, variables=["time", "N"], parallel=parallel)
+    # Sort dataset by time
+    ds = ensure_sorted_by_time(ds)
+    # Define candidate timesteps to group into events
+    idx_valid = ds["N"].data > min_drops
+    timesteps = ds["time"].data[idx_valid]
+    # Define event list
+    event_list = group_timesteps_into_event(
+        timesteps=timesteps,
+        neighbor_min_size=neighbor_min_size,
+        neighbor_time_interval=neighbor_time_interval,
+        event_max_time_gap=event_max_time_gap,
+        event_min_duration=event_min_duration,
+        event_min_size=event_min_size,
+    )
+    return event_list
+
+
+def identify_time_partitions(filepaths: list[str], freq: str) -> list[dict]:
+    """Identify the set of time blocks covered by files.
+
+    The result is a minimal, sorted, and unique set of time partitions.
+
+    Parameters
+    ----------
+    filepaths : list of str
+        Paths to input files from which start and end times will be extracted
+        via `get_start_end_time_from_filepaths`.
+    freq : {'none', 'hour', 'day', 'month', 'quarter', 'season', 'year'}
+        Frequency determining the granularity of candidate blocks.
+        See `generate_time_blocks` for more details.
+
+    Returns
+    -------
+    list of dict
+        A list of dictionaries, each containing:
+
+        - `start_time` (numpy.datetime64[s])
+          Inclusive start of a time block.
+        - `end_time` (numpy.datetime64[s])
+          Inclusive end of a time block.
+
+        Only those blocks that overlap at least one file's interval are returned.
+        The list is sorted by `start_time` and contains no duplicate blocks.
+    """
+    # Define file start time and end time
+    start_times, end_times = get_start_end_time_from_filepaths(filepaths)
+
+    # Define files time coverage
+    start_time, end_time = start_times.min(), end_times.max()
+
+    # Compute candidate time blocks
+    blocks = generate_time_blocks(start_time, end_time, freq=freq) # end_time non inclusive is correct?
+
+    # Select time blocks with files
+    mask = (blocks[:, 0][:, None] <= end_times) & (blocks[:, 1][:, None] >= start_times)
+    blocks = blocks[mask.any(axis=1)]
+
+    # Ensure sorted unique time blocks
+    order = np.argsort(blocks[:, 0])
+    blocks = np.unique(blocks[order], axis=0)
+
+    # Convert to list of dicts
+    list_time_blocks = [{"start_time": start_time, "end_time": end_time} for start_time, end_time in blocks]
+    return list_time_blocks
+
+
+def is_possible_product(accumulation_interval, sample_interval, rolling):
+    """Assess if production is possible given the requested accumulation interval and source sample_interval."""
+    # Avoid rolling product generation at source sample interval
+    if rolling and accumulation_interval == sample_interval:
+        return False
+    # Avoid product generation if the accumulation_interval is less than the sample interval
+    if accumulation_interval < sample_interval:
+        return False
+    # Avoid producti generation if accumulation_interval is not multiple of sample_interval
+    return accumulation_interval % sample_interval == 0
+
+
+def define_temporal_partitions(filepaths, strategy, parallel, strategy_options):
+    """Define temporal file processing partitions.
+
+    Parameters
+    ----------
+    filepaths : list
+        List of files paths to be processed
+
+    strategy : str
+        Which partitioning strategy to apply:
+
+        - ``'time_block'`` defines fixed time intervals (e.g. monthly) covering input files.
+        - ``'event'`` detect clusters of precipitation ("events").
+
+    parallel : bool
+        If True, parallel data loading is used to identify events.
+
+    strategy_options : dict
+        Dictionary with strategy-specific parameters:
+
+        If ``strategy == 'time_block'``, supported options are:
+
+        - ``freq``: Time unit for blocks. One of {'year', 'season', 'month', 'day'}.
+
+        See identify_time_partitions for more information.
+
+        If ``strategy == 'event'``, supported options are:
+
+        - ``min_drops`` : int
+            Minimum number of drops to consider a timestep.
+        - ``neighbor_min_size`` : int
+            Minimum cluster size for merging neighboring events.
+        - ``neighbor_time_interval`` : str
+            Time window (e.g. "5MIN") to merge adjacent clusters.
+        - ``event_max_time_gap`` : str
+            Maximum allowed gap (e.g. "6H") within a single event.
+        - ``event_min_duration`` : str
+            Minimum total duration (e.g. "5MIN") of an event.
+        - ``event_min_size`` : int
+            Minimum number of records in an event.
+
+        See identify_events for more information.
+
+    Returns
+    -------
+    list
+        A list of dictionaries, each containing:
+
+        - ``start_time`` (numpy.datetime64[s])
+          Inclusive start of an event or time block.
+        - ``end_time`` (numpy.datetime64[s])
+          Inclusive end of an event or time block.
+
+    Notes
+    -----
+    - The ``'event'`` strategy requires loading data into memory to identify clusters.
+    - The ``'time_block'`` strategy can operate on metadata alone, without full data loading.
+    - The ``'event'`` strategy implicitly performs data selection on which files to process !
+    - The ``'time_block'`` strategy does not performs data selection on which files to process !
+    """
+    if strategy not in ["time_block", "event"]:
+        raise ValueError(f"Unknown strategy: {strategy!r}. Must be 'time_block' or 'event'.")
+    if strategy == "event":
+        return identify_events(filepaths, parallel=parallel, **strategy_options)
+
+    return identify_time_partitions(filepaths, **strategy_options)
+
+
+class ProcessingOptions:
+    """Define L2 products processing options."""
+
+    def __init__(self, product, filepaths, parallel, temporal_resolutions=None):
+        """Define L2 products processing options."""
+        import disdrodb
+
+        # ---------------------------------------------------------------------.
+        # Define temporal resolutions for which to retrieve processing options
+        if temporal_resolutions is None:
+            temporal_resolutions = get_product_temporal_resolutions(product)
+        elif isinstance(temporal_resolutions, str):
+            temporal_resolutions = [temporal_resolutions]
+
+        # ---------------------------------------------------------------------.
+        # Get product options at various temporal resolutions
+        dict_product_options = {
+            temporal_resolution: get_product_options(product, temporal_resolution=temporal_resolution)
+            for temporal_resolution in temporal_resolutions
+        }
+
+        # ---------------------------------------------------------------------.
+        # Group filepaths by source sample intervals
+        # - Typically the sample interval is fixed and is just one
+        # - Some stations might change the sample interval along the years
+        # - For each sample interval, separated processing take place here after !
+        dict_filepaths = group_filepaths(filepaths, groups="sample_interval")
+
+        # ---------------------------------------------------------------------.
+        # Retrieve processing information for each temporal resolution
+        dict_folder_partitioning = {}
+        dict_files_partitions = {}
+        _cache_dict_list_partitions: dict[str, dict] = {}
+        for temporal_resolution in temporal_resolutions:
+
+            # -------------------------------------------------------------------------.
+            # Retrieve product options
+            product_options = dict_product_options[temporal_resolution]
+
+            # Retrieve accumulation_interval and rolling option
+            accumulation_interval, rolling = get_resampling_information(temporal_resolution)
+
+            # Extract processing options
+            archive_options = product_options.pop("archive_options")
+
+            dict_product_options[temporal_resolution] = product_options
+            # -------------------------------------------------------------------------.
+            # Define folder partitioning
+            if "folder_partitioning" not in archive_options:
+                dict_folder_partitioning[temporal_resolution] = disdrodb.config.get("folder_partitioning")
+            else:
+                dict_folder_partitioning[temporal_resolution] = archive_options.pop("folder_partitioning")
+
+            # -------------------------------------------------------------------------.
+            # Define list of temporal partitions
+            # - [{start_time:xxx, end_time: xxx}, ....]
+            # - Either strategy: "event" or "time_block" or save_by_time_block"
+            # - "event" requires loading data into memory to identify events
+            #   --> Does some data filtering on what to process !
+            # - "time_block" does not require loading data into memory
+            #   --> Does not do data filtering on what to process !
+            # --> Here we cache dict_list_partitions so that we don't need to recompute
+            #     stuffs if processing options are the same
+            key = json.dumps(archive_options, sort_keys=True)
+            if key not in _cache_dict_list_partitions:
+                _cache_dict_list_partitions[key] = {
+                    sample_interval: define_temporal_partitions(filepaths, parallel=parallel, **archive_options)
+                    for sample_interval, filepaths in dict_filepaths.items()
+                }
+            dict_list_partitions = _cache_dict_list_partitions[key].copy() # To avoid in-place replacement
+
+            # ------------------------------------------------------------------.
+            # Group filepaths by temporal partitions
+            # - This is done separately for each possible source sample interval
+            # - It groups filepaths by start_time and end_time provided by list_partitions
+            # - Here 'events' can also simply be period of times ('day', 'months', ...)
+            # - When aggregating/resampling/accumulating data, we need to load also
+            #   some data after the actual event end_time to ensure that the resampled dataset
+            #   contains the event_end_time
+            # --> get_files_partitions adjust the event end_time to accounts for the required "border" data.
+            files_partitions = [
+                get_files_partitions(
+                    list_partitions=list_partitions,
+                    filepaths=dict_filepaths[sample_interval],
+                    sample_interval=sample_interval,
+                    accumulation_interval=accumulation_interval,
+                    rolling=rolling,
+                )
+                for sample_interval, list_partitions in dict_list_partitions.items()
+                if product != "L2E"
+                or is_possible_product(
+                    accumulation_interval=accumulation_interval,
+                    sample_interval=sample_interval,
+                    rolling=rolling,
+                )
+            ]
+            files_partitions = flatten_list(files_partitions)
+            dict_files_partitions[temporal_resolution] = files_partitions
+
+        # ------------------------------------------------------------------.
+        # Keep only temporal_resolutions for which events could be defined
+        # - Remove e.g when not compatible accumulation_interval with source sample_interval
+        temporal_resolutions = [
+            temporal_resolution
+            for temporal_resolution in temporal_resolutions
+            if len(dict_files_partitions[temporal_resolution]) > 0
+        ]
+        # ------------------------------------------------------------------.
+        # Add attributes
+        self.temporal_resolutions = temporal_resolutions
+        self.dict_files_partitions = dict_files_partitions
+        self.dict_product_options = dict_product_options
+        self.dict_folder_partitioning = dict_folder_partitioning
+
+    def get_files_partitions(self, temporal_resolution):
+        """Return files partitions dictionary for a specific L2E product."""
+        return self.dict_files_partitions[temporal_resolution]
+
+    def get_product_options(self, temporal_resolution):
+        """Return product options dictionary for a specific L2E product."""
+        return self.dict_product_options[temporal_resolution]
+
+    def get_folder_partitioning(self, temporal_resolution):
+        """Return the folder partitioning for a specific L2E product."""
+        # to be used for logs and files !
+        return self.dict_folder_partitioning[temporal_resolution]
+
+
+def precompute_scattering_tables(
+    frequency,
+    num_points,
+    diameter_max,
+    canting_angle_std,
+    axis_ratio_model,
+    permittivity_model,
+    water_temperature,
+    elevation_angle,
+    verbose=True,
+):
+    """Precompute the pyTMatrix scattering tables required for radar variables simulations."""
+    from disdrodb.scattering.routines import get_list_simulations_params, load_scatterer
+
+    # Define parameters for all requested simulations
+    list_params = get_list_simulations_params(
+        frequency=frequency,
+        num_points=num_points,
+        diameter_max=diameter_max,
+        canting_angle_std=canting_angle_std,
+        axis_ratio_model=axis_ratio_model,
+        permittivity_model=permittivity_model,
+        water_temperature=water_temperature,
+        elevation_angle=elevation_angle,
+    )
+
+    # Compute require scattering tables
+    for params in list_params:
+        # Initialize scattering table
+        _ = load_scatterer(
+            verbose=verbose,
+            **params,
+        )
+
+
 ####----------------------------------------------------------------------------.
 #### L2E

@@ -80,15 +457,13 @@ def _generate_l2e(
     filepaths,
     data_dir,
     logs_dir,
+    folder_partitioning,
     campaign_name,
     station_name,
     # L2E options
     accumulation_interval,
     rolling,
-
-    # Radar options
-    radar_simulation_enabled,
-    radar_simulation_options,
+    product_options,
     # Processing options
     force,
     verbose,
@@ -98,53 +473,37 @@ def _generate_l2e(
     # Define product name
     product = "L2E"

+    # Copy to avoid in-place replacement (outside this function)
+    product_options = product_options.copy()
+
     # -----------------------------------------------------------------.
     # Create file logger
-
+    temporal_resolution = define_temporal_resolution(seconds=accumulation_interval, rolling=rolling)
     starting_time = pd.to_datetime(start_time).strftime("%Y%m%d%H%M%S")
     ending_time = pd.to_datetime(end_time).strftime("%Y%m%d%H%M%S")
-
+    expected_filename = f"L2E.{temporal_resolution}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}"
     logger, logger_filepath = create_logger_file(
         logs_dir=logs_dir,
-        filename=
+        filename=expected_filename,
         parallel=parallel,
     )
     ##------------------------------------------------------------------------.
     # Log start processing
-    msg = f"{product}
+    msg = f"{product} creation of {expected_filename} has started."
     log_info(logger=logger, msg=msg, verbose=verbose)
+    success_flag = False

     ##------------------------------------------------------------------------.
     ### Core computation
     try:
         # ------------------------------------------------------------------------.
         #### Open the dataset over the period of interest
-
-        list_ds = [
-            xr.open_dataset(filepath, chunks={}, decode_timedelta=False, cache=False, autoclose=True)
-            for filepath in filepaths
-        ]
-        # - Concatenate datasets
-        ds = xr.concat(list_ds, dim="time", compat="no_conflicts", combine_attrs="override")
-        ds = ds.sel(time=slice(start_time, end_time)).compute()
-        # - Close file on disk
-        _ = [ds.close() for ds in list_ds]
+        ds = open_netcdf_files(filepaths, start_time=start_time, end_time=end_time, parallel=False)

         ##------------------------------------------------------------------------.
         #### Resample dataset
-        #
-
-        # - When we regularize, we infill with NaN
-        # - When we aggregate with sum, we don't skip NaN
-        # --> Aggregation with original missing timesteps currently results in NaN !
-        # TODO: Add tolerance on fraction of missing timesteps for large accumulation_intervals
-        # TODO: NaN should not be set as 0 !
-        ds["drop_number"] = xr.where(np.isnan(ds["drop_number"]), 0, ds["drop_number"])
-
-        # - Regularize dataset
-        # --> Infill missing timesteps with np.Nan
-        sample_interval = ensure_sample_interval_in_seconds(ds["sample_interval"]).item()
-        ds = regularize_dataset(ds, freq=f"{sample_interval}s")
+        # Define sample interval in seconds
+        sample_interval = ensure_sample_interval_in_seconds(ds["sample_interval"]).to_numpy().item()

         # - Resample dataset
         ds = resample_dataset(
@@ -154,53 +513,58 @@ def _generate_l2e(
             rolling=rolling,
         )

-
-
-
-
-        # TODO: Make it a choice !
-        indices_valid_timesteps = np.where(
-            ~np.logical_or(ds["N"].data == 0, np.isnan(ds["N"].data)),
-        )[0]
-        ds = ds.isel(time=indices_valid_timesteps)
+        # Extract L2E processing options
+        l2e_options = product_options.get("product_options")
+        radar_enabled = product_options.get("radar_enabled")
+        radar_options = product_options.get("radar_options")

         ##------------------------------------------------------------------------.
         #### Generate L2E product
-        #
-        ds
-
-
-
-
-
-
-
-
-
-
-
+        # - Only if at least 2 timesteps available
+        if ds["time"].size > 2:
+
+            # Compute L2E variables
+            ds = generate_l2e(ds=ds, **l2e_options)
+
+            # Simulate L2M-based radar variables if asked
+            if radar_enabled:
+                ds_radar = generate_l2_radar(ds, parallel=not parallel, **radar_options)
+                ds.update(ds_radar)
+                ds.attrs = ds_radar.attrs.copy()
+
+            # Write netCDF4 dataset
+            if ds["time"].size > 1:
+                # Define filepath
+                filename = define_l2e_filename(
+                    ds,
+                    campaign_name=campaign_name,
+                    station_name=station_name,
+                    sample_interval=accumulation_interval,
+                    rolling=rolling,
+                )
+                folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
+                filepath = os.path.join(folder_path, filename)
+                # Write file
+                write_product(ds, filepath=filepath, force=force)
+
+                # Update log
+                log_info(logger=logger, msg=f"{product} creation of {filename} has ended.", verbose=verbose)
+            else:
+                log_info(logger=logger, msg="File not created. Less than one timesteps available.", verbose=verbose)
+        else:
+            log_info(logger=logger, msg="File not created. Less than two timesteps available.", verbose=verbose)

-
-
-        if
-
-
-            campaign_name=campaign_name,
-            station_name=station_name,
-            sample_interval=accumulation_interval,
-            rolling=rolling,
-        )
-        filepath = os.path.join(data_dir, filename)
-        write_product(ds, product=product, filepath=filepath, force=force)
+        ##--------------------------------------------------------------------.
+        #### Define logger file final directory
+        if folder_partitioning != "":
+            log_dst_dir = define_file_folder_path(ds, data_dir=logs_dir, folder_partitioning=folder_partitioning)
+            os.makedirs(log_dst_dir, exist_ok=True)

         ##--------------------------------------------------------------------.
         # Clean environment
         del ds

-
-        msg = f"{product} processing of {filename} has ended."
-        log_info(logger=logger, msg=msg, verbose=verbose)
+        success_flag = True

         ##--------------------------------------------------------------------.
         # Otherwise log the error
@@ -212,22 +576,17 @@ def _generate_l2e(
     # Close the file logger
     close_logger(logger)

+    # Move logger file to correct partitioning directory
+    if success_flag and folder_partitioning != "" and logger_filepath is not None:
+        # Move logger file to correct partitioning directory
+        dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
+        shutil.move(logger_filepath, dst_filepath)
+        logger_filepath = dst_filepath
+
     # Return the logger file path
     return logger_filepath


-def is_possible_product(accumulation_interval, sample_interval, rolling):
-    """Assess if production is possible given the requested accumulation interval and source sample_interval."""
-    # Avoid rolling product generation at source sample interval
-    if rolling and accumulation_interval == sample_interval:
-        return False
-    # Avoid product generation if the accumulation_interval is less than the sample interval
-    if accumulation_interval < sample_interval:
-        return False
-    # Avoid producti generation if accumulation_interval is not multiple of sample_interval
-    return accumulation_interval % sample_interval == 0
-
-
 def run_l2e_station(
     # Station arguments
     data_source,
@@ -248,12 +607,12 @@ def run_l2e_station(
     This function is intended to be called through the ``disdrodb_run_l2e_station``
     command-line interface.

-
-
-    The DISDRODB
-    per custom block of time (i.e day/month/year) or
+    This routine generates L2E files.
+    Files are defined based on the DISDRODB archive settings options.
+    The DISDRODB archive settings allows to produce L2E files either
+    per custom block of time (i.e day/month/year) or per blocks of (rainy) events.

-    For stations with varying measurement intervals, DISDRODB defines a separate list of
+    For stations with varying measurement intervals, DISDRODB defines a separate list of partitions
     for each measurement interval option. In other words, DISDRODB does not
     mix files with data acquired at different sample intervals when resampling the data.

@@ -297,6 +656,14 @@ def run_l2e_station(
     # Retrieve DISDRODB Metadata Archive directory
     metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir=metadata_archive_dir)

+    # Check valid data_source, campaign_name, and station_name
+    check_station_inputs(
+        metadata_archive_dir=metadata_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
+
     # ------------------------------------------------------------------------.
     # Start processing
     if verbose:
@@ -316,7 +683,7 @@ def run_l2e_station(
             station_name=station_name,
             product=required_product,
             # Processing options
-            debugging_mode=
+            debugging_mode=debugging_mode,
         )
     except Exception as e:
         print(str(e)) # Case where no file paths available
@@ -326,100 +693,44 @@ def run_l2e_station(
     # If no data available, print error message and return None
     if flag_not_available_data:
         msg = (
-            f"{product} processing of {data_source} {campaign_name} {station_name}"
+            f"{product} processing of {data_source} {campaign_name} {station_name} "
             + f"has not been launched because of missing {required_product} data."
         )
         print(msg)
         return

-    #
-
-    # - Each dictionary item contains the processing options for a given rolling/accumulation_interval combo
-    l2_processing_options = get_l2_processing_options()
-
-    # ---------------------------------------------------------------------.
-    # Group filepaths by sample intervals
-    # - Typically the sample interval is fixed
-    # - Some stations might change the sample interval along the years
-    # - For each sample interval, separated processing take place here after !
-    dict_filepaths = group_filepaths(filepaths, groups="sample_interval")
+    # Retrieve L2E processing options
+    l2e_processing_options = ProcessingOptions(product="L2E", filepaths=filepaths, parallel=parallel)

     # -------------------------------------------------------------------------.
-    #
-    # - [(start_time, end_time)]
-    # TODO: Here pass event option list !
-    # TODO: Implement more general define_events function
-    # - Either rainy events
-    # - Either time blocks (day/month/year)
-    # TODO: Define events identification settings based on accumulation
-    # - This is currently done at the source sample interval !
-    # - Should we allow event definition for each accumulation interval and
-    #   move this code inside the loop below
-
-    # sample_interval = list(dict_filepaths)[0]
-    # filepaths = dict_filepaths[sample_interval]
-
-    dict_list_events = {
-        sample_interval: identify_events(filepaths, parallel=parallel)
-        for sample_interval, filepaths in dict_filepaths.items()
-    }
-
-    # ---------------------------------------------------------------------.
-    # Subset for debugging mode
-    if debugging_mode:
-        dict_list_events = {
-            sample_interval: list_events[0 : min(len(list_events), 3)]
-            for sample_interval, list_events in dict_list_events.items()
-        }
-
-    # ---------------------------------------------------------------------.
-    # Loop
+    # Generate products for each temporal resolution
     # rolling = False
     # accumulation_interval = 60
-    #
-    #
-
+    # temporal_resolution = "10MIN"
+    # folder_partitioning = ""
+    # product_options = l2e_processing_options.get_product_options(temporal_resolution)

-    for
+    for temporal_resolution in l2e_processing_options.temporal_resolutions:
+        # Print progress message
+        msg = f"Production of {product} {temporal_resolution} has started."
+        log_info(logger=logger, msg=msg, verbose=verbose)

-        # Retrieve
-
+        # Retrieve event info
+        files_partitions = l2e_processing_options.get_files_partitions(temporal_resolution)

-        # Retrieve
-
-        radar_simulation_options = l2_options["radar_simulation_options"]
-        if not available_pytmatrix:
-            radar_simulation_enabled = False
+        # Retrieve folder partitioning (for files and logs)
+        folder_partitioning = l2e_processing_options.get_folder_partitioning(temporal_resolution)

-        #
-
-        # - This is done separately for each possible source sample interval
-        # - It groups filepaths by start_time and end_time provided by list_events
-        # - Here 'events' can also simply be period of times ('day', 'months', ...)
-        # - When aggregating/resampling/accumulating data, we need to load also
-        #   some data before/after the actual event start_time/end_time
-        # - get_events_info adjust the event times to accounts for the required "border" data.
-        events_info = [
-            get_events_info(
-                list_events=list_events,
-                filepaths=dict_filepaths[sample_interval],
-                accumulation_interval=accumulation_interval,
-                rolling=rolling,
-            )
-            for sample_interval, list_events in dict_list_events.items()
-            if is_possible_product(
-                accumulation_interval=accumulation_interval,
-                sample_interval=sample_interval,
-                rolling=rolling,
-            )
-        ]
-        events_info = flatten_list(events_info)
+        # Retrieve product options
+        product_options = l2e_processing_options.get_product_options(temporal_resolution)

-        #
-
-
-
-
+        # Retrieve accumulation_interval and rolling option
+        accumulation_interval, rolling = get_resampling_information(temporal_resolution)
+
+        # Precompute required scattering tables
+        if product_options["radar_enabled"]:
+            radar_options = product_options["radar_options"]
+            precompute_scattering_tables(verbose=verbose, **radar_options)

         # ------------------------------------------------------------------.
         # Create product directory
@@ -459,21 +770,19 @@ def run_l2e_station(
                 filepaths=event_info["filepaths"],
                 data_dir=data_dir,
                 logs_dir=logs_dir,
+                folder_partitioning=folder_partitioning,
                 campaign_name=campaign_name,
                 station_name=station_name,
                 # L2E options
                 rolling=rolling,
                 accumulation_interval=accumulation_interval,
-
-                # Radar options
-                radar_simulation_enabled=radar_simulation_enabled,
-                radar_simulation_options=radar_simulation_options,
+                product_options=product_options,
                 # Processing options
                 force=force,
                 verbose=verbose,
                 parallel=parallel,
             )
-            for event_info in
+            for event_info in files_partitions
         ]
         list_logs = dask.compute(*list_tasks) if parallel else list_tasks

@@ -507,19 +816,19 @@ def run_l2e_station(
 @delayed_if_parallel
 @single_threaded_if_parallel
 def _generate_l2m(
-
+    start_time,
+    end_time,
+    filepaths,
     data_dir,
     logs_dir,
+    folder_partitioning,
     campaign_name,
     station_name,
     # L2M options
     sample_interval,
     rolling,
     model_name,
-
-    # Radar options
-    radar_simulation_enabled,
-    radar_simulation_options,
+    product_options,
     # Processing options
     force,
     verbose,
@@ -529,16 +838,15 @@ def _generate_l2m(
     # Define product name
     product = "L2M"

-    #
-
-    psd_model = l2m_options["models"][model_name]["psd_model"]
-    optimization = l2m_options["models"][model_name]["optimization"]
-    optimization_kwargs = l2m_options["models"][model_name]["optimization_kwargs"]
-    other_options = {k: v for k, v in l2m_options.items() if k != "models"}
+    # Copy to avoid in-place replacement (outside this function)
+    product_options = product_options.copy()

     # -----------------------------------------------------------------.
     # Create file logger
-
+    temporal_resolution = define_temporal_resolution(seconds=sample_interval, rolling=rolling)
+    starting_time = pd.to_datetime(start_time).strftime("%Y%m%d%H%M%S")
+    ending_time = pd.to_datetime(end_time).strftime("%Y%m%d%H%M%S")
+    filename = f"L2M_{model_name}.{temporal_resolution}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}"
     logger, logger_filepath = create_logger_file(
         logs_dir=logs_dir,
         filename=filename,
@@ -547,43 +855,52 @@ def _generate_l2m(

     ##------------------------------------------------------------------------.
     # Log start processing
-    msg = f"{product}
+    msg = f"{product} creation of {filename} has started."
     log_info(logger=logger, msg=msg, verbose=verbose)
+    success_flag = False

-
+    ##------------------------------------------------------------------------
     ### Core computation
     try:
+        ##------------------------------------------------------------------------.
+        # Extract L2M processing options
+        l2m_options = product_options.get("product_options")
+        radar_enabled = product_options.get("radar_enabled")
+        radar_options = product_options.get("radar_options")
+
+        # Define variables to load
+        optimization_kwargs = l2m_options["optimization_kwargs"]
+        if "init_method" in optimization_kwargs:
+            init_method = optimization_kwargs["init_method"]
+            moments = [f"M{order}" for order in init_method.replace("M", "")] + ["M1"]
+        else:
+            moments = ["M1"]
+
+        variables = [
+            "drop_number_concentration",
+            "fall_velocity",
+            "D50",
+            "Nw",
+            "Nt",
+            "N",
+            *moments,
+        ]
+
+        ##------------------------------------------------------------------------.
         # Open the raw netCDF
-
-        variables = [
-            "drop_number_concentration",
-            "fall_velocity",
-            "D50",
-            "Nw",
-            "Nt",
-            "M1",
-            "M2",
-            "M3",
-            "M4",
-            "M5",
-            "M6",
-        ]
-        ds = ds[variables].load()
+        ds = open_netcdf_files(filepaths, start_time=start_time, end_time=end_time, variables=variables)

         # Produce L2M dataset
-        ds =
+        ds = generate_l2m(
             ds=ds,
-
-            optimization=optimization,
-            optimization_kwargs=optimization_kwargs,
-            **other_options,
+            **l2m_options,
         )

         # Simulate L2M-based radar variables if asked
-        if
-            ds_radar = generate_l2_radar(ds, parallel=not parallel, **
+        if radar_enabled:
+            ds_radar = generate_l2_radar(ds, parallel=not parallel, **radar_options)
             ds.update(ds_radar)
-            ds.attrs = ds_radar.attrs.copy()
+            ds.attrs = ds_radar.attrs.copy() # ds_radar contains already all L2M attrs

         # Write L2M netCDF4 dataset
         if ds["time"].size > 1:
@@ -596,17 +913,25 @@ def _generate_l2m(
                 rolling=rolling,
                 model_name=model_name,
             )
-
+            folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
+            filepath = os.path.join(folder_path, filename)
             # Write to disk
-            write_product(ds,
+            write_product(ds, filepath=filepath, force=force)
+
+        ##--------------------------------------------------------------------.
+        #### - Define logger file final directory
+        if folder_partitioning != "":
+            log_dst_dir = define_file_folder_path(ds, data_dir=logs_dir, folder_partitioning=folder_partitioning)
+            os.makedirs(log_dst_dir, exist_ok=True)

         ##--------------------------------------------------------------------.
         # Clean environment
         del ds

         # Log end processing
-        msg = f"{product}
+        msg = f"{product} creation of {filename} has ended."
         log_info(logger=logger, msg=msg, verbose=verbose)
+        success_flag = True

         ##--------------------------------------------------------------------.
         # Otherwise log the error
@@ -618,6 +943,13 @@ def _generate_l2m(
     # Close the file logger
     close_logger(logger)

+    # Move logger file to correct partitioning directory
+    if success_flag and folder_partitioning != "" and logger_filepath is not None:
+        # Move logger file to correct partitioning directory
+        dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
+        shutil.move(logger_filepath, dst_filepath)
+        logger_filepath = dst_filepath
+
     # Return the logger file path
     return logger_filepath

@@ -680,6 +1012,14 @@ def run_l2m_station(
     # Retrieve DISDRODB Metadata Archive directory
     metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir)

+    # Check valid data_source, campaign_name, and station_name
+    check_station_inputs(
+        metadata_archive_dir=metadata_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
+
     # ------------------------------------------------------------------------.
     # Start processing
     if verbose:
@@ -687,11 +1027,6 @@ def run_l2m_station(
         msg = f"{product} processing of station {station_name} has started."
         log_info(logger=logger, msg=msg, verbose=verbose)

-    # -------------------------------------------------------------------------.
-    # Retrieve L2 processing options
-    # - Each dictionary item contains the processing options for a given rolling/accumulation_interval combo
-    l2_processing_options = get_l2_processing_options()
-
     # ---------------------------------------------------------------------.
     # Retrieve source sampling interval
     # - If a station has varying measurement interval over time, choose the smallest one !
@@ -707,22 +1042,14 @@ def run_l2m_station(

     # ---------------------------------------------------------------------.
     # Loop
-    #
-    #
-
-
+    # temporal_resolution = "1MIN"
+    # temporal_resolution = "10MIN"
+    temporal_resolutions = get_product_temporal_resolutions("L2M")
+    print(temporal_resolutions)
+    for temporal_resolution in temporal_resolutions:

         # Retrieve accumulation_interval and rolling option
-        accumulation_interval, rolling = get_resampling_information(
-
-        # Retrieve L2M processing options
-        l2m_options = l2_options["l2m_options"]
-
-        # Retrieve radar simulation options
-        radar_simulation_enabled = l2_options.get("radar_simulation_enabled", False)
-        radar_simulation_options = l2_options["radar_simulation_options"]
-        if not available_pytmatrix:
-            radar_simulation_enabled = False
+        accumulation_interval, rolling = get_resampling_information(temporal_resolution)

         # ------------------------------------------------------------------.
         # Avoid generation of rolling products for source sample interval !
@@ -758,22 +1085,59 @@ def run_l2m_station(
         # If no data available, try with other L2E accumulation intervals
         if flag_not_available_data:
             msg = (
-                f"{product} processing of {data_source} {campaign_name} {station_name}"
-                + f"has not been launched because of missing {required_product} {
+                f"{product} processing of {data_source} {campaign_name} {station_name} "
+                + f"has not been launched because of missing {required_product} {temporal_resolution} data."
             )
-
+            log_info(logger=logger, msg=msg, verbose=verbose)
+            continue
+
+        # -------------------------------------------------------------------------.
+        # Retrieve L2M processing options
+        l2m_processing_options = ProcessingOptions(
+            product="L2M",
+            temporal_resolutions=temporal_resolution,
+            filepaths=filepaths,
+            parallel=parallel,
+        )
+
+        # Retrieve folder partitioning (for files and logs)
+        folder_partitioning = l2m_processing_options.get_folder_partitioning(temporal_resolution)
+
+        # Retrieve product options
+        global_product_options = l2m_processing_options.get_product_options(temporal_resolution)
+
+        # Retrieve files temporal partitions
+        files_partitions = l2m_processing_options.get_files_partitions(temporal_resolution)
+
+        if len(files_partitions) == 0:
+            msg = (
+                f"{product} processing of {data_source} {campaign_name} {station_name} "
+                + f"has not been launched because of missing {required_product} {temporal_resolution} data."
+            )
+            log_info(logger=logger, msg=msg, verbose=verbose)
             continue

         # -----------------------------------------------------------------.
         # Loop over distributions to fit
         # model_name = "GAMMA_ML"
         # model_options = l2m_options["models"][model_name]
-
+        # Retrieve list of models to fit
+        models = global_product_options.pop("models")
+        for model_name in models:
+            # -----------------------------------------------------------------.
+            # Retrieve product-model options
+            product_options = copy.deepcopy(global_product_options)
+            model_options = get_model_options(product="L2M", model_name=model_name)
+            product_options["product_options"].update(model_options)

-            # Retrieve model options
             psd_model = model_options["psd_model"]
             optimization = model_options["optimization"]

+            # Precompute required scattering tables
+            if product_options["radar_enabled"]:
+                radar_options = product_options["radar_options"]
+                precompute_scattering_tables(verbose=verbose, **radar_options)
+
             # -----------------------------------------------------------------.
             msg = f"Production of L2M_{model_name} for sample interval {accumulation_interval} s has started."
             log_info(logger=logger, msg=msg, verbose=verbose)
@@ -820,25 +1184,25 @@ def run_l2m_station(
             # - If parallel=True, it does that in parallel using dask.delayed
             list_tasks = [
                 _generate_l2m(
-
+                    start_time=event_info["start_time"],
+                    end_time=event_info["end_time"],
+                    filepaths=event_info["filepaths"],
                     data_dir=data_dir,
                     logs_dir=logs_dir,
+                    folder_partitioning=folder_partitioning,
                     campaign_name=campaign_name,
                     station_name=station_name,
                     # L2M options
                     sample_interval=accumulation_interval,
                     rolling=rolling,
                     model_name=model_name,
-
-                    # Radar options
-                    radar_simulation_enabled=radar_simulation_enabled,
-                    radar_simulation_options=radar_simulation_options,
+                    product_options=product_options,
                     # Processing options
                     force=force,
                     verbose=verbose,
                     parallel=parallel,
                 )
-                for
+                for event_info in files_partitions
             ]
             list_logs = dask.compute(*list_tasks) if parallel else list_tasks

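For orientation, the sketch below illustrates how the new event-partitioning helper introduced in disdrodb/l2/routines.py could be called. The identify_events signature, its default values, and the keys of the returned dictionaries are taken from the diff above; the file paths and the printed fields are illustrative placeholders only, not part of the released documentation.

    # Minimal sketch (assumed inputs): partition L1C files into rainy events.
    from disdrodb.l2.routines import identify_events

    l1c_filepaths = ["/data/DISDRODB/.../L1C/file_1.nc", "/data/DISDRODB/.../L1C/file_2.nc"]  # placeholder paths
    events = identify_events(
        l1c_filepaths,
        parallel=False,                  # load files serially
        min_drops=5,                     # a timestep is rainy when N > min_drops
        neighbor_time_interval="5MIN",   # neighborhood used to drop isolated timesteps
        event_max_time_gap="6H",         # gaps larger than this split events
        event_min_duration="5MIN",
        event_min_size=3,
    )
    for event in events:
        # Each event is a dict with start_time, end_time, duration and n_timesteps.
        print(event["start_time"], event["end_time"], event["n_timesteps"])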