disdrodb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. disdrodb/__init__.py +64 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +139 -9
  7. disdrodb/api/configs.py +4 -2
  8. disdrodb/api/info.py +10 -10
  9. disdrodb/api/io.py +237 -18
  10. disdrodb/api/path.py +81 -75
  11. disdrodb/api/search.py +6 -6
  12. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  13. disdrodb/cli/disdrodb_run_l0.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
  19. disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
  20. disdrodb/configs.py +149 -4
  21. disdrodb/constants.py +61 -0
  22. disdrodb/data_transfer/download_data.py +5 -5
  23. disdrodb/etc/configs/attributes.yaml +339 -0
  24. disdrodb/etc/configs/encodings.yaml +473 -0
  25. disdrodb/etc/products/L1/global.yaml +13 -0
  26. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  27. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  28. disdrodb/etc/products/L2E/global.yaml +22 -0
  29. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  30. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  31. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  33. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/global.yaml +26 -0
  35. disdrodb/l0/__init__.py +13 -0
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  37. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  38. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  39. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  40. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  41. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  42. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  43. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  44. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  45. disdrodb/l0/l0a_processing.py +30 -30
  46. disdrodb/l0/l0b_nc_processing.py +108 -2
  47. disdrodb/l0/l0b_processing.py +4 -4
  48. disdrodb/l0/l0c_processing.py +5 -13
  49. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  50. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  51. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  52. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  53. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  54. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  55. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  56. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  57. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  58. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  59. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  60. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  61. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  63. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  64. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  65. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  66. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  67. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  68. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  69. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  70. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  71. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  72. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +2 -0
  73. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  74. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  75. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  76. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → USA/C3WE.py} +65 -85
  77. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  78. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  79. disdrodb/l0/routines.py +105 -14
  80. disdrodb/l1/__init__.py +5 -0
  81. disdrodb/l1/filters.py +34 -20
  82. disdrodb/l1/processing.py +45 -44
  83. disdrodb/l1/resampling.py +77 -66
  84. disdrodb/l1/routines.py +35 -43
  85. disdrodb/l1_env/routines.py +18 -3
  86. disdrodb/l2/__init__.py +7 -0
  87. disdrodb/l2/empirical_dsd.py +58 -10
  88. disdrodb/l2/event.py +27 -120
  89. disdrodb/l2/processing.py +267 -116
  90. disdrodb/l2/routines.py +618 -254
  91. disdrodb/metadata/standards.py +3 -1
  92. disdrodb/psd/fitting.py +463 -144
  93. disdrodb/psd/models.py +8 -5
  94. disdrodb/routines.py +3 -3
  95. disdrodb/scattering/__init__.py +16 -4
  96. disdrodb/scattering/axis_ratio.py +56 -36
  97. disdrodb/scattering/permittivity.py +486 -0
  98. disdrodb/scattering/routines.py +701 -159
  99. disdrodb/summary/__init__.py +17 -0
  100. disdrodb/summary/routines.py +4120 -0
  101. disdrodb/utils/attrs.py +68 -125
  102. disdrodb/utils/compression.py +30 -1
  103. disdrodb/utils/dask.py +59 -8
  104. disdrodb/utils/dataframe.py +61 -7
  105. disdrodb/utils/directories.py +35 -15
  106. disdrodb/utils/encoding.py +33 -19
  107. disdrodb/utils/logger.py +13 -6
  108. disdrodb/utils/manipulations.py +71 -0
  109. disdrodb/utils/subsetting.py +214 -0
  110. disdrodb/utils/time.py +165 -19
  111. disdrodb/utils/writer.py +20 -7
  112. disdrodb/utils/xarray.py +2 -4
  113. disdrodb/viz/__init__.py +13 -0
  114. disdrodb/viz/plots.py +327 -0
  115. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
  116. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/RECORD +121 -88
  117. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
  118. disdrodb/l1/encoding_attrs.py +0 -642
  119. disdrodb/l2/processing_options.py +0 -213
  120. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  121. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
  122. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
  123. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
disdrodb/l2/routines.py CHANGED
@@ -16,40 +16,47 @@
  # -----------------------------------------------------------------------------.
  """Implements routines for DISDRODB L2 processing."""

+ import copy
  import datetime
+ import json
  import logging
  import os
+ import shutil
  import time
  from typing import Optional

  import dask
  import numpy as np
  import pandas as pd
- import xarray as xr

- # Directory
- from disdrodb import is_pytmatrix_available
+ from disdrodb.api.checks import check_station_inputs
  from disdrodb.api.create_directories import (
  create_logs_directory,
  create_product_directory,
  )
- from disdrodb.api.info import group_filepaths
- from disdrodb.api.io import find_files
+ from disdrodb.api.info import get_start_end_time_from_filepaths, group_filepaths
+ from disdrodb.api.io import find_files, open_netcdf_files
  from disdrodb.api.path import (
- define_accumulation_acronym,
+ define_file_folder_path,
  define_l2e_filename,
  define_l2m_filename,
+ define_temporal_resolution,
  )
  from disdrodb.api.search import get_required_product
- from disdrodb.configs import get_data_archive_dir, get_metadata_archive_dir
+ from disdrodb.configs import (
+ get_data_archive_dir,
+ get_metadata_archive_dir,
+ get_model_options,
+ get_product_options,
+ get_product_temporal_resolutions,
+ )
  from disdrodb.l1.resampling import resample_dataset
- from disdrodb.l2.event import get_events_info, identify_events
+ from disdrodb.l2.event import get_files_partitions, group_timesteps_into_event
  from disdrodb.l2.processing import (
- generate_l2_empirical,
- generate_l2_model,
  generate_l2_radar,
+ generate_l2e,
+ generate_l2m,
  )
- from disdrodb.l2.processing_options import get_l2_processing_options
  from disdrodb.metadata import read_station_metadata
  from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel
  from disdrodb.utils.list import flatten_list
@@ -62,12 +69,382 @@ from disdrodb.utils.logger import (
  log_error,
  log_info,
  )
- from disdrodb.utils.time import ensure_sample_interval_in_seconds, get_resampling_information, regularize_dataset
+ from disdrodb.utils.time import (
+ ensure_sample_interval_in_seconds,
+ ensure_sorted_by_time,
+ generate_time_blocks,
+ get_resampling_information,
+ )
  from disdrodb.utils.writer import write_product

  logger = logging.getLogger(__name__)


+ ####----------------------------------------------------------------------------.
+ def identify_events(
+ filepaths,
+ parallel=False,
+ min_drops=5,
+ neighbor_min_size=2,
+ neighbor_time_interval="5MIN",
+ event_max_time_gap="6H",
+ event_min_duration="5MIN",
+ event_min_size=3,
+ ):
+ """Return a list of rainy events.
+
+ Rainy timesteps are defined as those where N > min_drops.
+ Any isolated rainy timestep (based on the neighborhood criteria) is removed.
+ Then, consecutive rainy timesteps are grouped into the same event if the time gap between them does not
+ exceed `event_max_time_gap`. Finally, events that do not meet minimum size or duration
+ requirements are filtered out.
+
+ Parameters
+ ----------
+ filepaths: list
+ List of L1C file paths.
+ parallel: bool
+ Whether to load the files in parallel.
+ Set parallel=True only in a multiprocessing environment.
+ The default is False.
+ neighbor_time_interval : str
+ The time interval around a given timestep defining the neighborhood.
+ Only timesteps that fall within this time interval before or after a timestep are considered neighbors.
+ neighbor_min_size : int, optional
+ The minimum number of neighboring timesteps required within `neighbor_time_interval` for a
+ timestep to be considered non-isolated. Isolated timesteps are removed!
+ - If `neighbor_min_size=0`, then no timestep is considered isolated and no filtering occurs.
+ - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
+ - If `neighbor_min_size=2`, the timestep must have at least two neighbors within `neighbor_time_interval`.
+ Defaults to 2.
+ event_max_time_gap: str
+ The maximum time interval between two timesteps to be considered part of the same event.
+ This parameter is used to group timesteps into events!
+ event_min_duration : str
+ The minimum duration an event must span. Events shorter than this duration are discarded.
+ event_min_size : int, optional
+ The minimum number of valid timesteps required for an event. Defaults to 3.
+
+ Returns
+ -------
+ list of dict
+ A list of events, where each event is represented as a dictionary with keys:
+ - "start_time": np.datetime64, start time of the event
+ - "end_time": np.datetime64, end time of the event
+ - "duration": np.timedelta64, duration of the event
+ - "n_timesteps": int, number of valid timesteps in the event
+ """
+ # Open datasets in parallel
+ ds = open_netcdf_files(filepaths, variables=["time", "N"], parallel=parallel)
+ # Sort dataset by time
+ ds = ensure_sorted_by_time(ds)
+ # Define candidate timesteps to group into events
+ idx_valid = ds["N"].data > min_drops
+ timesteps = ds["time"].data[idx_valid]
+ # Define event list
+ event_list = group_timesteps_into_event(
+ timesteps=timesteps,
+ neighbor_min_size=neighbor_min_size,
+ neighbor_time_interval=neighbor_time_interval,
+ event_max_time_gap=event_max_time_gap,
+ event_min_duration=event_min_duration,
+ event_min_size=event_min_size,
+ )
+ return event_list
+
+
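As a usage sketch (file paths and threshold values below are illustrative, not taken from the package), the new identify_events helper can be called on a list of L1C files as follows:

    # Hypothetical example: detect rainy events from L1C files
    from disdrodb.l2.routines import identify_events

    l1c_filepaths = ["/path/to/L1C/file_1.nc", "/path/to/L1C/file_2.nc"]  # placeholder paths
    events = identify_events(
        l1c_filepaths,
        parallel=False,
        min_drops=5,                      # timesteps with N > 5 drops are candidate rainy timesteps
        neighbor_time_interval="5MIN",    # neighborhood window used to drop isolated timesteps
        neighbor_min_size=2,
        event_max_time_gap="6H",          # larger gaps split the record into separate events
        event_min_duration="5MIN",
        event_min_size=3,
    )
    for event in events:
        print(event["start_time"], event["end_time"], event["n_timesteps"])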
+ def identify_time_partitions(filepaths: list[str], freq: str) -> list[dict]:
+ """Identify the set of time blocks covered by files.
+
+ The result is a minimal, sorted, and unique set of time partitions.
+
+ Parameters
+ ----------
+ filepaths : list of str
+ Paths to input files from which start and end times will be extracted
+ via `get_start_end_time_from_filepaths`.
+ freq : {'none', 'hour', 'day', 'month', 'quarter', 'season', 'year'}
+ Frequency determining the granularity of candidate blocks.
+ See `generate_time_blocks` for more details.
+
+ Returns
+ -------
+ list of dict
+ A list of dictionaries, each containing:
+
+ - `start_time` (numpy.datetime64[s])
+ Inclusive start of a time block.
+ - `end_time` (numpy.datetime64[s])
+ Inclusive end of a time block.
+
+ Only those blocks that overlap at least one file's interval are returned.
+ The list is sorted by `start_time` and contains no duplicate blocks.
+ """
+ # Define file start time and end time
+ start_times, end_times = get_start_end_time_from_filepaths(filepaths)
+
+ # Define files time coverage
+ start_time, end_time = start_times.min(), end_times.max()
+
+ # Compute candidate time blocks
+ blocks = generate_time_blocks(start_time, end_time, freq=freq) # end_time non inclusive is correct?
+
+ # Select time blocks with files
+ mask = (blocks[:, 0][:, None] <= end_times) & (blocks[:, 1][:, None] >= start_times)
+ blocks = blocks[mask.any(axis=1)]
+
+ # Ensure sorted unique time blocks
+ order = np.argsort(blocks[:, 0])
+ blocks = np.unique(blocks[order], axis=0)
+
+ # Convert to list of dicts
+ list_time_blocks = [{"start_time": start_time, "end_time": end_time} for start_time, end_time in blocks]
+ return list_time_blocks
+
+
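A companion sketch for identify_time_partitions, assuming the same illustrative file list as above; only blocks overlapping at least one file interval are returned:

    from disdrodb.l2.routines import identify_time_partitions

    monthly_blocks = identify_time_partitions(l1c_filepaths, freq="month")
    # Each block is a dict such as {"start_time": numpy.datetime64(...), "end_time": numpy.datetime64(...)}
    # (inclusive bounds), sorted by start_time and without duplicates.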
205
+ def is_possible_product(accumulation_interval, sample_interval, rolling):
206
+ """Assess if production is possible given the requested accumulation interval and source sample_interval."""
207
+ # Avoid rolling product generation at source sample interval
208
+ if rolling and accumulation_interval == sample_interval:
209
+ return False
210
+ # Avoid product generation if the accumulation_interval is less than the sample interval
211
+ if accumulation_interval < sample_interval:
212
+ return False
213
+ # Avoid producti generation if accumulation_interval is not multiple of sample_interval
214
+ return accumulation_interval % sample_interval == 0
215
+
216
+
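A few illustrative outcomes of is_possible_product, with intervals expressed in seconds:

    from disdrodb.l2.routines import is_possible_product

    # 5 min accumulation from a 60 s source: allowed (multiple of the sample interval)
    assert is_possible_product(accumulation_interval=300, sample_interval=60, rolling=False)
    # Rolling product at the source sample interval: rejected
    assert not is_possible_product(accumulation_interval=60, sample_interval=60, rolling=True)
    # 90 s accumulation from a 60 s source: rejected (not a multiple of the sample interval)
    assert not is_possible_product(accumulation_interval=90, sample_interval=60, rolling=False)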
+ def define_temporal_partitions(filepaths, strategy, parallel, strategy_options):
+ """Define temporal file processing partitions.
+
+ Parameters
+ ----------
+ filepaths : list
+ List of file paths to be processed.
+
+ strategy : str
+ Which partitioning strategy to apply:
+
+ - ``'time_block'`` defines fixed time intervals (e.g. monthly) covering input files.
+ - ``'event'`` detects clusters of precipitation ("events").
+
+ parallel : bool
+ If True, parallel data loading is used to identify events.
+
+ strategy_options : dict
+ Dictionary with strategy-specific parameters:
+
+ If ``strategy == 'time_block'``, supported options are:
+
+ - ``freq``: Time unit for blocks. One of {'year', 'season', 'month', 'day'}.
+
+ See identify_time_partitions for more information.
+
+ If ``strategy == 'event'``, supported options are:
+
+ - ``min_drops`` : int
+ Minimum number of drops to consider a timestep.
+ - ``neighbor_min_size`` : int
+ Minimum cluster size for merging neighboring events.
+ - ``neighbor_time_interval`` : str
+ Time window (e.g. "5MIN") to merge adjacent clusters.
+ - ``event_max_time_gap`` : str
+ Maximum allowed gap (e.g. "6H") within a single event.
+ - ``event_min_duration`` : str
+ Minimum total duration (e.g. "5MIN") of an event.
+ - ``event_min_size`` : int
+ Minimum number of records in an event.
+
+ See identify_events for more information.
+
+ Returns
+ -------
+ list
+ A list of dictionaries, each containing:
+
+ - ``start_time`` (numpy.datetime64[s])
+ Inclusive start of an event or time block.
+ - ``end_time`` (numpy.datetime64[s])
+ Inclusive end of an event or time block.
+
+ Notes
+ -----
+ - The ``'event'`` strategy requires loading data into memory to identify clusters.
+ - The ``'time_block'`` strategy can operate on metadata alone, without full data loading.
+ - The ``'event'`` strategy implicitly performs data selection on which files to process!
+ - The ``'time_block'`` strategy does not perform data selection on which files to process!
+ """
+ if strategy not in ["time_block", "event"]:
+ raise ValueError(f"Unknown strategy: {strategy!r}. Must be 'time_block' or 'event'.")
+ if strategy == "event":
+ return identify_events(filepaths, parallel=parallel, **strategy_options)
+
+ return identify_time_partitions(filepaths, **strategy_options)
+
+
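Continuing the illustrative sketch above, the two strategy_options dictionaries accepted by define_temporal_partitions might look as follows (all values are placeholders):

    from disdrodb.l2.routines import define_temporal_partitions

    # Fixed monthly blocks (no data loading required)
    partitions = define_temporal_partitions(
        l1c_filepaths,
        strategy="time_block",
        parallel=False,
        strategy_options={"freq": "month"},
    )

    # Precipitation events (requires loading the drop counts into memory)
    partitions = define_temporal_partitions(
        l1c_filepaths,
        strategy="event",
        parallel=True,
        strategy_options={
            "min_drops": 5,
            "neighbor_min_size": 2,
            "neighbor_time_interval": "5MIN",
            "event_max_time_gap": "6H",
            "event_min_duration": "5MIN",
            "event_min_size": 3,
        },
    )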
+ class ProcessingOptions:
+ """Define L2 products processing options."""
+
+ def __init__(self, product, filepaths, parallel, temporal_resolutions=None):
+ """Define L2 products processing options."""
+ import disdrodb
+
+ # ---------------------------------------------------------------------.
+ # Define temporal resolutions for which to retrieve processing options
+ if temporal_resolutions is None:
+ temporal_resolutions = get_product_temporal_resolutions(product)
+ elif isinstance(temporal_resolutions, str):
+ temporal_resolutions = [temporal_resolutions]
+
+ # ---------------------------------------------------------------------.
+ # Get product options at various temporal resolutions
+ dict_product_options = {
+ temporal_resolution: get_product_options(product, temporal_resolution=temporal_resolution)
+ for temporal_resolution in temporal_resolutions
+ }
+
+ # ---------------------------------------------------------------------.
+ # Group filepaths by source sample intervals
+ # - Typically the sample interval is fixed and there is just one group
+ # - Some stations might change the sample interval along the years
+ # - For each sample interval, separate processing takes place hereafter!
+ dict_filepaths = group_filepaths(filepaths, groups="sample_interval")
+
+ # ---------------------------------------------------------------------.
+ # Retrieve processing information for each temporal resolution
+ dict_folder_partitioning = {}
+ dict_files_partitions = {}
+ _cache_dict_list_partitions: dict[str, dict] = {}
+ for temporal_resolution in temporal_resolutions:
+
+ # -------------------------------------------------------------------------.
+ # Retrieve product options
+ product_options = dict_product_options[temporal_resolution]
+
+ # Retrieve accumulation_interval and rolling option
+ accumulation_interval, rolling = get_resampling_information(temporal_resolution)
+
+ # Extract processing options
+ archive_options = product_options.pop("archive_options")
+
+ dict_product_options[temporal_resolution] = product_options
+ # -------------------------------------------------------------------------.
+ # Define folder partitioning
+ if "folder_partitioning" not in archive_options:
+ dict_folder_partitioning[temporal_resolution] = disdrodb.config.get("folder_partitioning")
+ else:
+ dict_folder_partitioning[temporal_resolution] = archive_options.pop("folder_partitioning")
+
+ # -------------------------------------------------------------------------.
+ # Define list of temporal partitions
+ # - [{start_time: xxx, end_time: xxx}, ...]
+ # - Either strategy: "event", "time_block" or "save_by_time_block"
+ # - "event" requires loading data into memory to identify events
+ # --> Does some data filtering on what to process!
+ # - "time_block" does not require loading data into memory
+ # --> Does not do data filtering on what to process!
+ # --> Here we cache dict_list_partitions so that we don't need to recompute
+ # things if processing options are the same
+ key = json.dumps(archive_options, sort_keys=True)
+ if key not in _cache_dict_list_partitions:
+ _cache_dict_list_partitions[key] = {
+ sample_interval: define_temporal_partitions(filepaths, parallel=parallel, **archive_options)
+ for sample_interval, filepaths in dict_filepaths.items()
+ }
+ dict_list_partitions = _cache_dict_list_partitions[key].copy() # To avoid in-place replacement
+
+ # ------------------------------------------------------------------.
+ # Group filepaths by temporal partitions
+ # - This is done separately for each possible source sample interval
+ # - It groups filepaths by start_time and end_time provided by list_partitions
+ # - Here 'events' can also simply be periods of time ('day', 'month', ...)
+ # - When aggregating/resampling/accumulating data, we also need to load
+ # some data after the actual event end_time to ensure that the resampled dataset
+ # contains the event_end_time
+ # --> get_files_partitions adjusts the event end_time to account for the required "border" data.
+ files_partitions = [
+ get_files_partitions(
+ list_partitions=list_partitions,
+ filepaths=dict_filepaths[sample_interval],
+ sample_interval=sample_interval,
+ accumulation_interval=accumulation_interval,
+ rolling=rolling,
+ )
+ for sample_interval, list_partitions in dict_list_partitions.items()
+ if product != "L2E"
+ or is_possible_product(
+ accumulation_interval=accumulation_interval,
+ sample_interval=sample_interval,
+ rolling=rolling,
+ )
+ ]
+ files_partitions = flatten_list(files_partitions)
+ dict_files_partitions[temporal_resolution] = files_partitions
+
+ # ------------------------------------------------------------------.
+ # Keep only temporal_resolutions for which events could be defined
+ # - Removes e.g. resolutions whose accumulation_interval is not compatible with the source sample_interval
+ temporal_resolutions = [
+ temporal_resolution
+ for temporal_resolution in temporal_resolutions
+ if len(dict_files_partitions[temporal_resolution]) > 0
+ ]
+ # ------------------------------------------------------------------.
+ # Add attributes
+ self.temporal_resolutions = temporal_resolutions
+ self.dict_files_partitions = dict_files_partitions
+ self.dict_product_options = dict_product_options
+ self.dict_folder_partitioning = dict_folder_partitioning
+
+ def get_files_partitions(self, temporal_resolution):
+ """Return the files partitions dictionary for a given temporal resolution."""
+ return self.dict_files_partitions[temporal_resolution]
+
+ def get_product_options(self, temporal_resolution):
+ """Return the product options dictionary for a given temporal resolution."""
+ return self.dict_product_options[temporal_resolution]
+
+ def get_folder_partitioning(self, temporal_resolution):
+ """Return the folder partitioning for a given temporal resolution."""
+ # To be used for logs and files!
+ return self.dict_folder_partitioning[temporal_resolution]
+
+
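A minimal sketch of how ProcessingOptions is consumed by the station routines further below (filepaths would be the input product files found for the station; attribute and key names follow the code above):

    options = ProcessingOptions(product="L2E", filepaths=filepaths, parallel=False)
    for temporal_resolution in options.temporal_resolutions:
        product_options = options.get_product_options(temporal_resolution)
        folder_partitioning = options.get_folder_partitioning(temporal_resolution)
        for partition in options.get_files_partitions(temporal_resolution):
            # Each partition carries the start_time, end_time and filepaths to process
            print(partition["start_time"], partition["end_time"], len(partition["filepaths"]))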
+ def precompute_scattering_tables(
+ frequency,
+ num_points,
+ diameter_max,
+ canting_angle_std,
+ axis_ratio_model,
+ permittivity_model,
+ water_temperature,
+ elevation_angle,
+ verbose=True,
+ ):
+ """Precompute the pyTMatrix scattering tables required for radar variable simulations."""
+ from disdrodb.scattering.routines import get_list_simulations_params, load_scatterer
+
+ # Define parameters for all requested simulations
+ list_params = get_list_simulations_params(
+ frequency=frequency,
+ num_points=num_points,
+ diameter_max=diameter_max,
+ canting_angle_std=canting_angle_std,
+ axis_ratio_model=axis_ratio_model,
+ permittivity_model=permittivity_model,
+ water_temperature=water_temperature,
+ elevation_angle=elevation_angle,
+ )
+
+ # Compute required scattering tables
+ for params in list_params:
+ # Initialize scattering table
+ _ = load_scatterer(
+ verbose=verbose,
+ **params,
+ )
+
+
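For reference, the station routines below trigger the table precomputation from the product configuration, roughly as follows (a sketch mirroring the calls in run_l2e_station and run_l2m_station):

    if product_options["radar_enabled"]:
        radar_options = product_options["radar_options"]
        precompute_scattering_tables(verbose=True, **radar_options)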
  ####----------------------------------------------------------------------------.
  #### L2E

@@ -80,15 +457,13 @@ def _generate_l2e(
  filepaths,
  data_dir,
  logs_dir,
+ folder_partitioning,
  campaign_name,
  station_name,
  # L2E options
  accumulation_interval,
  rolling,
- l2e_options,
- # Radar options
- radar_simulation_enabled,
- radar_simulation_options,
+ product_options,
  # Processing options
  force,
  verbose,
@@ -98,53 +473,37 @@ def _generate_l2e(
  # Define product name
  product = "L2E"

+ # Copy to avoid in-place replacement (outside this function)
+ product_options = product_options.copy()
+
  # -----------------------------------------------------------------.
  # Create file logger
- sample_interval_acronym = define_accumulation_acronym(seconds=accumulation_interval, rolling=rolling)
+ temporal_resolution = define_temporal_resolution(seconds=accumulation_interval, rolling=rolling)
  starting_time = pd.to_datetime(start_time).strftime("%Y%m%d%H%M%S")
  ending_time = pd.to_datetime(end_time).strftime("%Y%m%d%H%M%S")
- filename = f"L2E.{sample_interval_acronym}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}"
+ expected_filename = f"L2E.{temporal_resolution}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}"
  logger, logger_filepath = create_logger_file(
  logs_dir=logs_dir,
- filename=filename,
+ filename=expected_filename,
  parallel=parallel,
  )
  ##------------------------------------------------------------------------.
  # Log start processing
- msg = f"{product} processing of {filename} has started."
+ msg = f"{product} creation of {expected_filename} has started."
  log_info(logger=logger, msg=msg, verbose=verbose)
+ success_flag = False

  ##------------------------------------------------------------------------.
  ### Core computation
  try:
  # ------------------------------------------------------------------------.
  #### Open the dataset over the period of interest
- # - Open the netCDFs
- list_ds = [
- xr.open_dataset(filepath, chunks={}, decode_timedelta=False, cache=False, autoclose=True)
- for filepath in filepaths
- ]
- # - Concatenate datasets
- ds = xr.concat(list_ds, dim="time", compat="no_conflicts", combine_attrs="override")
- ds = ds.sel(time=slice(start_time, end_time)).compute()
- # - Close file on disk
- _ = [ds.close() for ds in list_ds]
+ ds = open_netcdf_files(filepaths, start_time=start_time, end_time=end_time, parallel=False)

  ##------------------------------------------------------------------------.
  #### Resample dataset
- # Here we set NaN in the raw_drop_number to 0
- # - We assume that NaN corresponds to 0
- # - When we regularize, we infill with NaN
- # - When we aggregate with sum, we don't skip NaN
- # --> Aggregation with original missing timesteps currently results in NaN !
- # TODO: Add tolerance on fraction of missing timesteps for large accumulation_intervals
- # TODO: NaN should not be set as 0 !
- ds["drop_number"] = xr.where(np.isnan(ds["drop_number"]), 0, ds["drop_number"])
-
- # - Regularize dataset
- # --> Infill missing timesteps with np.Nan
- sample_interval = ensure_sample_interval_in_seconds(ds["sample_interval"]).item()
- ds = regularize_dataset(ds, freq=f"{sample_interval}s")
+ # Define sample interval in seconds
+ sample_interval = ensure_sample_interval_in_seconds(ds["sample_interval"]).to_numpy().item()

  # - Resample dataset
  ds = resample_dataset(
@@ -154,53 +513,58 @@ def _generate_l2e(
  rolling=rolling,
  )

- ##------------------------------------------------------------------------.
- # Remove timesteps with no drops or NaN (from L2E computations)
- # timestep_zero_drops = ds["time"].data[ds["N"].data == 0]
- # timestep_nan = ds["time"].data[np.isnan(ds["N"].data)]
- # TODO: Make it a choice !
- indices_valid_timesteps = np.where(
- ~np.logical_or(ds["N"].data == 0, np.isnan(ds["N"].data)),
- )[0]
- ds = ds.isel(time=indices_valid_timesteps)
+ # Extract L2E processing options
+ l2e_options = product_options.get("product_options")
+ radar_enabled = product_options.get("radar_enabled")
+ radar_options = product_options.get("radar_options")

  ##------------------------------------------------------------------------.
  #### Generate L2E product
- # TODO: Pass filtering criteria and actual L2E options !
- ds = generate_l2_empirical(ds=ds, **l2e_options)
-
- # Simulate L2M-based radar variables if asked
- if radar_simulation_enabled:
- ds_radar = generate_l2_radar(ds, parallel=not parallel, **radar_simulation_options)
- ds.update(ds_radar)
- ds.attrs = ds_radar.attrs.copy()
-
- ##------------------------------------------------------------------------.
- #### Regularize back dataset
- # TODO: infill timestep_zero_drops and timestep_nan differently ?
- # --> R, P, LWC = 0,
- # --> Z, D, with np.nan?
+ # - Only if at least 2 timesteps available
+ if ds["time"].size > 2:
+
+ # Compute L2E variables
+ ds = generate_l2e(ds=ds, **l2e_options)
+
+ # Simulate L2M-based radar variables if asked
+ if radar_enabled:
+ ds_radar = generate_l2_radar(ds, parallel=not parallel, **radar_options)
+ ds.update(ds_radar)
+ ds.attrs = ds_radar.attrs.copy()
+
+ # Write netCDF4 dataset
+ if ds["time"].size > 1:
+ # Define filepath
+ filename = define_l2e_filename(
+ ds,
+ campaign_name=campaign_name,
+ station_name=station_name,
+ sample_interval=accumulation_interval,
+ rolling=rolling,
+ )
+ folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
+ filepath = os.path.join(folder_path, filename)
+ # Write file
+ write_product(ds, filepath=filepath, force=force)
+
+ # Update log
+ log_info(logger=logger, msg=f"{product} creation of {filename} has ended.", verbose=verbose)
+ else:
+ log_info(logger=logger, msg="File not created. Less than one timesteps available.", verbose=verbose)
+ else:
+ log_info(logger=logger, msg="File not created. Less than two timesteps available.", verbose=verbose)

- ##------------------------------------------------------------------------.
- # Write netCDF4 dataset
- if ds["time"].size > 1:
- filename = define_l2e_filename(
- ds,
- campaign_name=campaign_name,
- station_name=station_name,
- sample_interval=accumulation_interval,
- rolling=rolling,
- )
- filepath = os.path.join(data_dir, filename)
- write_product(ds, product=product, filepath=filepath, force=force)
+ ##--------------------------------------------------------------------.
+ #### Define logger file final directory
+ if folder_partitioning != "":
+ log_dst_dir = define_file_folder_path(ds, data_dir=logs_dir, folder_partitioning=folder_partitioning)
+ os.makedirs(log_dst_dir, exist_ok=True)

  ##--------------------------------------------------------------------.
  # Clean environment
  del ds

- # Log end processing
- msg = f"{product} processing of {filename} has ended."
- log_info(logger=logger, msg=msg, verbose=verbose)
+ success_flag = True

  ##--------------------------------------------------------------------.
  # Otherwise log the error
@@ -212,22 +576,17 @@ def _generate_l2e(
  # Close the file logger
  close_logger(logger)

+ # Move logger file to correct partitioning directory
+ if success_flag and folder_partitioning != "" and logger_filepath is not None:
+ # Move logger file to correct partitioning directory
+ dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
+ shutil.move(logger_filepath, dst_filepath)
+ logger_filepath = dst_filepath
+
  # Return the logger file path
  return logger_filepath


- def is_possible_product(accumulation_interval, sample_interval, rolling):
- """Assess if production is possible given the requested accumulation interval and source sample_interval."""
- # Avoid rolling product generation at source sample interval
- if rolling and accumulation_interval == sample_interval:
- return False
- # Avoid product generation if the accumulation_interval is less than the sample interval
- if accumulation_interval < sample_interval:
- return False
- # Avoid producti generation if accumulation_interval is not multiple of sample_interval
- return accumulation_interval % sample_interval == 0
-
-
  def run_l2e_station(
  # Station arguments
  data_source,
@@ -248,12 +607,12 @@ def run_l2e_station(
  This function is intended to be called through the ``disdrodb_run_l2e_station``
  command-line interface.

- The DISDRODB L2E routine generate a L2E file for each event.
- Events are defined based on the DISDRODB event settings options.
- The DISDRODB event settings allows to produce L2E files either
- per custom block of time (i.e day/month/year) or for blocks of rainy events.
+ This routine generates L2E files.
+ Files are defined based on the DISDRODB archive settings options.
+ The DISDRODB archive settings allow producing L2E files either
+ per custom block of time (i.e. day/month/year) or per block of (rainy) events.

- For stations with varying measurement intervals, DISDRODB defines a separate list of 'events'
+ For stations with varying measurement intervals, DISDRODB defines a separate list of partitions
  for each measurement interval option. In other words, DISDRODB does not
  mix files with data acquired at different sample intervals when resampling the data.
@@ -297,6 +656,14 @@ def run_l2e_station(
  # Retrieve DISDRODB Metadata Archive directory
  metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir=metadata_archive_dir)

+ # Check valid data_source, campaign_name, and station_name
+ check_station_inputs(
+ metadata_archive_dir=metadata_archive_dir,
+ data_source=data_source,
+ campaign_name=campaign_name,
+ station_name=station_name,
+ )
+
  # ------------------------------------------------------------------------.
  # Start processing
  if verbose:
@@ -316,7 +683,7 @@ def run_l2e_station(
  station_name=station_name,
  product=required_product,
  # Processing options
- debugging_mode=False,
+ debugging_mode=debugging_mode,
  )
  except Exception as e:
  print(str(e)) # Case where no file paths available
@@ -326,100 +693,44 @@ def run_l2e_station(
  # If no data available, print error message and return None
  if flag_not_available_data:
  msg = (
- f"{product} processing of {data_source} {campaign_name} {station_name}"
+ f"{product} processing of {data_source} {campaign_name} {station_name} "
  + f"has not been launched because of missing {required_product} data."
  )
  print(msg)
  return

- # -------------------------------------------------------------------------.
- # Retrieve L2 processing options
- # - Each dictionary item contains the processing options for a given rolling/accumulation_interval combo
- l2_processing_options = get_l2_processing_options()
-
- # ---------------------------------------------------------------------.
- # Group filepaths by sample intervals
- # - Typically the sample interval is fixed
- # - Some stations might change the sample interval along the years
- # - For each sample interval, separated processing take place here after !
- dict_filepaths = group_filepaths(filepaths, groups="sample_interval")
+ # Retrieve L2E processing options
+ l2e_processing_options = ProcessingOptions(product="L2E", filepaths=filepaths, parallel=parallel)

  # -------------------------------------------------------------------------.
- # Define list of event
- # - [(start_time, end_time)]
- # TODO: Here pass event option list !
- # TODO: Implement more general define_events function
- # - Either rainy events
- # - Either time blocks (day/month/year)
- # TODO: Define events identification settings based on accumulation
- # - This is currently done at the source sample interval !
- # - Should we allow event definition for each accumulation interval and
- # move this code inside the loop below
-
- # sample_interval = list(dict_filepaths)[0]
- # filepaths = dict_filepaths[sample_interval]
-
- dict_list_events = {
- sample_interval: identify_events(filepaths, parallel=parallel)
- for sample_interval, filepaths in dict_filepaths.items()
- }
-
- # ---------------------------------------------------------------------.
- # Subset for debugging mode
- if debugging_mode:
- dict_list_events = {
- sample_interval: list_events[0 : min(len(list_events), 3)]
- for sample_interval, list_events in dict_list_events.items()
- }
-
- # ---------------------------------------------------------------------.
- # Loop
+ # Generate products for each temporal resolution
  # rolling = False
  # accumulation_interval = 60
- # sample_interval_acronym = "1MIN"
- # l2_options = l2_processing_options["1MIN"]
- available_pytmatrix = is_pytmatrix_available()
+ # temporal_resolution = "10MIN"
+ # folder_partitioning = ""
+ # product_options = l2e_processing_options.get_product_options(temporal_resolution)

- for sample_interval_acronym, l2_options in l2_processing_options.items():
+ for temporal_resolution in l2e_processing_options.temporal_resolutions:
+ # Print progress message
+ msg = f"Production of {product} {temporal_resolution} has started."
+ log_info(logger=logger, msg=msg, verbose=verbose)

- # Retrieve accumulation_interval and rolling option
- accumulation_interval, rolling = get_resampling_information(sample_interval_acronym)
+ # Retrieve event info
+ files_partitions = l2e_processing_options.get_files_partitions(temporal_resolution)

- # Retrieve radar simulation options
- radar_simulation_enabled = l2_options.get("radar_simulation_enabled", False)
- radar_simulation_options = l2_options["radar_simulation_options"]
- if not available_pytmatrix:
- radar_simulation_enabled = False
+ # Retrieve folder partitioning (for files and logs)
+ folder_partitioning = l2e_processing_options.get_folder_partitioning(temporal_resolution)

- # ------------------------------------------------------------------.
- # Group filepaths by events
- # - This is done separately for each possible source sample interval
- # - It groups filepaths by start_time and end_time provided by list_events
- # - Here 'events' can also simply be period of times ('day', 'months', ...)
- # - When aggregating/resampling/accumulating data, we need to load also
- # some data before/after the actual event start_time/end_time
- # - get_events_info adjust the event times to accounts for the required "border" data.
- events_info = [
- get_events_info(
- list_events=list_events,
- filepaths=dict_filepaths[sample_interval],
- accumulation_interval=accumulation_interval,
- rolling=rolling,
- )
- for sample_interval, list_events in dict_list_events.items()
- if is_possible_product(
- accumulation_interval=accumulation_interval,
- sample_interval=sample_interval,
- rolling=rolling,
- )
- ]
- events_info = flatten_list(events_info)
+ # Retrieve product options
+ product_options = l2e_processing_options.get_product_options(temporal_resolution)

- # ------------------------------------------------------------------.
- # Skip processing if no files available
- # - When not compatible accumulation_interval with source sample_interval
- if len(events_info) == 0:
- continue
+ # Retrieve accumulation_interval and rolling option
+ accumulation_interval, rolling = get_resampling_information(temporal_resolution)
+
+ # Precompute required scattering tables
+ if product_options["radar_enabled"]:
+ radar_options = product_options["radar_options"]
+ precompute_scattering_tables(verbose=verbose, **radar_options)

  # ------------------------------------------------------------------.
  # Create product directory
@@ -459,21 +770,19 @@ def run_l2e_station(
  filepaths=event_info["filepaths"],
  data_dir=data_dir,
  logs_dir=logs_dir,
+ folder_partitioning=folder_partitioning,
  campaign_name=campaign_name,
  station_name=station_name,
  # L2E options
  rolling=rolling,
  accumulation_interval=accumulation_interval,
- l2e_options={}, # TODO
- # Radar options
- radar_simulation_enabled=radar_simulation_enabled,
- radar_simulation_options=radar_simulation_options,
+ product_options=product_options,
  # Processing options
  force=force,
  verbose=verbose,
  parallel=parallel,
  )
- for event_info in events_info
+ for event_info in files_partitions
  ]
  list_logs = dask.compute(*list_tasks) if parallel else list_tasks

@@ -507,19 +816,19 @@
  @delayed_if_parallel
  @single_threaded_if_parallel
  def _generate_l2m(
- filepath,
+ start_time,
+ end_time,
+ filepaths,
  data_dir,
  logs_dir,
+ folder_partitioning,
  campaign_name,
  station_name,
  # L2M options
  sample_interval,
  rolling,
  model_name,
- l2m_options,
- # Radar options
- radar_simulation_enabled,
- radar_simulation_options,
+ product_options,
  # Processing options
  force,
  verbose,
@@ -529,16 +838,15 @@ def _generate_l2m(
  # Define product name
  product = "L2M"

- # -----------------------------------------------------------------.
- # Define model options
- psd_model = l2m_options["models"][model_name]["psd_model"]
- optimization = l2m_options["models"][model_name]["optimization"]
- optimization_kwargs = l2m_options["models"][model_name]["optimization_kwargs"]
- other_options = {k: v for k, v in l2m_options.items() if k != "models"}
+ # Copy to avoid in-place replacement (outside this function)
+ product_options = product_options.copy()

  # -----------------------------------------------------------------.
  # Create file logger
- filename = os.path.basename(filepath)
+ temporal_resolution = define_temporal_resolution(seconds=sample_interval, rolling=rolling)
+ starting_time = pd.to_datetime(start_time).strftime("%Y%m%d%H%M%S")
+ ending_time = pd.to_datetime(end_time).strftime("%Y%m%d%H%M%S")
+ filename = f"L2M_{model_name}.{temporal_resolution}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}"
  logger, logger_filepath = create_logger_file(
  logs_dir=logs_dir,
  filename=filename,
@@ -547,43 +855,52 @@ def _generate_l2m(

  ##------------------------------------------------------------------------.
  # Log start processing
- msg = f"{product} processing of {filename} has started."
+ msg = f"{product} creation of {filename} has started."
  log_info(logger=logger, msg=msg, verbose=verbose)
+ success_flag = False

- ##------------------------------------------------------------------------.
+ ##------------------------------------------------------------------------
  ### Core computation
  try:
+ ##------------------------------------------------------------------------.
+ # Extract L2M processing options
+ l2m_options = product_options.get("product_options")
+ radar_enabled = product_options.get("radar_enabled")
+ radar_options = product_options.get("radar_options")
+
+ # Define variables to load
+ optimization_kwargs = l2m_options["optimization_kwargs"]
+ if "init_method" in optimization_kwargs:
+ init_method = optimization_kwargs["init_method"]
+ moments = [f"M{order}" for order in init_method.replace("M", "")] + ["M1"]
+ else:
+ moments = ["M1"]
+
+ variables = [
+ "drop_number_concentration",
+ "fall_velocity",
+ "D50",
+ "Nw",
+ "Nt",
+ "N",
+ *moments,
+ ]
+
+ ##------------------------------------------------------------------------.
  # Open the raw netCDF
- with xr.open_dataset(filepath, chunks={}, decode_timedelta=False, cache=False) as ds:
- variables = [
- "drop_number_concentration",
- "fall_velocity",
- "D50",
- "Nw",
- "Nt",
- "M1",
- "M2",
- "M3",
- "M4",
- "M5",
- "M6",
- ]
- ds = ds[variables].load()
+ ds = open_netcdf_files(filepaths, start_time=start_time, end_time=end_time, variables=variables)

  # Produce L2M dataset
- ds = generate_l2_model(
+ ds = generate_l2m(
  ds=ds,
- psd_model=psd_model,
- optimization=optimization,
- optimization_kwargs=optimization_kwargs,
- **other_options,
+ **l2m_options,
  )

  # Simulate L2M-based radar variables if asked
- if radar_simulation_enabled:
- ds_radar = generate_l2_radar(ds, parallel=not parallel, **radar_simulation_options)
+ if radar_enabled:
+ ds_radar = generate_l2_radar(ds, parallel=not parallel, **radar_options)
  ds.update(ds_radar)
- ds.attrs = ds_radar.attrs.copy()
+ ds.attrs = ds_radar.attrs.copy() # ds_radar already contains all L2M attrs

  # Write L2M netCDF4 dataset
  if ds["time"].size > 1:
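The moments to load are derived from the optimizer's init_method string; a small sketch with an illustrative "M246" value:

    optimization_kwargs = {"init_method": "M246"}  # hypothetical kwargs
    init_method = optimization_kwargs["init_method"]
    moments = [f"M{order}" for order in init_method.replace("M", "")] + ["M1"]
    print(moments)  # ['M2', 'M4', 'M6', 'M1'] -> appended to the default variable list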
@@ -596,17 +913,25 @@ def _generate_l2m(
  rolling=rolling,
  model_name=model_name,
  )
- filepath = os.path.join(data_dir, filename)
+ folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
+ filepath = os.path.join(folder_path, filename)
  # Write to disk
- write_product(ds, product=product, filepath=filepath, force=force)
+ write_product(ds, filepath=filepath, force=force)
+
+ ##--------------------------------------------------------------------.
+ #### - Define logger file final directory
+ if folder_partitioning != "":
+ log_dst_dir = define_file_folder_path(ds, data_dir=logs_dir, folder_partitioning=folder_partitioning)
+ os.makedirs(log_dst_dir, exist_ok=True)

  ##--------------------------------------------------------------------.
  # Clean environment
  del ds

  # Log end processing
- msg = f"{product} processing of {filename} has ended."
+ msg = f"{product} creation of {filename} has ended."
  log_info(logger=logger, msg=msg, verbose=verbose)
+ success_flag = True

  ##--------------------------------------------------------------------.
  # Otherwise log the error
@@ -618,6 +943,13 @@ def _generate_l2m(
  # Close the file logger
  close_logger(logger)

+ # Move logger file to correct partitioning directory
+ if success_flag and folder_partitioning != "" and logger_filepath is not None:
+ # Move logger file to correct partitioning directory
+ dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
+ shutil.move(logger_filepath, dst_filepath)
+ logger_filepath = dst_filepath
+
  # Return the logger file path
  return logger_filepath

@@ -680,6 +1012,14 @@ def run_l2m_station(
  # Retrieve DISDRODB Metadata Archive directory
  metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir)

+ # Check valid data_source, campaign_name, and station_name
+ check_station_inputs(
+ metadata_archive_dir=metadata_archive_dir,
+ data_source=data_source,
+ campaign_name=campaign_name,
+ station_name=station_name,
+ )
+
  # ------------------------------------------------------------------------.
  # Start processing
  if verbose:
@@ -687,11 +1027,6 @@ def run_l2m_station(
  msg = f"{product} processing of station {station_name} has started."
  log_info(logger=logger, msg=msg, verbose=verbose)

- # -------------------------------------------------------------------------.
- # Retrieve L2 processing options
- # - Each dictionary item contains the processing options for a given rolling/accumulation_interval combo
- l2_processing_options = get_l2_processing_options()
-
  # ---------------------------------------------------------------------.
  # Retrieve source sampling interval
  # - If a station has varying measurement interval over time, choose the smallest one !
@@ -707,22 +1042,14 @@ def run_l2m_station(

  # ---------------------------------------------------------------------.
  # Loop
- # sample_interval_acronym = "1MIN"
- # l2_options = l2_processing_options["1MIN"]
- available_pytmatrix = is_pytmatrix_available()
- for sample_interval_acronym, l2_options in l2_processing_options.items():
+ # temporal_resolution = "1MIN"
+ # temporal_resolution = "10MIN"
+ temporal_resolutions = get_product_temporal_resolutions("L2M")
+ print(temporal_resolutions)
+ for temporal_resolution in temporal_resolutions:

  # Retrieve accumulation_interval and rolling option
- accumulation_interval, rolling = get_resampling_information(sample_interval_acronym)
-
- # Retrieve L2M processing options
- l2m_options = l2_options["l2m_options"]
-
- # Retrieve radar simulation options
- radar_simulation_enabled = l2_options.get("radar_simulation_enabled", False)
- radar_simulation_options = l2_options["radar_simulation_options"]
- if not available_pytmatrix:
- radar_simulation_enabled = False
+ accumulation_interval, rolling = get_resampling_information(temporal_resolution)

  # ------------------------------------------------------------------.
  # Avoid generation of rolling products for source sample interval !
@@ -758,22 +1085,59 @@ def run_l2m_station(
  # If no data available, try with other L2E accumulation intervals
  if flag_not_available_data:
  msg = (
- f"{product} processing of {data_source} {campaign_name} {station_name}"
- + f"has not been launched because of missing {required_product} {sample_interval_acronym} data ."
+ f"{product} processing of {data_source} {campaign_name} {station_name} "
+ + f"has not been launched because of missing {required_product} {temporal_resolution} data."
  )
- print(msg)
+ log_info(logger=logger, msg=msg, verbose=verbose)
+ continue
+
+ # -------------------------------------------------------------------------.
+ # Retrieve L2M processing options
+ l2m_processing_options = ProcessingOptions(
+ product="L2M",
+ temporal_resolutions=temporal_resolution,
+ filepaths=filepaths,
+ parallel=parallel,
+ )
+
+ # Retrieve folder partitioning (for files and logs)
+ folder_partitioning = l2m_processing_options.get_folder_partitioning(temporal_resolution)
+
+ # Retrieve product options
+ global_product_options = l2m_processing_options.get_product_options(temporal_resolution)
+
+ # Retrieve files temporal partitions
+ files_partitions = l2m_processing_options.get_files_partitions(temporal_resolution)
+
+ if len(files_partitions) == 0:
+ msg = (
+ f"{product} processing of {data_source} {campaign_name} {station_name} "
+ + f"has not been launched because of missing {required_product} {temporal_resolution} data."
+ )
+ log_info(logger=logger, msg=msg, verbose=verbose)
  continue

  # -----------------------------------------------------------------.
  # Loop over distributions to fit
  # model_name = "GAMMA_ML"
  # model_options = l2m_options["models"][model_name]
- for model_name, model_options in l2m_options["models"].items():
+ # Retrieve list of models to fit
+ models = global_product_options.pop("models")
+ for model_name in models:
+ # -----------------------------------------------------------------.
+ # Retrieve product-model options
+ product_options = copy.deepcopy(global_product_options)
+ model_options = get_model_options(product="L2M", model_name=model_name)
+ product_options["product_options"].update(model_options)

- # Retrieve model options
  psd_model = model_options["psd_model"]
  optimization = model_options["optimization"]

+ # Precompute required scattering tables
+ if product_options["radar_enabled"]:
+ radar_options = product_options["radar_options"]
+ precompute_scattering_tables(verbose=verbose, **radar_options)
+
  # -----------------------------------------------------------------.
  msg = f"Production of L2M_{model_name} for sample interval {accumulation_interval} s has started."
  log_info(logger=logger, msg=msg, verbose=verbose)
@@ -820,25 +1184,25 @@ def run_l2m_station(
  # - If parallel=True, it does that in parallel using dask.delayed
  list_tasks = [
  _generate_l2m(
- filepath=filepath,
+ start_time=event_info["start_time"],
+ end_time=event_info["end_time"],
+ filepaths=event_info["filepaths"],
  data_dir=data_dir,
  logs_dir=logs_dir,
+ folder_partitioning=folder_partitioning,
  campaign_name=campaign_name,
  station_name=station_name,
  # L2M options
  sample_interval=accumulation_interval,
  rolling=rolling,
  model_name=model_name,
- l2m_options=l2m_options,
- # Radar options
- radar_simulation_enabled=radar_simulation_enabled,
- radar_simulation_options=radar_simulation_options,
+ product_options=product_options,
  # Processing options
  force=force,
  verbose=verbose,
  parallel=parallel,
  )
- for filepath in filepaths
+ for event_info in files_partitions
  ]
  list_logs = dask.compute(*list_tasks) if parallel else list_tasks