disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. disdrodb/__init__.py +68 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +177 -24
  7. disdrodb/api/configs.py +3 -3
  8. disdrodb/api/info.py +13 -13
  9. disdrodb/api/io.py +281 -22
  10. disdrodb/api/path.py +184 -195
  11. disdrodb/api/search.py +18 -9
  12. disdrodb/cli/disdrodb_create_summary.py +103 -0
  13. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  14. disdrodb/cli/disdrodb_run_l0.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
  19. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  20. disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
  21. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  22. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  23. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  24. disdrodb/configs.py +149 -4
  25. disdrodb/constants.py +61 -0
  26. disdrodb/data_transfer/download_data.py +127 -11
  27. disdrodb/etc/configs/attributes.yaml +339 -0
  28. disdrodb/etc/configs/encodings.yaml +473 -0
  29. disdrodb/etc/products/L1/global.yaml +13 -0
  30. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  31. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +22 -0
  33. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  34. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  35. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  38. disdrodb/etc/products/L2M/global.yaml +26 -0
  39. disdrodb/issue/writer.py +2 -0
  40. disdrodb/l0/__init__.py +13 -0
  41. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  42. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  43. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  44. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  45. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  46. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  48. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  49. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  50. disdrodb/l0/l0a_processing.py +37 -32
  51. disdrodb/l0/l0b_nc_processing.py +118 -8
  52. disdrodb/l0/l0b_processing.py +30 -65
  53. disdrodb/l0/l0c_processing.py +369 -259
  54. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
  55. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  56. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  58. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  59. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  63. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  66. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  67. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  69. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  71. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  72. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  73. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
  74. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  75. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  76. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  79. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  80. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  81. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  83. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
  84. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  85. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  86. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  87. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  88. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  89. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  90. disdrodb/l1/__init__.py +5 -0
  91. disdrodb/l1/fall_velocity.py +46 -0
  92. disdrodb/l1/filters.py +34 -20
  93. disdrodb/l1/processing.py +46 -45
  94. disdrodb/l1/resampling.py +77 -66
  95. disdrodb/l1_env/routines.py +18 -3
  96. disdrodb/l2/__init__.py +7 -0
  97. disdrodb/l2/empirical_dsd.py +58 -10
  98. disdrodb/l2/processing.py +268 -117
  99. disdrodb/metadata/checks.py +132 -125
  100. disdrodb/metadata/standards.py +3 -1
  101. disdrodb/psd/fitting.py +631 -345
  102. disdrodb/psd/models.py +9 -6
  103. disdrodb/routines/__init__.py +54 -0
  104. disdrodb/{l0/routines.py → routines/l0.py} +316 -355
  105. disdrodb/{l1/routines.py → routines/l1.py} +76 -116
  106. disdrodb/routines/l2.py +1019 -0
  107. disdrodb/{routines.py → routines/wrappers.py} +98 -10
  108. disdrodb/scattering/__init__.py +16 -4
  109. disdrodb/scattering/axis_ratio.py +61 -37
  110. disdrodb/scattering/permittivity.py +504 -0
  111. disdrodb/scattering/routines.py +746 -184
  112. disdrodb/summary/__init__.py +17 -0
  113. disdrodb/summary/routines.py +4196 -0
  114. disdrodb/utils/archiving.py +434 -0
  115. disdrodb/utils/attrs.py +68 -125
  116. disdrodb/utils/cli.py +5 -5
  117. disdrodb/utils/compression.py +30 -1
  118. disdrodb/utils/dask.py +121 -9
  119. disdrodb/utils/dataframe.py +61 -7
  120. disdrodb/utils/decorators.py +31 -0
  121. disdrodb/utils/directories.py +35 -15
  122. disdrodb/utils/encoding.py +37 -19
  123. disdrodb/{l2 → utils}/event.py +15 -173
  124. disdrodb/utils/logger.py +14 -7
  125. disdrodb/utils/manipulations.py +81 -0
  126. disdrodb/utils/routines.py +166 -0
  127. disdrodb/utils/subsetting.py +214 -0
  128. disdrodb/utils/time.py +35 -177
  129. disdrodb/utils/writer.py +20 -7
  130. disdrodb/utils/xarray.py +5 -4
  131. disdrodb/viz/__init__.py +13 -0
  132. disdrodb/viz/plots.py +398 -0
  133. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
  134. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
  135. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
  136. disdrodb/l1/encoding_attrs.py +0 -642
  137. disdrodb/l2/processing_options.py +0 -213
  138. disdrodb/l2/routines.py +0 -868
  139. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  140. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
  141. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
  142. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
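
Note: items 104-107 above reorganize the processing routines into a dedicated disdrodb/routines/ package: the L0 and L1 routine modules are moved, the L2 routines are rewritten in disdrodb/routines/l2.py, and the old disdrodb/l2/routines.py shown below is deleted. A minimal compatibility sketch, assuming the 0.1.4 module re-exports the same station-level entry points under the new path (the new public names are inferred from the file moves and are not visible in this diff):

    # Hypothetical shim: resolve the L2 station entry points across the
    # 0.1.2 -> 0.1.4 layout change. The 0.1.4 import path is an assumption
    # based on the file moves listed above, not a documented API.
    try:
        from disdrodb.routines.l2 import run_l2e_station, run_l2m_station  # 0.1.4 layout (assumed)
    except ImportError:
        from disdrodb.l2.routines import run_l2e_station, run_l2m_station  # 0.1.2 layout
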
disdrodb/l2/routines.py DELETED
@@ -1,868 +0,0 @@
- # -----------------------------------------------------------------------------.
- # Copyright (c) 2021-2023 DISDRODB developers
- #
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- # -----------------------------------------------------------------------------.
- """Implements routines for DISDRODB L2 processing."""
-
- import datetime
- import logging
- import os
- import time
- from typing import Optional
-
- import dask
- import numpy as np
- import pandas as pd
- import xarray as xr
-
- # Directory
- from disdrodb import is_pytmatrix_available
- from disdrodb.api.create_directories import (
-     create_logs_directory,
-     create_product_directory,
- )
- from disdrodb.api.info import group_filepaths
- from disdrodb.api.io import find_files
- from disdrodb.api.path import (
-     define_accumulation_acronym,
-     define_l2e_filename,
-     define_l2m_filename,
- )
- from disdrodb.api.search import get_required_product
- from disdrodb.configs import get_data_archive_dir, get_metadata_archive_dir
- from disdrodb.l1.resampling import resample_dataset
- from disdrodb.l2.event import get_events_info, identify_events
- from disdrodb.l2.processing import (
-     generate_l2_empirical,
-     generate_l2_model,
-     generate_l2_radar,
- )
- from disdrodb.l2.processing_options import get_l2_processing_options
- from disdrodb.metadata import read_station_metadata
- from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel
- from disdrodb.utils.list import flatten_list
-
- # Logger
- from disdrodb.utils.logger import (
-     close_logger,
-     create_logger_file,
-     create_product_logs,
-     log_error,
-     log_info,
- )
- from disdrodb.utils.time import ensure_sample_interval_in_seconds, get_resampling_information, regularize_dataset
- from disdrodb.utils.writer import write_product
-
- logger = logging.getLogger(__name__)
-
-
-
- ####----------------------------------------------------------------------------.
- #### L2E
-
-
- @delayed_if_parallel
- @single_threaded_if_parallel
- def _generate_l2e(
-     start_time,
-     end_time,
-     filepaths,
-     data_dir,
-     logs_dir,
-     campaign_name,
-     station_name,
-     # L2E options
-     accumulation_interval,
-     rolling,
-     l2e_options,
-     # Radar options
-     radar_simulation_enabled,
-     radar_simulation_options,
-     # Processing options
-     force,
-     verbose,
-     parallel,  # used by the decorators and to correctly initialize the logger!
- ):
-     # -----------------------------------------------------------------.
-     # Define product name
-     product = "L2E"
-
-     # -----------------------------------------------------------------.
-     # Create file logger
-     sample_interval_acronym = define_accumulation_acronym(seconds=accumulation_interval, rolling=rolling)
-     starting_time = pd.to_datetime(start_time).strftime("%Y%m%d%H%M%S")
-     ending_time = pd.to_datetime(end_time).strftime("%Y%m%d%H%M%S")
-     filename = f"L2E.{sample_interval_acronym}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}"
-     logger, logger_filepath = create_logger_file(
-         logs_dir=logs_dir,
-         filename=filename,
-         parallel=parallel,
-     )
-     ##------------------------------------------------------------------------.
-     # Log start processing
-     msg = f"{product} processing of {filename} has started."
-     log_info(logger=logger, msg=msg, verbose=verbose)
-
-     ##------------------------------------------------------------------------.
-     ### Core computation
-     try:
-         # ------------------------------------------------------------------------.
-         #### Open the dataset over the period of interest
-         # - Open the netCDFs
-         list_ds = [
-             xr.open_dataset(filepath, chunks={}, decode_timedelta=False, cache=False, autoclose=True)
-             for filepath in filepaths
-         ]
-         # - Concatenate datasets
-         ds = xr.concat(list_ds, dim="time", compat="no_conflicts", combine_attrs="override")
-         ds = ds.sel(time=slice(start_time, end_time)).compute()
-         # - Close files on disk
-         _ = [ds.close() for ds in list_ds]
-
-         ##------------------------------------------------------------------------.
-         #### Resample dataset
-         # Here we set NaN values in drop_number to 0
-         # - We assume that NaN corresponds to 0
-         # - When we regularize, we infill with NaN
-         # - When we aggregate with sum, we don't skip NaN
-         # --> Aggregation with original missing timesteps currently results in NaN!
-         # TODO: Add tolerance on fraction of missing timesteps for large accumulation_intervals
-         # TODO: NaN should not be set as 0!
-         ds["drop_number"] = xr.where(np.isnan(ds["drop_number"]), 0, ds["drop_number"])
-
-         # - Regularize dataset
-         # --> Infill missing timesteps with np.nan
-         sample_interval = ensure_sample_interval_in_seconds(ds["sample_interval"]).item()
-         ds = regularize_dataset(ds, freq=f"{sample_interval}s")
-
-         # - Resample dataset
-         ds = resample_dataset(
-             ds=ds,
-             sample_interval=sample_interval,
-             accumulation_interval=accumulation_interval,
-             rolling=rolling,
-         )
-
-         ##------------------------------------------------------------------------.
-         # Remove timesteps with no drops or NaN (from L2E computations)
-         # timestep_zero_drops = ds["time"].data[ds["N"].data == 0]
-         # timestep_nan = ds["time"].data[np.isnan(ds["N"].data)]
-         # TODO: Make it a choice!
-         indices_valid_timesteps = np.where(
-             ~np.logical_or(ds["N"].data == 0, np.isnan(ds["N"].data)),
-         )[0]
-         ds = ds.isel(time=indices_valid_timesteps)
-
-         ##------------------------------------------------------------------------.
-         #### Generate L2E product
-         # TODO: Pass filtering criteria and actual L2E options!
-         ds = generate_l2_empirical(ds=ds, **l2e_options)
-
-         # Simulate L2E-based radar variables if asked
-         if radar_simulation_enabled:
-             ds_radar = generate_l2_radar(ds, parallel=not parallel, **radar_simulation_options)
-             ds.update(ds_radar)
-             ds.attrs = ds_radar.attrs.copy()
-
-         ##------------------------------------------------------------------------.
-         #### Regularize back dataset
-         # TODO: infill timestep_zero_drops and timestep_nan differently?
-         # --> R, P, LWC = 0,
-         # --> Z, D, with np.nan?
-
-         ##------------------------------------------------------------------------.
-         # Write netCDF4 dataset
-         if ds["time"].size > 1:
-             filename = define_l2e_filename(
-                 ds,
-                 campaign_name=campaign_name,
-                 station_name=station_name,
-                 sample_interval=accumulation_interval,
-                 rolling=rolling,
-             )
-             filepath = os.path.join(data_dir, filename)
-             write_product(ds, product=product, filepath=filepath, force=force)
-
-         ##--------------------------------------------------------------------.
-         # Clean environment
-         del ds
-
-         # Log end processing
-         msg = f"{product} processing of {filename} has ended."
-         log_info(logger=logger, msg=msg, verbose=verbose)
-
-     ##--------------------------------------------------------------------.
-     # Otherwise log the error
-     except Exception as e:
-         error_type = str(type(e).__name__)
-         msg = f"{error_type}: {e}"
-         log_error(logger, msg, verbose=verbose)
-
-     # Close the file logger
-     close_logger(logger)
-
-     # Return the logger file path
-     return logger_filepath
-
-
- def is_possible_product(accumulation_interval, sample_interval, rolling):
-     """Assess if production is possible given the requested accumulation interval and source sample_interval."""
-     # Avoid rolling product generation at the source sample interval
-     if rolling and accumulation_interval == sample_interval:
-         return False
-     # Avoid product generation if the accumulation_interval is less than the sample interval
-     if accumulation_interval < sample_interval:
-         return False
-     # Avoid product generation if the accumulation_interval is not a multiple of the sample_interval
-     return accumulation_interval % sample_interval == 0
-
-
- def run_l2e_station(
-     # Station arguments
-     data_source,
-     campaign_name,
-     station_name,
-     # Processing options
-     force: bool = False,
-     verbose: bool = True,
-     parallel: bool = True,
-     debugging_mode: bool = False,
-     # DISDRODB root directories
-     data_archive_dir: Optional[str] = None,
-     metadata_archive_dir: Optional[str] = None,
- ):
-     """
-     Generate the L2E product of a specific DISDRODB station when invoked from the terminal.
-
-     This function is intended to be called through the ``disdrodb_run_l2e_station``
-     command-line interface.
-
-     The DISDRODB L2E routine generates an L2E file for each event.
-     Events are defined based on the DISDRODB event settings options.
-     The DISDRODB event settings allow producing L2E files either
-     per custom block of time (i.e. day/month/year) or for blocks of rainy events.
-
-     For stations with varying measurement intervals, DISDRODB defines a separate list of 'events'
-     for each measurement interval option. In other words, DISDRODB does not
-     mix files with data acquired at different sample intervals when resampling the data.
-
-     L0C product generation ensures the creation of files with unique sample intervals.
-
-     Parameters
-     ----------
-     data_source : str
-         The name of the institution (for campaigns spanning multiple countries) or
-         the name of the country (for campaigns or sensor networks within a single country).
-         Must be provided in UPPER CASE.
-     campaign_name : str
-         The name of the campaign. Must be provided in UPPER CASE.
-     station_name : str
-         The name of the station.
-     force : bool, optional
-         If ``True``, existing data in the destination directories will be overwritten.
-         If ``False`` (default), an error will be raised if data already exists in the destination directories.
-     verbose : bool, optional
-         If ``True`` (default), detailed processing information will be printed to the terminal.
-         If ``False``, less information will be displayed.
-     parallel : bool, optional
-         If ``True`` (default), files will be processed in multiple processes simultaneously,
-         with each process using a single thread to avoid issues with the HDF/netCDF library.
-         If ``False``, files will be processed sequentially in a single process,
-         and multi-threading will be automatically exploited to speed up I/O tasks.
-     debugging_mode : bool, optional
-         If ``True``, the amount of data processed will be reduced.
-         Only the first 3 files will be processed. The default value is ``False``.
-     data_archive_dir : str, optional
-         The base directory of DISDRODB, expected in the format ``<...>/DISDRODB``.
-         If not specified, the path specified in the DISDRODB active configuration will be used.
-
-     """
-     # Define product
-     product = "L2E"
-
-     # Define base directory
-     data_archive_dir = get_data_archive_dir(data_archive_dir)
-
-     # Retrieve DISDRODB Metadata Archive directory
-     metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir=metadata_archive_dir)
-
-     # ------------------------------------------------------------------------.
-     # Start processing
-     if verbose:
-         t_i = time.time()
-         msg = f"{product} processing of station {station_name} has started."
-         log_info(logger=logger, msg=msg, verbose=verbose)
-
-     # -------------------------------------------------------------------------.
-     # List L1 files to process
-     required_product = get_required_product(product)
-     flag_not_available_data = False
-     try:
-         filepaths = find_files(
-             data_archive_dir=data_archive_dir,
-             data_source=data_source,
-             campaign_name=campaign_name,
-             station_name=station_name,
-             product=required_product,
-             # Processing options
-             debugging_mode=False,
-         )
-     except Exception as e:
-         print(str(e))  # Case where no file paths are available
-         flag_not_available_data = True
-
-     # -------------------------------------------------------------------------.
-     # If no data available, print error message and return None
-     if flag_not_available_data:
-         msg = (
-             f"{product} processing of {data_source} {campaign_name} {station_name} "
-             + f"has not been launched because of missing {required_product} data."
-         )
-         print(msg)
-         return
-
-     # -------------------------------------------------------------------------.
-     # Retrieve L2 processing options
-     # - Each dictionary item contains the processing options for a given rolling/accumulation_interval combo
-     l2_processing_options = get_l2_processing_options()
-
-     # ---------------------------------------------------------------------.
-     # Group filepaths by sample intervals
-     # - Typically the sample interval is fixed
-     # - Some stations might change the sample interval over the years
-     # - For each sample interval, a separate processing takes place hereafter!
-     dict_filepaths = group_filepaths(filepaths, groups="sample_interval")
-
-     # -------------------------------------------------------------------------.
-     # Define list of events
-     # - [(start_time, end_time)]
-     # TODO: Here pass event option list!
-     # TODO: Implement more general define_events function
-     # - Either rainy events
-     # - Either time blocks (day/month/year)
-     # TODO: Define events identification settings based on accumulation
-     # - This is currently done at the source sample interval!
-     # - Should we allow event definition for each accumulation interval and
-     #   move this code inside the loop below?
-
-     # sample_interval = list(dict_filepaths)[0]
-     # filepaths = dict_filepaths[sample_interval]
-
-     dict_list_events = {
-         sample_interval: identify_events(filepaths, parallel=parallel)
-         for sample_interval, filepaths in dict_filepaths.items()
-     }
-
-     # ---------------------------------------------------------------------.
-     # Subset for debugging mode
-     if debugging_mode:
-         dict_list_events = {
-             sample_interval: list_events[0 : min(len(list_events), 3)]
-             for sample_interval, list_events in dict_list_events.items()
-         }
-
-     # ---------------------------------------------------------------------.
-     # Loop
-     # rolling = False
-     # accumulation_interval = 60
-     # sample_interval_acronym = "1MIN"
-     # l2_options = l2_processing_options["1MIN"]
-     available_pytmatrix = is_pytmatrix_available()
-
-     for sample_interval_acronym, l2_options in l2_processing_options.items():
-
-         # Retrieve accumulation_interval and rolling option
-         accumulation_interval, rolling = get_resampling_information(sample_interval_acronym)
-
-         # Retrieve radar simulation options
-         radar_simulation_enabled = l2_options.get("radar_simulation_enabled", False)
-         radar_simulation_options = l2_options["radar_simulation_options"]
-         if not available_pytmatrix:
-             radar_simulation_enabled = False
-
-         # ------------------------------------------------------------------.
-         # Group filepaths by events
-         # - This is done separately for each possible source sample interval
-         # - It groups filepaths by the start_time and end_time provided by list_events
-         # - Here 'events' can also simply be periods of time ('day', 'months', ...)
-         # - When aggregating/resampling/accumulating data, we also need to load
-         #   some data before/after the actual event start_time/end_time
-         # - get_events_info adjusts the event times to account for the required "border" data.
-         events_info = [
-             get_events_info(
-                 list_events=list_events,
-                 filepaths=dict_filepaths[sample_interval],
-                 accumulation_interval=accumulation_interval,
-                 rolling=rolling,
-             )
-             for sample_interval, list_events in dict_list_events.items()
-             if is_possible_product(
-                 accumulation_interval=accumulation_interval,
-                 sample_interval=sample_interval,
-                 rolling=rolling,
-             )
-         ]
-         events_info = flatten_list(events_info)
-
-         # ------------------------------------------------------------------.
-         # Skip processing if no files are available
-         # - e.g. when the accumulation_interval is not compatible with the source sample_interval
-         if len(events_info) == 0:
-             continue
-
-         # ------------------------------------------------------------------.
-         # Create product directory
-         data_dir = create_product_directory(
-             data_archive_dir=data_archive_dir,
-             metadata_archive_dir=metadata_archive_dir,
-             data_source=data_source,
-             campaign_name=campaign_name,
-             station_name=station_name,
-             product=product,
-             force=force,
-             # Option for L2E
-             sample_interval=accumulation_interval,
-             rolling=rolling,
-         )
-
-         # Define logs directory
-         logs_dir = create_logs_directory(
-             product=product,
-             data_archive_dir=data_archive_dir,
-             data_source=data_source,
-             campaign_name=campaign_name,
-             station_name=station_name,
-             # Option for L2E
-             sample_interval=accumulation_interval,
-             rolling=rolling,
-         )
-
-         # ------------------------------------------------------------------.
-         # Generate files
-         # - L2E product generation is optionally parallelized over events
-         # - If parallel=True, it does that in parallel using dask.delayed
-         list_tasks = [
-             _generate_l2e(
-                 start_time=event_info["start_time"],
-                 end_time=event_info["end_time"],
-                 filepaths=event_info["filepaths"],
-                 data_dir=data_dir,
-                 logs_dir=logs_dir,
-                 campaign_name=campaign_name,
-                 station_name=station_name,
-                 # L2E options
-                 rolling=rolling,
-                 accumulation_interval=accumulation_interval,
-                 l2e_options={},  # TODO
-                 # Radar options
-                 radar_simulation_enabled=radar_simulation_enabled,
-                 radar_simulation_options=radar_simulation_options,
-                 # Processing options
-                 force=force,
-                 verbose=verbose,
-                 parallel=parallel,
-             )
-             for event_info in events_info
-         ]
-         list_logs = dask.compute(*list_tasks) if parallel else list_tasks
-
-         # -----------------------------------------------------------------.
-         # Define product summary logs
-         create_product_logs(
-             product=product,
-             data_source=data_source,
-             campaign_name=campaign_name,
-             station_name=station_name,
-             data_archive_dir=data_archive_dir,
-             # Product options
-             sample_interval=accumulation_interval,
-             rolling=rolling,
-             # Logs list
-             list_logs=list_logs,
-         )
-
-     # ---------------------------------------------------------------------.
-     # End product processing
-     if verbose:
-         timedelta_str = str(datetime.timedelta(seconds=round(time.time() - t_i)))
-         msg = f"{product} processing of station {station_name} completed in {timedelta_str}"
-         log_info(logger=logger, msg=msg, verbose=verbose)
-
-
- ####----------------------------------------------------------------------------.
- #### L2M
-
-
- @delayed_if_parallel
- @single_threaded_if_parallel
- def _generate_l2m(
-     filepath,
-     data_dir,
-     logs_dir,
-     campaign_name,
-     station_name,
-     # L2M options
-     sample_interval,
-     rolling,
-     model_name,
-     l2m_options,
-     # Radar options
-     radar_simulation_enabled,
-     radar_simulation_options,
-     # Processing options
-     force,
-     verbose,
-     parallel,  # used only to correctly initialize the logger!
- ):
-     # -----------------------------------------------------------------.
-     # Define product name
-     product = "L2M"
-
-     # -----------------------------------------------------------------.
-     # Define model options
-     psd_model = l2m_options["models"][model_name]["psd_model"]
-     optimization = l2m_options["models"][model_name]["optimization"]
-     optimization_kwargs = l2m_options["models"][model_name]["optimization_kwargs"]
-     other_options = {k: v for k, v in l2m_options.items() if k != "models"}
-
-     # -----------------------------------------------------------------.
-     # Create file logger
-     filename = os.path.basename(filepath)
-     logger, logger_filepath = create_logger_file(
-         logs_dir=logs_dir,
-         filename=filename,
-         parallel=parallel,
-     )
-
-     ##------------------------------------------------------------------------.
-     # Log start processing
-     msg = f"{product} processing of {filename} has started."
-     log_info(logger=logger, msg=msg, verbose=verbose)
-
-     ##------------------------------------------------------------------------.
-     ### Core computation
-     try:
-         # Open the raw netCDF
-         with xr.open_dataset(filepath, chunks={}, decode_timedelta=False, cache=False) as ds:
-             variables = [
-                 "drop_number_concentration",
-                 "fall_velocity",
-                 "D50",
-                 "Nw",
-                 "Nt",
-                 "M1",
-                 "M2",
-                 "M3",
-                 "M4",
-                 "M5",
-                 "M6",
-             ]
-             ds = ds[variables].load()
-
-         # Produce L2M dataset
-         ds = generate_l2_model(
-             ds=ds,
-             psd_model=psd_model,
-             optimization=optimization,
-             optimization_kwargs=optimization_kwargs,
-             **other_options,
-         )
-
-         # Simulate L2M-based radar variables if asked
-         if radar_simulation_enabled:
-             ds_radar = generate_l2_radar(ds, parallel=not parallel, **radar_simulation_options)
-             ds.update(ds_radar)
-             ds.attrs = ds_radar.attrs.copy()
-
-         # Write L2M netCDF4 dataset
-         if ds["time"].size > 1:
-             # Define filepath
-             filename = define_l2m_filename(
-                 ds,
-                 campaign_name=campaign_name,
-                 station_name=station_name,
-                 sample_interval=sample_interval,
-                 rolling=rolling,
-                 model_name=model_name,
-             )
-             filepath = os.path.join(data_dir, filename)
-             # Write to disk
-             write_product(ds, product=product, filepath=filepath, force=force)
-
-         ##--------------------------------------------------------------------.
-         # Clean environment
-         del ds
-
-         # Log end processing
-         msg = f"{product} processing of {filename} has ended."
-         log_info(logger=logger, msg=msg, verbose=verbose)
-
-     ##--------------------------------------------------------------------.
-     # Otherwise log the error
-     except Exception as e:
-         error_type = str(type(e).__name__)
-         msg = f"{error_type}: {e}"
-         log_error(logger, msg, verbose=verbose)
-
-     # Close the file logger
-     close_logger(logger)
-
-     # Return the logger file path
-     return logger_filepath
-
-
- def run_l2m_station(
-     # Station arguments
-     data_source,
-     campaign_name,
-     station_name,
-     # Processing options
-     force: bool = False,
-     verbose: bool = True,
-     parallel: bool = True,
-     debugging_mode: bool = False,
-     # DISDRODB root directories
-     data_archive_dir: Optional[str] = None,
-     metadata_archive_dir: Optional[str] = None,
- ):
-     """
-     Run the L2M processing of a specific DISDRODB station when invoked from the terminal.
-
-     This function is intended to be called through the ``disdrodb_run_l2m_station``
-     command-line interface.
-
-     Parameters
-     ----------
-     data_source : str
-         The name of the institution (for campaigns spanning multiple countries) or
-         the name of the country (for campaigns or sensor networks within a single country).
-         Must be provided in UPPER CASE.
-     campaign_name : str
-         The name of the campaign. Must be provided in UPPER CASE.
-     station_name : str
-         The name of the station.
-     force : bool, optional
-         If ``True``, existing data in the destination directories will be overwritten.
-         If ``False`` (default), an error will be raised if data already exists in the destination directories.
-     verbose : bool, optional
-         If ``True`` (default), detailed processing information will be printed to the terminal.
-         If ``False``, less information will be displayed.
-     parallel : bool, optional
-         If ``True`` (default), files will be processed in multiple processes simultaneously,
-         with each process using a single thread to avoid issues with the HDF/netCDF library.
-         If ``False``, files will be processed sequentially in a single process,
-         and multi-threading will be automatically exploited to speed up I/O tasks.
-     debugging_mode : bool, optional
-         If ``True``, the amount of data processed will be reduced.
-         Only the first 3 files will be processed. The default value is ``False``.
-     data_archive_dir : str, optional
-         The base directory of DISDRODB, expected in the format ``<...>/DISDRODB``.
-         If not specified, the path specified in the DISDRODB active configuration will be used.
-
-     """
-     # Define product
-     product = "L2M"
-
-     # Define base directory
-     data_archive_dir = get_data_archive_dir(data_archive_dir)
-
-     # Retrieve DISDRODB Metadata Archive directory
-     metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir)
-
-     # ------------------------------------------------------------------------.
-     # Start processing
-     if verbose:
-         t_i = time.time()
-         msg = f"{product} processing of station {station_name} has started."
-         log_info(logger=logger, msg=msg, verbose=verbose)
-
-     # -------------------------------------------------------------------------.
-     # Retrieve L2 processing options
-     # - Each dictionary item contains the processing options for a given rolling/accumulation_interval combo
-     l2_processing_options = get_l2_processing_options()
-
-     # ---------------------------------------------------------------------.
-     # Retrieve source sampling interval
-     # - If a station has varying measurement intervals over time, choose the smallest one!
-     metadata = read_station_metadata(
-         metadata_archive_dir=metadata_archive_dir,
-         data_source=data_source,
-         campaign_name=campaign_name,
-         station_name=station_name,
-     )
-     sample_interval = metadata["measurement_interval"]
-     if isinstance(sample_interval, list):
-         sample_interval = min(sample_interval)
-
-     # ---------------------------------------------------------------------.
-     # Loop
-     # sample_interval_acronym = "1MIN"
-     # l2_options = l2_processing_options["1MIN"]
-     available_pytmatrix = is_pytmatrix_available()
-     for sample_interval_acronym, l2_options in l2_processing_options.items():
-
-         # Retrieve accumulation_interval and rolling option
-         accumulation_interval, rolling = get_resampling_information(sample_interval_acronym)
-
-         # Retrieve L2M processing options
-         l2m_options = l2_options["l2m_options"]
-
-         # Retrieve radar simulation options
-         radar_simulation_enabled = l2_options.get("radar_simulation_enabled", False)
-         radar_simulation_options = l2_options["radar_simulation_options"]
-         if not available_pytmatrix:
-             radar_simulation_enabled = False
-
-         # ------------------------------------------------------------------.
-         # Avoid generation of rolling products at the source sample interval!
-         if rolling and accumulation_interval == sample_interval:
-             continue
-
-         # Avoid product generation if the accumulation_interval is less than the sample interval
-         if accumulation_interval < sample_interval:
-             continue
-
-         # -----------------------------------------------------------------.
-         # List files to process
-         required_product = get_required_product(product)
-         flag_not_available_data = False
-         try:
-             filepaths = find_files(
-                 data_archive_dir=data_archive_dir,
-                 # Station arguments
-                 data_source=data_source,
-                 campaign_name=campaign_name,
-                 station_name=station_name,
-                 # Product options
-                 product=required_product,
-                 sample_interval=accumulation_interval,
-                 rolling=rolling,
-                 # Processing options
-                 debugging_mode=debugging_mode,
-             )
-         except Exception as e:
-             print(str(e))  # Case where no file paths are available
-             flag_not_available_data = True
-
-         # If no data available, try with other L2E accumulation intervals
-         if flag_not_available_data:
-             msg = (
-                 f"{product} processing of {data_source} {campaign_name} {station_name} "
-                 + f"has not been launched because of missing {required_product} {sample_interval_acronym} data."
-             )
-             print(msg)
-             continue
-
-         # -----------------------------------------------------------------.
-         # Loop over distributions to fit
-         # model_name = "GAMMA_ML"
-         # model_options = l2m_options["models"][model_name]
-         for model_name, model_options in l2m_options["models"].items():
-
-             # Retrieve model options
-             psd_model = model_options["psd_model"]
-             optimization = model_options["optimization"]
-
-             # -----------------------------------------------------------------.
-             msg = f"Production of L2M_{model_name} for sample interval {accumulation_interval} s has started."
-             log_info(logger=logger, msg=msg, verbose=verbose)
-             msg = f"Estimating {psd_model} parameters using {optimization}."
-             log_info(logger=logger, msg=msg, verbose=verbose)
-
-             # -------------------------------------------------------------.
-             # Create product directory
-             data_dir = create_product_directory(
-                 # DISDRODB root directories
-                 data_archive_dir=data_archive_dir,
-                 metadata_archive_dir=metadata_archive_dir,
-                 # Station arguments
-                 data_source=data_source,
-                 campaign_name=campaign_name,
-                 station_name=station_name,
-                 # Processing options
-                 product=product,
-                 force=force,
-                 # Option for L2E
-                 sample_interval=accumulation_interval,
-                 rolling=rolling,
-                 # Option for L2M
-                 model_name=model_name,
-             )
-
-             # Define logs directory
-             logs_dir = create_logs_directory(
-                 product=product,
-                 data_archive_dir=data_archive_dir,
-                 # Station arguments
-                 data_source=data_source,
-                 campaign_name=campaign_name,
-                 station_name=station_name,
-                 # Option for L2E
-                 sample_interval=accumulation_interval,
-                 rolling=rolling,
-                 # Option for L2M
-                 model_name=model_name,
-             )
-
-             # Generate L2M files
-             # - Loop over the L2E netCDF files and generate L2M files.
-             # - If parallel=True, it does that in parallel using dask.delayed
-             list_tasks = [
-                 _generate_l2m(
-                     filepath=filepath,
-                     data_dir=data_dir,
-                     logs_dir=logs_dir,
-                     campaign_name=campaign_name,
-                     station_name=station_name,
-                     # L2M options
-                     sample_interval=accumulation_interval,
-                     rolling=rolling,
-                     model_name=model_name,
-                     l2m_options=l2m_options,
-                     # Radar options
-                     radar_simulation_enabled=radar_simulation_enabled,
-                     radar_simulation_options=radar_simulation_options,
-                     # Processing options
-                     force=force,
-                     verbose=verbose,
-                     parallel=parallel,
-                 )
-                 for filepath in filepaths
-             ]
-             list_logs = dask.compute(*list_tasks) if parallel else list_tasks
-
-             # -----------------------------------------------------------------.
-             # Define L2M summary logs
-             create_product_logs(
-                 product=product,
-                 # Station arguments
-                 data_source=data_source,
-                 campaign_name=campaign_name,
-                 station_name=station_name,
-                 # DISDRODB root directory
-                 data_archive_dir=data_archive_dir,
-                 # Product options
-                 model_name=model_name,
-                 sample_interval=sample_interval,
-                 rolling=rolling,
-                 # Logs list
-                 list_logs=list_logs,
-             )
-
-     # ---------------------------------------------------------------------.
-     # End L2M processing
-     if verbose:
-         timedelta_str = str(datetime.timedelta(seconds=round(time.time() - t_i)))
-         msg = f"{product} processing of station {station_name} completed in {timedelta_str}"
-         log_info(logger=logger, msg=msg, verbose=verbose)
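
For reference, a minimal sketch of how the deleted entry points were called directly from Python, matching the 0.1.2 signatures shown above. The same processing is normally triggered via the disdrodb_run_l2e_station and disdrodb_run_l2m_station command-line interfaces listed in the docstrings; the data_source/campaign_name/station_name values below are placeholders, not stations from this diff:

    # Sketch against the deleted 0.1.2 API; substitute a real DISDRODB station.
    from disdrodb.l2.routines import run_l2e_station, run_l2m_station

    run_l2e_station(
        data_source="EPFL",        # institution or country, UPPER CASE (placeholder)
        campaign_name="EXAMPLE",   # campaign name, UPPER CASE (placeholder)
        station_name="STATION_1",  # station name (placeholder)
        parallel=False,            # process events sequentially
        debugging_mode=True,       # process only the first few events
    )
    run_l2m_station(
        data_source="EPFL",
        campaign_name="EXAMPLE",
        station_name="STATION_1",
        parallel=False,
    )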