disdrodb 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between versions as they appear in the public registry.
- disdrodb/__init__.py +4 -0
- disdrodb/_version.py +2 -2
- disdrodb/api/checks.py +70 -47
- disdrodb/api/configs.py +0 -2
- disdrodb/api/info.py +3 -3
- disdrodb/api/io.py +48 -8
- disdrodb/api/path.py +116 -133
- disdrodb/api/search.py +12 -3
- disdrodb/cli/disdrodb_create_summary.py +103 -0
- disdrodb/cli/disdrodb_create_summary_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
- disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
- disdrodb/cli/disdrodb_run_l1_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
- disdrodb/data_transfer/download_data.py +123 -7
- disdrodb/issue/writer.py +2 -0
- disdrodb/l0/l0a_processing.py +10 -5
- disdrodb/l0/l0b_nc_processing.py +10 -6
- disdrodb/l0/l0b_processing.py +26 -61
- disdrodb/l0/l0c_processing.py +369 -251
- disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
- disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
- disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
- disdrodb/l1/fall_velocity.py +46 -0
- disdrodb/l1/processing.py +1 -1
- disdrodb/l2/processing.py +1 -1
- disdrodb/metadata/checks.py +132 -125
- disdrodb/psd/fitting.py +172 -205
- disdrodb/psd/models.py +1 -1
- disdrodb/routines/__init__.py +54 -0
- disdrodb/{l0/routines.py → routines/l0.py} +288 -418
- disdrodb/{l1/routines.py → routines/l1.py} +60 -92
- disdrodb/{l2/routines.py → routines/l2.py} +249 -462
- disdrodb/{routines.py → routines/wrappers.py} +95 -7
- disdrodb/scattering/axis_ratio.py +5 -1
- disdrodb/scattering/permittivity.py +18 -0
- disdrodb/scattering/routines.py +56 -36
- disdrodb/summary/routines.py +110 -34
- disdrodb/utils/archiving.py +434 -0
- disdrodb/utils/cli.py +5 -5
- disdrodb/utils/dask.py +62 -1
- disdrodb/utils/decorators.py +31 -0
- disdrodb/utils/encoding.py +5 -1
- disdrodb/{l2 → utils}/event.py +1 -66
- disdrodb/utils/logger.py +1 -1
- disdrodb/utils/manipulations.py +22 -12
- disdrodb/utils/routines.py +166 -0
- disdrodb/utils/time.py +3 -291
- disdrodb/utils/xarray.py +3 -0
- disdrodb/viz/plots.py +85 -14
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/METADATA +2 -2
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/RECORD +62 -54
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +1 -0
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
The hunks below show the refactoring of the L0 routines module, disdrodb/{l0/routines.py → routines/l0.py}. Lines marked `…` could not be recovered from the rendered diff.

```diff
@@ -21,13 +21,10 @@
 import datetime
 import logging
 import os
-import shutil
 import time
 from typing import Optional
 
-import …
-
-from disdrodb.api.checks import check_sensor_name, check_station_inputs
+from disdrodb.api.checks import check_measurement_intervals, check_sensor_name, check_station_inputs
 from disdrodb.api.create_directories import (
     create_l0_directory_structure,
     create_logs_directory,
```
```diff
@@ -39,8 +36,6 @@ from disdrodb.api.path import (
     define_l0a_filename,
     define_l0b_filename,
     define_l0c_filename,
-    define_metadata_filepath,
-    define_partitioning_tree,
 )
 from disdrodb.api.search import get_required_product
 from disdrodb.configs import get_data_archive_dir, get_folder_partitioning, get_metadata_archive_dir
```
```diff
@@ -52,30 +47,21 @@ from disdrodb.l0.l0a_processing import (
     write_l0a,
 )
 from disdrodb.l0.l0b_nc_processing import sanitize_ds
-from disdrodb.l0.l0b_processing import (
-    …
-    set_l0b_encodings,
-    write_l0b,
-)
-from disdrodb.l0.l0c_processing import (
-    create_daily_file,
-    get_files_per_days,
-    retrieve_possible_measurement_intervals,
-)
+from disdrodb.l0.l0b_processing import generate_l0b
+from disdrodb.l0.l0c_processing import TOLERANCE_SECONDS, create_l0c_datasets
 from disdrodb.metadata import read_station_metadata
-from disdrodb.utils.…
+from disdrodb.utils.archiving import get_files_per_time_block
+from disdrodb.utils.dask import execute_tasks_safely
 from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel
 
 # Logger
 from disdrodb.utils.logger import (
-    close_logger,
-    create_logger_file,
     create_product_logs,
-    log_error,
     log_info,
+    # log_warning,
 )
+from disdrodb.utils.routines import run_product_generation, try_get_required_filepaths
 from disdrodb.utils.writer import write_product
-from disdrodb.utils.yaml import read_yaml
 
 logger = logging.getLogger(__name__)
 
```
```diff
@@ -89,8 +75,7 @@ def _generate_l0a(
     filepath,
     data_dir,
     logs_dir,
-    campaign_name,
-    station_name,
+    logs_filename,
     # Processing info
     reader,
     metadata,
```
```diff
@@ -100,271 +85,236 @@ def _generate_l0a(
     verbose,
     parallel,
 ):
-    """Generate L0A file from raw file."""
+    """Generate L0A file from raw txt file."""
     # Define product
     product = "L0A"
-
     # Define folder partitioning
     folder_partitioning = get_folder_partitioning()
 
-    # …
-    …
+    # Define product processing function
+    def core(
+        filepath,
+        reader,
+        metadata,
+        issue_dict,
+        # Archiving options
+        data_dir,
+        folder_partitioning,
+        # Processing options
+        verbose,
+        force,
+        logger,
+    ):
+        """Define L0A product processing."""
+        # Retrieve information from metadata
+        sensor_name = metadata["sensor_name"]
+        campaign_name = metadata["campaign_name"]
+        station_name = metadata["station_name"]
+        # Read raw data into L0A format
+        df = reader(filepath, logger=logger)
+        df = sanitize_df(df, sensor_name=sensor_name, verbose=verbose, issue_dict=issue_dict, logger=logger)
+
+        # Write L0A dataframe
+        filename = define_l0a_filename(df, campaign_name=campaign_name, station_name=station_name)
+        folder_path = define_file_folder_path(df, dir_path=data_dir, folder_partitioning=folder_partitioning)
+        out_path = os.path.join(folder_path, filename)
+        write_l0a(df, filepath=out_path, force=force, logger=logger, verbose=verbose)
+        # Return L0A dataframe
+        return df
+
+    # Define product processing function kwargs
+    core_func_kwargs = dict(  # noqa: C408
+        filepath=filepath,
+        reader=reader,
+        metadata=metadata,
+        issue_dict=issue_dict,
+        # Archiving options
+        data_dir=data_dir,
+        folder_partitioning=folder_partitioning,
+        # Processing options
+        verbose=verbose,
+        force=force,
+    )
+    # Run product generation
+    logger_filepath = run_product_generation(
+        product=product,
         logs_dir=logs_dir,
-        …
+        logs_filename=logs_filename,
         parallel=parallel,
+        verbose=verbose,
+        folder_partitioning=folder_partitioning,
+        core_func=core,
+        core_func_kwargs=core_func_kwargs,
+        pass_logger=True,
     )
-
-    ##------------------------------------------------------------------------.
-    # Log start processing
-    msg = f"{product} processing of {filename} has started."
-    log_info(logger=logger, msg=msg, verbose=verbose)
-    success_flag = False
-    ##------------------------------------------------------------------------.
-    ### - Read raw file into a dataframe and sanitize for L0A format
-    try:
-        df = reader(filepath, logger=logger)
-        df = sanitize_df(
-            df=df,
-            sensor_name=sensor_name,
-            verbose=verbose,
-            issue_dict=issue_dict,
-            logger=logger,
-        )
-
-        ##--------------------------------------------------------------------.
-        #### - Write to Parquet
-        filename = define_l0a_filename(df=df, campaign_name=campaign_name, station_name=station_name)
-        folder_path = define_file_folder_path(df, data_dir=data_dir, folder_partitioning=folder_partitioning)
-        filepath = os.path.join(folder_path, filename)
-        write_l0a(df=df, filepath=filepath, force=force, logger=logger, verbose=verbose)
-
-        ##--------------------------------------------------------------------.
-        #### - Define logger file final directory
-        if folder_partitioning != "":
-            log_dst_dir = define_file_folder_path(df, data_dir=logs_dir, folder_partitioning=folder_partitioning)
-            os.makedirs(log_dst_dir, exist_ok=True)
-
-        ##--------------------------------------------------------------------.
-        # Clean environment
-        del df
-
-        # Log end processing
-        msg = f"{product} processing of {filename} has ended."
-        log_info(logger=logger, msg=msg, verbose=verbose)
-        success_flag = True
-
-    # Otherwise log the error
-    except Exception as e:
-        error_type = str(type(e).__name__)
-        msg = f"{error_type}: {e}"
-        log_error(logger=logger, msg=msg, verbose=verbose)
-
-    # Close the file logger
-    close_logger(logger)
-
-    # Move logger file to correct partitioning directory
-    if success_flag and folder_partitioning != "" and logger_filepath is not None:
-        # Move logger file to correct partitioning directory
-        dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
-        shutil.move(logger_filepath, dst_filepath)
-        logger_filepath = dst_filepath
-
-    # Return the logger file path
     return logger_filepath
 
 
 @delayed_if_parallel
 @single_threaded_if_parallel
-def …
+def _generate_l0b_from_nc(
     filepath,
     data_dir,
     logs_dir,
-    campaign_name,
-    station_name,
+    logs_filename,
     # Processing info
+    reader,
     metadata,
+    issue_dict,
     # Processing options
     force,
     verbose,
     parallel,
-    debugging_mode,
 ):
+    """Generate L0B file from raw netCDF file."""
     # Define product
     product = "L0B"
-
     # Define folder partitioning
     folder_partitioning = get_folder_partitioning()
 
-    # …
-    …
+    # Define product processing function
+    def core(
+        filepath,
+        reader,
+        metadata,
+        issue_dict,
+        # Dara archiving options
+        data_dir,
+        folder_partitioning,
+        # Processing options
+        verbose,
+        force,
+        logger,
+    ):
+        """Define L0B product processing."""
+        # Retrieve information from metadata
+        sensor_name = metadata["sensor_name"]
+        campaign_name = metadata["campaign_name"]
+        station_name = metadata["station_name"]
+
+        # Read raw netCDF and sanitize to L0B format
+        ds = reader(filepath, logger=logger)
+        ds = sanitize_ds(
+            ds=ds,
+            sensor_name=sensor_name,
+            metadata=metadata,
+            issue_dict=issue_dict,
+            verbose=verbose,
+            logger=logger,
+        )
 
-        # -----------------------------------------------------------------.
         # Write L0B netCDF4 dataset
         filename = define_l0b_filename(ds=ds, campaign_name=campaign_name, station_name=station_name)
-        folder_path = define_file_folder_path(ds, …
+        folder_path = define_file_folder_path(ds, dir_path=data_dir, folder_partitioning=folder_partitioning)
         filepath = os.path.join(folder_path, filename)
-        …
-        shutil.move(logger_filepath, dst_filepath)
-        logger_filepath = dst_filepath
-
-    # Return the logger file path
+        write_product(ds, filepath=filepath, force=force)
+
+        # Return L0B dataset
+        return ds
+
+    # Define product processing function kwargs
+    core_func_kwargs = dict(  # noqa: C408
+        filepath=filepath,
+        reader=reader,
+        metadata=metadata,
+        issue_dict=issue_dict,
+        # Archiving options
+        data_dir=data_dir,
+        folder_partitioning=folder_partitioning,
+        # Processing options
+        verbose=verbose,
+        force=force,
+    )
+    # Run product generation
+    logger_filepath = run_product_generation(
+        product=product,
+        logs_dir=logs_dir,
+        logs_filename=logs_filename,
+        parallel=parallel,
+        verbose=verbose,
+        folder_partitioning=folder_partitioning,
+        core_func=core,
+        core_func_kwargs=core_func_kwargs,
+        pass_logger=True,
+    )
     return logger_filepath
 
 
 @delayed_if_parallel
 @single_threaded_if_parallel
-def …
+def _generate_l0b(
     filepath,
     data_dir,
     logs_dir,
-    campaign_name,
-    station_name,
+    logs_filename,
     # Processing info
-    reader,
     metadata,
-    issue_dict,
     # Processing options
     force,
     verbose,
     parallel,
+    debugging_mode,
 ):
-
-    # -----------------------------------------------------------------.
-    # Define product name
+    # Define product
     product = "L0B"
-
     # Define folder partitioning
     folder_partitioning = get_folder_partitioning()
 
-    # …
-    …
+    # Define product processing function
+    def core(
+        filepath,
+        metadata,
+        # Archiving options
+        data_dir,
+        folder_partitioning,
+        # Processing options
+        debugging_mode,
+        verbose,
+        force,
+        logger,
+    ):
+        """Define L0B product processing."""
+        # Retrieve information from metadata
+        campaign_name = metadata["campaign_name"]
+        station_name = metadata["station_name"]
 
-        …
-        ds = …
-        ds = sanitize_ds(
-            ds=ds,
-            sensor_name=sensor_name,
-            metadata=metadata,
-            issue_dict=issue_dict,
-            verbose=verbose,
-            logger=logger,
-        )
+        # Read L0A Apache Parquet file
+        df = read_l0a_dataframe(filepath, debugging_mode=debugging_mode)
+        # Create L0B xarray Dataset
+        ds = generate_l0b(df=df, metadata=metadata, logger=logger, verbose=verbose)
 
-        # -----------------------------------------------------------------.
         # Write L0B netCDF4 dataset
         filename = define_l0b_filename(ds=ds, campaign_name=campaign_name, station_name=station_name)
-        folder_path = define_file_folder_path(ds, …
+        folder_path = define_file_folder_path(ds, dir_path=data_dir, folder_partitioning=folder_partitioning)
         filepath = os.path.join(folder_path, filename)
-        …
-        # Move logger file to correct partitioning directory
-        dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
-        shutil.move(logger_filepath, dst_filepath)
-        logger_filepath = dst_filepath
-
+        write_product(ds, filepath=filepath, force=force)
+        # Return L0B dataset
+        return ds
+
+    # Define product processing function kwargs
+    core_func_kwargs = dict(  # noqa: C408
+        filepath=filepath,
+        metadata=metadata,
+        # Archiving options
+        data_dir=data_dir,
+        folder_partitioning=folder_partitioning,
+        # Processing options
+        debugging_mode=debugging_mode,
+        verbose=verbose,
+        force=force,
+    )
+    # Run product generation
+    logger_filepath = run_product_generation(
+        product=product,
+        logs_dir=logs_dir,
+        logs_filename=logs_filename,
+        parallel=parallel,
+        verbose=verbose,
+        folder_partitioning=folder_partitioning,
+        core_func=core,
+        core_func_kwargs=core_func_kwargs,
+        pass_logger=True,
+    )
    # Return the logger file path
     return logger_filepath
 
```
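Across `_generate_l0a`, `_generate_l0b_from_nc`, and `_generate_l0b` (and `_generate_l0c` below), the change is the same: the inline start/try/except/close-logger/move-log boilerplate is deleted, and each generator instead defines a local `core()` function that is handed to the new `run_product_generation` helper imported from `disdrodb.utils.routines` (+166 lines in this release, not shown in this diff). The sketch below is only a plausible reconstruction of that helper's contract, inferred from the call sites and the removed inline code; it is an assumption, not the actual implementation.

```python
# Hypothetical sketch of run_product_generation; the real body lives in
# disdrodb/utils/routines.py and is not part of this diff excerpt.
import logging
import os


def run_product_generation_sketch(
    product,
    logs_dir,
    logs_filename,
    parallel,  # accepted because the call sites pass it; its exact role is not visible here
    verbose,
    folder_partitioning,
    core_func,
    core_func_kwargs,
    pass_logger=True,
):
    """Run core_func under a per-unit file logger and return the log file path."""
    # One log file per processed unit (a raw file, an L0A file, or a time block)
    logger = logging.getLogger(f"{product}.{logs_filename}")
    logger.setLevel(logging.INFO if verbose else logging.WARNING)
    os.makedirs(logs_dir, exist_ok=True)
    logger_filepath = os.path.join(logs_dir, f"logs_{logs_filename}.log")
    handler = logging.FileHandler(logger_filepath)
    logger.addHandler(handler)
    try:
        kwargs = {**core_func_kwargs, "logger": logger} if pass_logger else core_func_kwargs
        obj = core_func(**kwargs)
        # The removed inline code used the returned df/ds to move the log file into
        # the folder_partitioning subdirectory; presumably the real helper does the same.
        del obj
        logger.info(f"{product} processing of {logs_filename} has ended.")
    except Exception as e:
        # Failures are logged per unit instead of aborting the whole station run
        logger.error(f"{type(e).__name__}: {e}")
    finally:
        handler.close()
        logger.removeHandler(handler)
    return logger_filepath
```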
```diff
@@ -372,115 +322,93 @@ def _generate_l0b_from_nc(
 @delayed_if_parallel
 @single_threaded_if_parallel
 def _generate_l0c(
-    …
-    filepaths,
+    event_info,
     data_dir,
     logs_dir,
-    …
+    logs_filename,
+    # Processing info
+    metadata,
     # Processing options
     force,
     verbose,
     parallel, # this is used only to initialize the correct logger !
 ):
-
-    # Define product
+    """Define L0C product processing."""
+    # Define product
     product = "L0C"
-
     # Define folder partitioning
     folder_partitioning = get_folder_partitioning()
 
-    # …
-    …
-    # - TODO: in future available from dataset
-    metadata = read_yaml(metadata_filepath)
-    measurement_intervals = retrieve_possible_measurement_intervals(metadata)
+    # Define product processing function
+    def core(
+        event_info,
+        metadata,
+        # Archiving options
+        data_dir,
+        folder_partitioning,
+        # Processing options
+        verbose,
+        force,
+        logger,
+    ):
+        """Define L0C product processing."""
+        # Retrieve information from metadata
+        sensor_name = metadata["sensor_name"]
+        campaign_name = metadata["campaign_name"]
+        station_name = metadata["station_name"]
+        measurement_intervals = check_measurement_intervals(metadata["measurement_interval"])
 
         # Produce L0C datasets
-    dict_ds = …
-    …
-        filepaths=filepaths,
+        dict_ds = create_l0c_datasets(
+            event_info=event_info,
             measurement_intervals=measurement_intervals,
+            sensor_name=sensor_name,
             ensure_variables_equality=True,
             logger=logger,
             verbose=verbose,
         )
 
         # Write a dataset for each sample interval
+        valid_datasets = []
         for ds in dict_ds.values(): # (sample_interval, ds)
             # Write L0C netCDF4 dataset
             if ds["time"].size > 1:
-                # …
-                sensor_name = ds.attrs.get("sensor_name")
-                campaign_name = ds.attrs.get("campaign_name")
-                station_name = ds.attrs.get("station_name")
-
-                # Set encodings
-                ds = set_l0b_encodings(ds=ds, sensor_name=sensor_name)
-                # Update global attributes
-                ds = set_disdrodb_attrs(ds, product=product)
-
-                # Define product filepath
+                # Write L0C netCDF4 dataset
                 filename = define_l0c_filename(ds, campaign_name=campaign_name, station_name=station_name)
-                folder_path = define_file_folder_path(ds, …
+                folder_path = define_file_folder_path(ds, dir_path=data_dir, folder_partitioning=folder_partitioning)
                 filepath = os.path.join(folder_path, filename)
-
-                # Write to disk
                 write_product(ds, filepath=filepath, force=force)
+                valid_datasets.append(ds)
+
+        # Return a valid L0C dataset (just for logging)
+        if len(valid_datasets) == 0:
+            return None  # can happen when e.g. for a day there is not data (but input filepaths of previous/next day)
+        return valid_datasets[0]
+
+    # Define product processing function kwargs
+    core_func_kwargs = dict(  # noqa: C408
+        event_info=event_info,
+        metadata=metadata,
+        # Archiving options
+        data_dir=data_dir,
+        folder_partitioning=folder_partitioning,
+        # Processing options
+        verbose=verbose,
+        force=force,
+    )
 
-    …
-            os.makedirs(log_dst_dir, exist_ok=True)
-
-        # Log end processing
-        msg = f"{product} processing for {day} has ended."
-        log_info(logger=logger, msg=msg, verbose=verbose)
-        success_flag = True
-
-        ##--------------------------------------------------------------------.
-    # Otherwise log the error
-    except Exception as e:
-        error_type = str(type(e).__name__)
-        msg = f"{error_type}: {e}"
-        log_error(logger, msg, verbose=verbose)
-
-    # Close the file logger
-    close_logger(logger)
-
-    # Move logger file to correct partitioning directory
-    if success_flag and folder_partitioning != "" and logger_filepath is not None:
-        # Move logger file to correct partitioning directory
-        dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
-        shutil.move(logger_filepath, dst_filepath)
-        logger_filepath = dst_filepath
-
+    # Run product generation
+    logger_filepath = run_product_generation(
+        product=product,
+        logs_dir=logs_dir,
+        logs_filename=logs_filename,
+        parallel=parallel,
+        verbose=verbose,
+        folder_partitioning=folder_partitioning,
+        core_func=core,
+        core_func_kwargs=core_func_kwargs,
+        pass_logger=True,
+    )
     # Return the logger file path
     return logger_filepath
 
```
```diff
@@ -579,11 +507,11 @@ def run_l0a_station(
     # Create directory structure
     data_dir = create_l0_directory_structure(
         data_archive_dir=data_archive_dir,
+        metadata_archive_dir=metadata_archive_dir,
         data_source=data_source,
         campaign_name=campaign_name,
-        metadata_archive_dir=metadata_archive_dir,
-        product=product,  # L0A or L0B
         station_name=station_name,
+        product=product,  # L0A or L0B
         force=force,
     )
 
```
```diff
@@ -647,8 +575,7 @@ def run_l0a_station(
             filepath=filepath,
             data_dir=data_dir,
             logs_dir=logs_dir,
-            campaign_name=campaign_name,
-            station_name=station_name,
+            logs_filename=os.path.basename(filepath),
             # Reader argument
             reader=reader,
             # Processing info
```
```diff
@@ -661,7 +588,7 @@ def run_l0a_station(
         )
         for filepath in filepaths
     ]
-    list_logs = …
+    list_logs = execute_tasks_safely(list_tasks=list_tasks, parallel=parallel, logs_dir=logs_dir)
 
     # -----------------------------------------------------------------.
     # Define product summary logs
```
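All three station runners now submit their per-file tasks through `execute_tasks_safely`, imported from the new `disdrodb.utils.dask` module (+62 lines, not shown in this diff). Since `@delayed_if_parallel` wraps each `_generate_*` call in `dask.delayed` when `parallel=True`, a minimal sketch of the behaviour implied by the call sites could look as follows (an assumption, not the actual code):

```python
# Hypothetical sketch; the real execute_tasks_safely is defined in
# disdrodb/utils/dask.py and is not shown in this diff.
import dask


def execute_tasks_safely_sketch(list_tasks, parallel, logs_dir):
    """Execute the per-file tasks and return the list of log file paths."""
    if not parallel:
        # Tasks were already executed eagerly; the list holds log file paths
        return list_tasks
    try:
        # Tasks are dask.delayed objects: compute them all at once
        return list(dask.compute(*list_tasks))
    except Exception as e:
        # The "safely" part presumably guards against scheduler-level failures
        # (e.g. killed workers) so the log files already written to logs_dir survive
        print(f"Dask computation failed: {e}. Partial logs are in {logs_dir}")
        return []
```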
```diff
@@ -794,30 +721,19 @@ def run_l0b_station(
     )
 
     ##----------------------------------------------------------------.
-    # …
+    # List files to process
+    # - If no data available, print error message and return None
     required_product = get_required_product(product)
-    …
-    except Exception as e:
-        print(str(e))  # Case where no file paths available
-        flag_not_available_data = True
-
-    # -------------------------------------------------------------------------.
-    # If no data available, print error message and return None
-    if flag_not_available_data:
-        msg = (
-            f"{product} processing of {data_source} {campaign_name} {station_name} "
-            + f"has not been launched because of missing {required_product} data."
-        )
-        print(msg)
+    filepaths = try_get_required_filepaths(
+        data_archive_dir=data_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+        product=required_product,
+        # Processing options
+        debugging_mode=debugging_mode,
+    )
+    if filepaths is None:
         return
 
     ##----------------------------------------------------------------.
```
```diff
@@ -826,16 +742,13 @@ def run_l0b_station(
     # - If parallel=True, it does that in parallel using dask.bag
     # Settings npartitions=len(filepaths) enable to wait prior task on a core
     # finish before starting a new one.
-    # BUG: If debugging_mode=True and parallel=True a subtle bug can currently occur when
-    # two processes with a subsetted L0A files want to create the same L0B files !
     list_tasks = [
         _generate_l0b(
             filepath=filepath,
             data_dir=data_dir,
             logs_dir=logs_dir,
+            logs_filename=os.path.basename(filepath),
             metadata=metadata,
-            campaign_name=campaign_name,
-            station_name=station_name,
             force=force,
             verbose=verbose,
             debugging_mode=debugging_mode,
```
```diff
@@ -843,38 +756,8 @@ def run_l0b_station(
         )
         for filepath in filepaths
     ]
-
-
-    # list_logs = [
-    #     _generate_l0b(
-    #         filepath=filepath,
-    #         data_dir=data_dir,
-    #         logs_dir=logs_dir,
-    #         metadata=metadata,
-    #         campaign_name=campaign_name,
-    #         station_name=station_name,
-    #         force=force,
-    #         verbose=verbose,
-    #         debugging_mode=debugging_mode,
-    #         parallel=parallel,
-    #     )
-    #     for filepath in filepaths
-    # ]
-
-    # else:
-    #     bag = db.from_sequence(filepaths, npartitions=len(filepaths))
-    #     list_logs = bag.map(
-    #         _generate_l0b,
-    #         data_dir=data_dir,
-    #         logs_dir=logs_dir,
-    #         metadata=metadata,
-    #         campaign_name=campaign_name,
-    #         station_name=station_name,
-    #         force=force,
-    #         verbose=verbose,
-    #         debugging_mode=debugging_mode,
-    #         parallel=parallel,
-    #     ).compute()
+
+    list_logs = execute_tasks_safely(list_tasks=list_tasks, parallel=parallel, logs_dir=logs_dir)
 
     # -----------------------------------------------------------------.
     # Define L0B summary logs
```
```diff
@@ -990,6 +873,15 @@ def run_l0c_station(
         station_name=station_name,
     )
 
+    # -----------------------------------------------------------------.
+    # Retrieve metadata
+    metadata = read_station_metadata(
+        metadata_archive_dir=metadata_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
+
     # ------------------------------------------------------------------------.
     # Start processing
     t_i = time.time()
```
```diff
@@ -1017,46 +909,26 @@ def run_l0c_station(
         force=force,
     )
 
-    # …
-    # …
-    …
+    # -------------------------------------------------------------------------.
+    # List files to process
+    # - If no data available, print error message and return None
+    required_product = get_required_product(product)
+    filepaths = try_get_required_filepaths(
+        data_archive_dir=data_archive_dir,
         data_source=data_source,
         campaign_name=campaign_name,
         station_name=station_name,
+        product=required_product,
+        # Processing options
+        debugging_mode=debugging_mode,
     )
-
-    # -------------------------------------------------------------------------.
-    # List files to process
-    required_product = get_required_product(product)
-    flag_not_available_data = False
-    try:
-        filepaths = find_files(
-            data_archive_dir=data_archive_dir,
-            data_source=data_source,
-            campaign_name=campaign_name,
-            station_name=station_name,
-            product=required_product,
-            # Processing options
-            debugging_mode=debugging_mode,
-        )
-    except Exception as e:
-        print(str(e))  # Case where no file paths available
-        flag_not_available_data = True
-
-    # -------------------------------------------------------------------------.
-    # If no data available, print error message and return None
-    if flag_not_available_data:
-        msg = (
-            f"{product} processing of {data_source} {campaign_name} {station_name} "
-            + f"has not been launched because of missing {required_product} data."
-        )
-        print(msg)
+    if filepaths is None:
         return
 
     # -------------------------------------------------------------------------.
-    # Retrieve dictionary with the required files
-    …
+    # Retrieve dictionary with the required files per time block
+    # TODO: allow customizing this in config file, but risk of out of memory !
+    list_event_info = get_files_per_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)
 
     # -----------------------------------------------------------------.
     # Generate L0C files
```
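`get_files_per_time_block`, imported from the new `disdrodb.utils.archiving` module (+434 lines, not shown here), replaces the old per-day grouping (`get_files_per_days`). The call sites imply it returns a list of `event_info` dictionaries, each carrying at least a `"start_time"` (used below to name the log file) plus the file paths overlapping that time block, with `tolerance_seconds` letting files whose records spill slightly past midnight join the neighbouring day. A rough, self-contained sketch of that contract follows; the timestamp parsing helper is purely illustrative and not part of disdrodb:

```python
# Hypothetical sketch of the grouping contract inferred from the call sites.
import os
import re
from collections import defaultdict
from datetime import datetime, timedelta


def _file_time_range(filepath):
    """Toy parser: assume filenames embed s<start>.e<end> timestamps (assumption)."""
    m = re.search(r"s(\d{8}T\d{6})\.e(\d{8}T\d{6})", os.path.basename(filepath))
    fmt = "%Y%m%dT%H%M%S"
    return datetime.strptime(m.group(1), fmt), datetime.strptime(m.group(2), fmt)


def get_files_per_time_block_sketch(filepaths, freq="day", tolerance_seconds=60):
    """Group files into daily time blocks; tolerance lets edge records join the neighbour day."""
    assert freq == "day"  # the only case used in this diff
    tol = timedelta(seconds=tolerance_seconds)
    blocks = defaultdict(set)
    for filepath in filepaths:
        start, end = _file_time_range(filepath)
        # Pad the range so records close to midnight are also offered to the adjacent day
        day = (start - tol).replace(hour=0, minute=0, second=0, microsecond=0)
        while day <= end + tol:
            blocks[day].add(filepath)
            day += timedelta(days=1)
    return [
        {"start_time": day, "end_time": day + timedelta(days=1), "filepaths": sorted(paths)}
        for day, paths in sorted(blocks.items())
    ]
```

This also explains the new `return None` branch in `_generate_l0c` above: a day's `event_info` may reference only spill-over files from the adjacent days, yielding no data for the day itself.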
```diff
@@ -1064,21 +936,19 @@ def run_l0c_station(
     # - If parallel=True, it does that in parallel using dask.delayed
     list_tasks = [
         _generate_l0c(
-            …
-            …
+            event_info=event_info,
+            metadata=metadata,
             data_dir=data_dir,
             logs_dir=logs_dir,
-            …
-            campaign_name=campaign_name,
-            station_name=station_name,
+            logs_filename=event_info["start_time"].strftime("%Y%m%dT%H%M%S"),
             # Processing options
             force=force,
             verbose=verbose,
             parallel=parallel,
         )
-        for …
+        for event_info in list_event_info
     ]
-    list_logs = …
+    list_logs = execute_tasks_safely(list_tasks=list_tasks, parallel=parallel, logs_dir=logs_dir)
 
     # -----------------------------------------------------------------.
     # Define summary logs
```