disdrodb 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Files changed (62)
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/checks.py +70 -47
  4. disdrodb/api/configs.py +0 -2
  5. disdrodb/api/info.py +3 -3
  6. disdrodb/api/io.py +48 -8
  7. disdrodb/api/path.py +116 -133
  8. disdrodb/api/search.py +12 -3
  9. disdrodb/cli/disdrodb_create_summary.py +103 -0
  10. disdrodb/cli/disdrodb_create_summary_station.py +1 -1
  11. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
  13. disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
  14. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  15. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  16. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  17. disdrodb/data_transfer/download_data.py +123 -7
  18. disdrodb/issue/writer.py +2 -0
  19. disdrodb/l0/l0a_processing.py +10 -5
  20. disdrodb/l0/l0b_nc_processing.py +10 -6
  21. disdrodb/l0/l0b_processing.py +26 -61
  22. disdrodb/l0/l0c_processing.py +369 -251
  23. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
  24. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  25. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  26. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
  27. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  28. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  29. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
  30. disdrodb/l1/fall_velocity.py +46 -0
  31. disdrodb/l1/processing.py +1 -1
  32. disdrodb/l2/processing.py +1 -1
  33. disdrodb/metadata/checks.py +132 -125
  34. disdrodb/psd/fitting.py +172 -205
  35. disdrodb/psd/models.py +1 -1
  36. disdrodb/routines/__init__.py +54 -0
  37. disdrodb/{l0/routines.py → routines/l0.py} +288 -418
  38. disdrodb/{l1/routines.py → routines/l1.py} +60 -92
  39. disdrodb/{l2/routines.py → routines/l2.py} +249 -462
  40. disdrodb/{routines.py → routines/wrappers.py} +95 -7
  41. disdrodb/scattering/axis_ratio.py +5 -1
  42. disdrodb/scattering/permittivity.py +18 -0
  43. disdrodb/scattering/routines.py +56 -36
  44. disdrodb/summary/routines.py +110 -34
  45. disdrodb/utils/archiving.py +434 -0
  46. disdrodb/utils/cli.py +5 -5
  47. disdrodb/utils/dask.py +62 -1
  48. disdrodb/utils/decorators.py +31 -0
  49. disdrodb/utils/encoding.py +5 -1
  50. disdrodb/{l2 → utils}/event.py +1 -66
  51. disdrodb/utils/logger.py +1 -1
  52. disdrodb/utils/manipulations.py +22 -12
  53. disdrodb/utils/routines.py +166 -0
  54. disdrodb/utils/time.py +3 -291
  55. disdrodb/utils/xarray.py +3 -0
  56. disdrodb/viz/plots.py +85 -14
  57. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/METADATA +2 -2
  58. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/RECORD +62 -54
  59. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +1 -0
  60. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
  61. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
  62. {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
@@ -21,13 +21,10 @@
  import datetime
  import logging
  import os
- import shutil
  import time
  from typing import Optional

- import dask
-
- from disdrodb.api.checks import check_sensor_name, check_station_inputs
+ from disdrodb.api.checks import check_measurement_intervals, check_sensor_name, check_station_inputs
  from disdrodb.api.create_directories import (
  create_l0_directory_structure,
  create_logs_directory,
@@ -39,8 +36,6 @@ from disdrodb.api.path import (
  define_l0a_filename,
  define_l0b_filename,
  define_l0c_filename,
- define_metadata_filepath,
- define_partitioning_tree,
  )
  from disdrodb.api.search import get_required_product
  from disdrodb.configs import get_data_archive_dir, get_folder_partitioning, get_metadata_archive_dir
@@ -52,30 +47,21 @@ from disdrodb.l0.l0a_processing import (
  write_l0a,
  )
  from disdrodb.l0.l0b_nc_processing import sanitize_ds
- from disdrodb.l0.l0b_processing import (
- generate_l0b,
- set_l0b_encodings,
- write_l0b,
- )
- from disdrodb.l0.l0c_processing import (
- create_daily_file,
- get_files_per_days,
- retrieve_possible_measurement_intervals,
- )
+ from disdrodb.l0.l0b_processing import generate_l0b
+ from disdrodb.l0.l0c_processing import TOLERANCE_SECONDS, create_l0c_datasets
  from disdrodb.metadata import read_station_metadata
- from disdrodb.utils.attrs import set_disdrodb_attrs
+ from disdrodb.utils.archiving import get_files_per_time_block
+ from disdrodb.utils.dask import execute_tasks_safely
  from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel

  # Logger
  from disdrodb.utils.logger import (
- close_logger,
- create_logger_file,
  create_product_logs,
- log_error,
  log_info,
+ # log_warning,
  )
+ from disdrodb.utils.routines import run_product_generation, try_get_required_filepaths
  from disdrodb.utils.writer import write_product
- from disdrodb.utils.yaml import read_yaml

  logger = logging.getLogger(__name__)

@@ -89,8 +75,7 @@ def _generate_l0a(
  filepath,
  data_dir,
  logs_dir,
- campaign_name,
- station_name,
+ logs_filename,
  # Processing info
  reader,
  metadata,
@@ -100,271 +85,236 @@ def _generate_l0a(
  verbose,
  parallel,
  ):
- """Generate L0A file from raw file."""
+ """Generate L0A file from raw txt file."""
  # Define product
  product = "L0A"
-
  # Define folder partitioning
  folder_partitioning = get_folder_partitioning()

- # Retrieve sensor name
- sensor_name = metadata["sensor_name"]
-
- ##------------------------------------------------------------------------.
- # Create file logger
- filename = os.path.basename(filepath)
- logger, logger_filepath = create_logger_file(
+ # Define product processing function
+ def core(
+ filepath,
+ reader,
+ metadata,
+ issue_dict,
+ # Archiving options
+ data_dir,
+ folder_partitioning,
+ # Processing options
+ verbose,
+ force,
+ logger,
+ ):
+ """Define L0A product processing."""
+ # Retrieve information from metadata
+ sensor_name = metadata["sensor_name"]
+ campaign_name = metadata["campaign_name"]
+ station_name = metadata["station_name"]
+ # Read raw data into L0A format
+ df = reader(filepath, logger=logger)
+ df = sanitize_df(df, sensor_name=sensor_name, verbose=verbose, issue_dict=issue_dict, logger=logger)
+
+ # Write L0A dataframe
+ filename = define_l0a_filename(df, campaign_name=campaign_name, station_name=station_name)
+ folder_path = define_file_folder_path(df, dir_path=data_dir, folder_partitioning=folder_partitioning)
+ out_path = os.path.join(folder_path, filename)
+ write_l0a(df, filepath=out_path, force=force, logger=logger, verbose=verbose)
+ # Return L0A dataframe
+ return df
+
+ # Define product processing function kwargs
+ core_func_kwargs = dict( # noqa: C408
+ filepath=filepath,
+ reader=reader,
+ metadata=metadata,
+ issue_dict=issue_dict,
+ # Archiving options
+ data_dir=data_dir,
+ folder_partitioning=folder_partitioning,
+ # Processing options
+ verbose=verbose,
+ force=force,
+ )
+ # Run product generation
+ logger_filepath = run_product_generation(
+ product=product,
  logs_dir=logs_dir,
- filename=filename,
+ logs_filename=logs_filename,
  parallel=parallel,
+ verbose=verbose,
+ folder_partitioning=folder_partitioning,
+ core_func=core,
+ core_func_kwargs=core_func_kwargs,
+ pass_logger=True,
  )
-
- ##------------------------------------------------------------------------.
- # Log start processing
- msg = f"{product} processing of {filename} has started."
- log_info(logger=logger, msg=msg, verbose=verbose)
- success_flag = False
- ##------------------------------------------------------------------------.
- ### - Read raw file into a dataframe and sanitize for L0A format
- try:
- df = reader(filepath, logger=logger)
- df = sanitize_df(
- df=df,
- sensor_name=sensor_name,
- verbose=verbose,
- issue_dict=issue_dict,
- logger=logger,
- )
-
- ##--------------------------------------------------------------------.
- #### - Write to Parquet
- filename = define_l0a_filename(df=df, campaign_name=campaign_name, station_name=station_name)
- folder_path = define_file_folder_path(df, data_dir=data_dir, folder_partitioning=folder_partitioning)
- filepath = os.path.join(folder_path, filename)
- write_l0a(df=df, filepath=filepath, force=force, logger=logger, verbose=verbose)
-
- ##--------------------------------------------------------------------.
- #### - Define logger file final directory
- if folder_partitioning != "":
- log_dst_dir = define_file_folder_path(df, data_dir=logs_dir, folder_partitioning=folder_partitioning)
- os.makedirs(log_dst_dir, exist_ok=True)
-
- ##--------------------------------------------------------------------.
- # Clean environment
- del df
-
- # Log end processing
- msg = f"{product} processing of {filename} has ended."
- log_info(logger=logger, msg=msg, verbose=verbose)
- success_flag = True
-
- # Otherwise log the error
- except Exception as e:
- error_type = str(type(e).__name__)
- msg = f"{error_type}: {e}"
- log_error(logger=logger, msg=msg, verbose=verbose)
-
- # Close the file logger
- close_logger(logger)
-
- # Move logger file to correct partitioning directory
- if success_flag and folder_partitioning != "" and logger_filepath is not None:
- # Move logger file to correct partitioning directory
- dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
- shutil.move(logger_filepath, dst_filepath)
- logger_filepath = dst_filepath
-
- # Return the logger file path
  return logger_filepath


  @delayed_if_parallel
  @single_threaded_if_parallel
- def _generate_l0b(
+ def _generate_l0b_from_nc(
  filepath,
  data_dir,
  logs_dir,
- campaign_name,
- station_name,
+ logs_filename,
  # Processing info
+ reader,
  metadata,
+ issue_dict,
  # Processing options
  force,
  verbose,
  parallel,
- debugging_mode,
  ):
+ """Generate L0B file from raw netCDF file."""
  # Define product
  product = "L0B"
-
  # Define folder partitioning
  folder_partitioning = get_folder_partitioning()

- # -----------------------------------------------------------------.
- # Create file logger
- filename = os.path.basename(filepath)
- logger, logger_filepath = create_logger_file(
- logs_dir=logs_dir,
- filename=filename,
- parallel=parallel,
- )
-
- ##------------------------------------------------------------------------.
- # Log start processing
- msg = f"{product} processing of {filename} has started."
- log_info(logger=logger, msg=msg, verbose=verbose)
- success_flag = False
-
- ##------------------------------------------------------------------------.
- # Retrieve sensor name
- sensor_name = metadata["sensor_name"]
- check_sensor_name(sensor_name)
-
- ##------------------------------------------------------------------------.
- try:
- # Read L0A Apache Parquet file
- df = read_l0a_dataframe(filepath, debugging_mode=debugging_mode)
-
- # -----------------------------------------------------------------.
- # Create xarray Dataset
- ds = generate_l0b(df=df, metadata=metadata, logger=logger, verbose=verbose)
+ # Define product processing function
+ def core(
+ filepath,
+ reader,
+ metadata,
+ issue_dict,
+ # Dara archiving options
+ data_dir,
+ folder_partitioning,
+ # Processing options
+ verbose,
+ force,
+ logger,
+ ):
+ """Define L0B product processing."""
+ # Retrieve information from metadata
+ sensor_name = metadata["sensor_name"]
+ campaign_name = metadata["campaign_name"]
+ station_name = metadata["station_name"]
+
+ # Read raw netCDF and sanitize to L0B format
+ ds = reader(filepath, logger=logger)
+ ds = sanitize_ds(
+ ds=ds,
+ sensor_name=sensor_name,
+ metadata=metadata,
+ issue_dict=issue_dict,
+ verbose=verbose,
+ logger=logger,
+ )

- # -----------------------------------------------------------------.
  # Write L0B netCDF4 dataset
  filename = define_l0b_filename(ds=ds, campaign_name=campaign_name, station_name=station_name)
- folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
+ folder_path = define_file_folder_path(ds, dir_path=data_dir, folder_partitioning=folder_partitioning)
  filepath = os.path.join(folder_path, filename)
- write_l0b(ds, filepath=filepath, force=force)
-
- ##--------------------------------------------------------------------.
- #### - Define logger file final directory
- if folder_partitioning != "":
- log_dst_dir = define_file_folder_path(ds, data_dir=logs_dir, folder_partitioning=folder_partitioning)
- os.makedirs(log_dst_dir, exist_ok=True)
-
- ##--------------------------------------------------------------------.
- # Clean environment
- del ds, df
-
- # Log end processing
- msg = f"{product} processing of {filename} has ended."
- log_info(logger=logger, msg=msg, verbose=verbose)
- success_flag = True
-
- # Otherwise log the error
- except Exception as e:
- error_type = str(type(e).__name__)
- msg = f"{error_type}: {e}"
- log_error(logger, msg, verbose=verbose)
-
- # Close the file logger
- close_logger(logger)
-
- # Move logger file to correct partitioning directory
- if success_flag and folder_partitioning != "" and logger_filepath is not None:
- # Move logger file to correct partitioning directory
- dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
- shutil.move(logger_filepath, dst_filepath)
- logger_filepath = dst_filepath
-
- # Return the logger file path
+ write_product(ds, filepath=filepath, force=force)
+
+ # Return L0B dataset
+ return ds
+
+ # Define product processing function kwargs
+ core_func_kwargs = dict( # noqa: C408
+ filepath=filepath,
+ reader=reader,
+ metadata=metadata,
+ issue_dict=issue_dict,
+ # Archiving options
+ data_dir=data_dir,
+ folder_partitioning=folder_partitioning,
+ # Processing options
+ verbose=verbose,
+ force=force,
+ )
+ # Run product generation
+ logger_filepath = run_product_generation(
+ product=product,
+ logs_dir=logs_dir,
+ logs_filename=logs_filename,
+ parallel=parallel,
+ verbose=verbose,
+ folder_partitioning=folder_partitioning,
+ core_func=core,
+ core_func_kwargs=core_func_kwargs,
+ pass_logger=True,
+ )
  return logger_filepath


  @delayed_if_parallel
  @single_threaded_if_parallel
- def _generate_l0b_from_nc(
+ def _generate_l0b(
  filepath,
  data_dir,
  logs_dir,
- campaign_name,
- station_name,
+ logs_filename,
  # Processing info
- reader,
  metadata,
- issue_dict,
  # Processing options
  force,
  verbose,
  parallel,
+ debugging_mode,
  ):
-
- # -----------------------------------------------------------------.
- # Define product name
+ # Define product
  product = "L0B"
-
  # Define folder partitioning
  folder_partitioning = get_folder_partitioning()

- # Retrieve sensor name
- sensor_name = metadata["sensor_name"]
-
- # -----------------------------------------------------------------.
- # Create file logger
- filename = os.path.basename(filepath)
- logger, logger_filepath = create_logger_file(
- logs_dir=logs_dir,
- filename=filename,
- parallel=parallel,
- )
-
- ##------------------------------------------------------------------------.
- # Log start processing
- msg = f"{product} processing of {filename} has started."
- log_info(logger=logger, msg=msg, verbose=verbose)
- success_flag = False
+ # Define product processing function
+ def core(
+ filepath,
+ metadata,
+ # Archiving options
+ data_dir,
+ folder_partitioning,
+ # Processing options
+ debugging_mode,
+ verbose,
+ force,
+ logger,
+ ):
+ """Define L0B product processing."""
+ # Retrieve information from metadata
+ campaign_name = metadata["campaign_name"]
+ station_name = metadata["station_name"]

- ##------------------------------------------------------------------------.
- ### - Read raw netCDF and sanitize for L0B format
- try:
- ds = reader(filepath, logger=logger)
- ds = sanitize_ds(
- ds=ds,
- sensor_name=sensor_name,
- metadata=metadata,
- issue_dict=issue_dict,
- verbose=verbose,
- logger=logger,
- )
+ # Read L0A Apache Parquet file
+ df = read_l0a_dataframe(filepath, debugging_mode=debugging_mode)
+ # Create L0B xarray Dataset
+ ds = generate_l0b(df=df, metadata=metadata, logger=logger, verbose=verbose)

- # -----------------------------------------------------------------.
  # Write L0B netCDF4 dataset
  filename = define_l0b_filename(ds=ds, campaign_name=campaign_name, station_name=station_name)
- folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
+ folder_path = define_file_folder_path(ds, dir_path=data_dir, folder_partitioning=folder_partitioning)
  filepath = os.path.join(folder_path, filename)
- write_l0b(ds, filepath=filepath, force=force)
-
- ##--------------------------------------------------------------------.
- #### - Define logger file final directory
- if folder_partitioning != "":
- log_dst_dir = define_file_folder_path(ds, data_dir=logs_dir, folder_partitioning=folder_partitioning)
- os.makedirs(log_dst_dir, exist_ok=True)
-
- ##--------------------------------------------------------------------.
- # Clean environment
- del ds
-
- # Log end processing
- msg = f"L0B processing of {filename} has ended."
- log_info(logger=logger, msg=msg, verbose=verbose)
- success_flag = True
-
- # Otherwise log the error
- except Exception as e:
- error_type = str(type(e).__name__)
- msg = f"{error_type}: {e}"
- log_error(logger, msg, verbose=verbose)
-
- # Close the file logger
- close_logger(logger)
-
- # Move logger file to correct partitioning directory
- if success_flag and folder_partitioning != "" and logger_filepath is not None:
- # Move logger file to correct partitioning directory
- dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
- shutil.move(logger_filepath, dst_filepath)
- logger_filepath = dst_filepath
-
+ write_product(ds, filepath=filepath, force=force)
+ # Return L0B dataset
+ return ds
+
+ # Define product processing function kwargs
+ core_func_kwargs = dict( # noqa: C408
+ filepath=filepath,
+ metadata=metadata,
+ # Archiving options
+ data_dir=data_dir,
+ folder_partitioning=folder_partitioning,
+ # Processing options
+ debugging_mode=debugging_mode,
+ verbose=verbose,
+ force=force,
+ )
+ # Run product generation
+ logger_filepath = run_product_generation(
+ product=product,
+ logs_dir=logs_dir,
+ logs_filename=logs_filename,
+ parallel=parallel,
+ verbose=verbose,
+ folder_partitioning=folder_partitioning,
+ core_func=core,
+ core_func_kwargs=core_func_kwargs,
+ pass_logger=True,
+ )
  # Return the logger file path
  return logger_filepath

@@ -372,115 +322,93 @@ def _generate_l0b_from_nc(
  @delayed_if_parallel
  @single_threaded_if_parallel
  def _generate_l0c(
- day,
- filepaths,
+ event_info,
  data_dir,
  logs_dir,
- metadata_filepath,
- campaign_name,
- station_name,
+ logs_filename,
+ # Processing info
+ metadata,
  # Processing options
  force,
  verbose,
  parallel, # this is used only to initialize the correct logger !
  ):
- # -----------------------------------------------------------------.
- # Define product name
+ """Define L0C product processing."""
+ # Define product
  product = "L0C"
-
  # Define folder partitioning
  folder_partitioning = get_folder_partitioning()

- # -----------------------------------------------------------------.
- # Create file logger
- logger, logger_filepath = create_logger_file(
- logs_dir=logs_dir,
- filename=day,
- parallel=parallel,
- )
-
- ##------------------------------------------------------------------------.
- # Log start processing
- msg = f"{product} processing for {day} has started."
- log_info(logger=logger, msg=msg, verbose=verbose)
- success_flag = False
-
- ##------------------------------------------------------------------------.
- ### Core computation
- try:
- # Retrieve measurement_intervals
- # - TODO: in future available from dataset
- metadata = read_yaml(metadata_filepath)
- measurement_intervals = retrieve_possible_measurement_intervals(metadata)
+ # Define product processing function
+ def core(
+ event_info,
+ metadata,
+ # Archiving options
+ data_dir,
+ folder_partitioning,
+ # Processing options
+ verbose,
+ force,
+ logger,
+ ):
+ """Define L0C product processing."""
+ # Retrieve information from metadata
+ sensor_name = metadata["sensor_name"]
+ campaign_name = metadata["campaign_name"]
+ station_name = metadata["station_name"]
+ measurement_intervals = check_measurement_intervals(metadata["measurement_interval"])

  # Produce L0C datasets
- dict_ds = create_daily_file(
- day=day,
- filepaths=filepaths,
+ dict_ds = create_l0c_datasets(
+ event_info=event_info,
  measurement_intervals=measurement_intervals,
+ sensor_name=sensor_name,
  ensure_variables_equality=True,
  logger=logger,
  verbose=verbose,
  )

  # Write a dataset for each sample interval
+ valid_datasets = []
  for ds in dict_ds.values(): # (sample_interval, ds)
  # Write L0C netCDF4 dataset
  if ds["time"].size > 1:
- # Get sensor name from dataset
- sensor_name = ds.attrs.get("sensor_name")
- campaign_name = ds.attrs.get("campaign_name")
- station_name = ds.attrs.get("station_name")
-
- # Set encodings
- ds = set_l0b_encodings(ds=ds, sensor_name=sensor_name)
- # Update global attributes
- ds = set_disdrodb_attrs(ds, product=product)
-
- # Define product filepath
+ # Write L0C netCDF4 dataset
  filename = define_l0c_filename(ds, campaign_name=campaign_name, station_name=station_name)
- folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
+ folder_path = define_file_folder_path(ds, dir_path=data_dir, folder_partitioning=folder_partitioning)
  filepath = os.path.join(folder_path, filename)
-
- # Write to disk
  write_product(ds, filepath=filepath, force=force)
+ valid_datasets.append(ds)
+
+ # Return a valid L0C dataset (just for logging)
+ if len(valid_datasets) == 0:
+ return None # can happen when e.g. for a day there is not data (but input filepaths of previous/next day)
+ return valid_datasets[0]
+
+ # Define product processing function kwargs
+ core_func_kwargs = dict( # noqa: C408
+ event_info=event_info,
+ metadata=metadata,
+ # Archiving options
+ data_dir=data_dir,
+ folder_partitioning=folder_partitioning,
+ # Processing options
+ verbose=verbose,
+ force=force,
+ )

- # Clean environment
- del ds
-
- ##--------------------------------------------------------------------.
- #### - Define logger file final directory
- if folder_partitioning != "":
- print(day)
- dirtree = define_partitioning_tree(
- time=datetime.datetime.strptime("2022-03-22", "%Y-%m-%d"),
- folder_partitioning=folder_partitioning,
- )
- log_dst_dir = os.path.join(logs_dir, dirtree)
- os.makedirs(log_dst_dir, exist_ok=True)
-
- # Log end processing
- msg = f"{product} processing for {day} has ended."
- log_info(logger=logger, msg=msg, verbose=verbose)
- success_flag = True
-
- ##--------------------------------------------------------------------.
- # Otherwise log the error
- except Exception as e:
- error_type = str(type(e).__name__)
- msg = f"{error_type}: {e}"
- log_error(logger, msg, verbose=verbose)
-
- # Close the file logger
- close_logger(logger)
-
- # Move logger file to correct partitioning directory
- if success_flag and folder_partitioning != "" and logger_filepath is not None:
- # Move logger file to correct partitioning directory
- dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
- shutil.move(logger_filepath, dst_filepath)
- logger_filepath = dst_filepath
-
+ # Run product generation
+ logger_filepath = run_product_generation(
+ product=product,
+ logs_dir=logs_dir,
+ logs_filename=logs_filename,
+ parallel=parallel,
+ verbose=verbose,
+ folder_partitioning=folder_partitioning,
+ core_func=core,
+ core_func_kwargs=core_func_kwargs,
+ pass_logger=True,
+ )
  # Return the logger file path
  return logger_filepath

@@ -579,11 +507,11 @@ def run_l0a_station(
  # Create directory structure
  data_dir = create_l0_directory_structure(
  data_archive_dir=data_archive_dir,
+ metadata_archive_dir=metadata_archive_dir,
  data_source=data_source,
  campaign_name=campaign_name,
- metadata_archive_dir=metadata_archive_dir,
- product=product, # L0A or L0B
  station_name=station_name,
+ product=product, # L0A or L0B
  force=force,
  )

@@ -647,8 +575,7 @@ def run_l0a_station(
  filepath=filepath,
  data_dir=data_dir,
  logs_dir=logs_dir,
- campaign_name=campaign_name,
- station_name=station_name,
+ logs_filename=os.path.basename(filepath),
  # Reader argument
  reader=reader,
  # Processing info
@@ -661,7 +588,7 @@
  )
  for filepath in filepaths
  ]
- list_logs = dask.compute(*list_tasks) if parallel else list_tasks
+ list_logs = execute_tasks_safely(list_tasks=list_tasks, parallel=parallel, logs_dir=logs_dir)

  # -----------------------------------------------------------------.
  # Define product summary logs
@@ -794,30 +721,19 @@ def run_l0b_station(
  )

  ##----------------------------------------------------------------.
- # Get L0A files for the station
+ # List files to process
+ # - If no data available, print error message and return None
  required_product = get_required_product(product)
- flag_not_available_data = False
- try:
- filepaths = find_files(
- data_archive_dir=data_archive_dir,
- data_source=data_source,
- campaign_name=campaign_name,
- station_name=station_name,
- product=required_product,
- debugging_mode=debugging_mode,
- )
- except Exception as e:
- print(str(e)) # Case where no file paths available
- flag_not_available_data = True
-
- # -------------------------------------------------------------------------.
- # If no data available, print error message and return None
- if flag_not_available_data:
- msg = (
- f"{product} processing of {data_source} {campaign_name} {station_name} "
- + f"has not been launched because of missing {required_product} data."
- )
- print(msg)
+ filepaths = try_get_required_filepaths(
+ data_archive_dir=data_archive_dir,
+ data_source=data_source,
+ campaign_name=campaign_name,
+ station_name=station_name,
+ product=required_product,
+ # Processing options
+ debugging_mode=debugging_mode,
+ )
+ if filepaths is None:
  return

  ##----------------------------------------------------------------.
@@ -826,16 +742,13 @@ def run_l0b_station(
  # - If parallel=True, it does that in parallel using dask.bag
  # Settings npartitions=len(filepaths) enable to wait prior task on a core
  # finish before starting a new one.
- # BUG: If debugging_mode=True and parallel=True a subtle bug can currently occur when
- # two processes with a subsetted L0A files want to create the same L0B files !
  list_tasks = [
  _generate_l0b(
  filepath=filepath,
  data_dir=data_dir,
  logs_dir=logs_dir,
+ logs_filename=os.path.basename(filepath),
  metadata=metadata,
- campaign_name=campaign_name,
- station_name=station_name,
  force=force,
  verbose=verbose,
  debugging_mode=debugging_mode,
@@ -843,38 +756,8 @@ def run_l0b_station(
  )
  for filepath in filepaths
  ]
- list_logs = dask.compute(*list_tasks) if parallel else list_tasks
- # if not parallel:
- # list_logs = [
- # _generate_l0b(
- # filepath=filepath,
- # data_dir=data_dir,
- # logs_dir=logs_dir,
- # metadata=metadata,
- # campaign_name=campaign_name,
- # station_name=station_name,
- # force=force,
- # verbose=verbose,
- # debugging_mode=debugging_mode,
- # parallel=parallel,
- # )
- # for filepath in filepaths
- # ]
-
- # else:
- # bag = db.from_sequence(filepaths, npartitions=len(filepaths))
- # list_logs = bag.map(
- # _generate_l0b,
- # data_dir=data_dir,
- # logs_dir=logs_dir,
- # metadata=metadata,
- # campaign_name=campaign_name,
- # station_name=station_name,
- # force=force,
- # verbose=verbose,
- # debugging_mode=debugging_mode,
- # parallel=parallel,
- # ).compute()
+
+ list_logs = execute_tasks_safely(list_tasks=list_tasks, parallel=parallel, logs_dir=logs_dir)

  # -----------------------------------------------------------------.
  # Define L0B summary logs
@@ -990,6 +873,15 @@ def run_l0c_station(
  station_name=station_name,
  )

+ # -----------------------------------------------------------------.
+ # Retrieve metadata
+ metadata = read_station_metadata(
+ metadata_archive_dir=metadata_archive_dir,
+ data_source=data_source,
+ campaign_name=campaign_name,
+ station_name=station_name,
+ )
+
  # ------------------------------------------------------------------------.
  # Start processing
  t_i = time.time()
@@ -1017,46 +909,26 @@ def run_l0c_station(
  force=force,
  )

- # ------------------------------------------------------------------------.
- # Define metadata filepath
- metadata_filepath = define_metadata_filepath(
- metadata_archive_dir=metadata_archive_dir,
+ # -------------------------------------------------------------------------.
+ # List files to process
+ # - If no data available, print error message and return None
+ required_product = get_required_product(product)
+ filepaths = try_get_required_filepaths(
+ data_archive_dir=data_archive_dir,
  data_source=data_source,
  campaign_name=campaign_name,
  station_name=station_name,
+ product=required_product,
+ # Processing options
+ debugging_mode=debugging_mode,
  )
-
- # -------------------------------------------------------------------------.
- # List files to process
- required_product = get_required_product(product)
- flag_not_available_data = False
- try:
- filepaths = find_files(
- data_archive_dir=data_archive_dir,
- data_source=data_source,
- campaign_name=campaign_name,
- station_name=station_name,
- product=required_product,
- # Processing options
- debugging_mode=debugging_mode,
- )
- except Exception as e:
- print(str(e)) # Case where no file paths available
- flag_not_available_data = True
-
- # -------------------------------------------------------------------------.
- # If no data available, print error message and return None
- if flag_not_available_data:
- msg = (
- f"{product} processing of {data_source} {campaign_name} {station_name} "
- + f"has not been launched because of missing {required_product} data."
- )
- print(msg)
+ if filepaths is None:
  return

  # -------------------------------------------------------------------------.
- # Retrieve dictionary with the required files for each day.
- dict_days_files = get_files_per_days(filepaths)
+ # Retrieve dictionary with the required files per time block
+ # TODO: allow customizing this in config file, but risk of out of memory !
+ list_event_info = get_files_per_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)

  # -----------------------------------------------------------------.
  # Generate L0C files
@@ -1064,21 +936,19 @@
  # - If parallel=True, it does that in parallel using dask.delayed
  list_tasks = [
  _generate_l0c(
- day=day,
- filepaths=filepaths,
+ event_info=event_info,
+ metadata=metadata,
  data_dir=data_dir,
  logs_dir=logs_dir,
- metadata_filepath=metadata_filepath,
- campaign_name=campaign_name,
- station_name=station_name,
+ logs_filename=event_info["start_time"].strftime("%Y%m%dT%H%M%S"),
  # Processing options
  force=force,
  verbose=verbose,
  parallel=parallel,
  )
- for day, filepaths in dict_days_files.items()
+ for event_info in list_event_info
  ]
- list_logs = dask.compute(*list_tasks) if parallel else list_tasks
+ list_logs = execute_tasks_safely(list_tasks=list_tasks, parallel=parallel, logs_dir=logs_dir)

  # -----------------------------------------------------------------.
  # Define summary logs