disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. disdrodb/__init__.py +68 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +177 -24
  7. disdrodb/api/configs.py +3 -3
  8. disdrodb/api/info.py +13 -13
  9. disdrodb/api/io.py +281 -22
  10. disdrodb/api/path.py +184 -195
  11. disdrodb/api/search.py +18 -9
  12. disdrodb/cli/disdrodb_create_summary.py +103 -0
  13. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  14. disdrodb/cli/disdrodb_run_l0.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
  19. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  20. disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
  21. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  22. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  23. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  24. disdrodb/configs.py +149 -4
  25. disdrodb/constants.py +61 -0
  26. disdrodb/data_transfer/download_data.py +127 -11
  27. disdrodb/etc/configs/attributes.yaml +339 -0
  28. disdrodb/etc/configs/encodings.yaml +473 -0
  29. disdrodb/etc/products/L1/global.yaml +13 -0
  30. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  31. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +22 -0
  33. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  34. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  35. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  38. disdrodb/etc/products/L2M/global.yaml +26 -0
  39. disdrodb/issue/writer.py +2 -0
  40. disdrodb/l0/__init__.py +13 -0
  41. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  42. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  43. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  44. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  45. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  46. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  48. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  49. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  50. disdrodb/l0/l0a_processing.py +37 -32
  51. disdrodb/l0/l0b_nc_processing.py +118 -8
  52. disdrodb/l0/l0b_processing.py +30 -65
  53. disdrodb/l0/l0c_processing.py +369 -259
  54. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
  55. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  56. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  58. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  59. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  63. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  66. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  67. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  69. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  71. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  72. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  73. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
  74. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  75. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  76. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  79. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  80. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  81. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  83. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
  84. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  85. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  86. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  87. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  88. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  89. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  90. disdrodb/l1/__init__.py +5 -0
  91. disdrodb/l1/fall_velocity.py +46 -0
  92. disdrodb/l1/filters.py +34 -20
  93. disdrodb/l1/processing.py +46 -45
  94. disdrodb/l1/resampling.py +77 -66
  95. disdrodb/l1_env/routines.py +18 -3
  96. disdrodb/l2/__init__.py +7 -0
  97. disdrodb/l2/empirical_dsd.py +58 -10
  98. disdrodb/l2/processing.py +268 -117
  99. disdrodb/metadata/checks.py +132 -125
  100. disdrodb/metadata/standards.py +3 -1
  101. disdrodb/psd/fitting.py +631 -345
  102. disdrodb/psd/models.py +9 -6
  103. disdrodb/routines/__init__.py +54 -0
  104. disdrodb/{l0/routines.py → routines/l0.py} +316 -355
  105. disdrodb/{l1/routines.py → routines/l1.py} +76 -116
  106. disdrodb/routines/l2.py +1019 -0
  107. disdrodb/{routines.py → routines/wrappers.py} +98 -10
  108. disdrodb/scattering/__init__.py +16 -4
  109. disdrodb/scattering/axis_ratio.py +61 -37
  110. disdrodb/scattering/permittivity.py +504 -0
  111. disdrodb/scattering/routines.py +746 -184
  112. disdrodb/summary/__init__.py +17 -0
  113. disdrodb/summary/routines.py +4196 -0
  114. disdrodb/utils/archiving.py +434 -0
  115. disdrodb/utils/attrs.py +68 -125
  116. disdrodb/utils/cli.py +5 -5
  117. disdrodb/utils/compression.py +30 -1
  118. disdrodb/utils/dask.py +121 -9
  119. disdrodb/utils/dataframe.py +61 -7
  120. disdrodb/utils/decorators.py +31 -0
  121. disdrodb/utils/directories.py +35 -15
  122. disdrodb/utils/encoding.py +37 -19
  123. disdrodb/{l2 → utils}/event.py +15 -173
  124. disdrodb/utils/logger.py +14 -7
  125. disdrodb/utils/manipulations.py +81 -0
  126. disdrodb/utils/routines.py +166 -0
  127. disdrodb/utils/subsetting.py +214 -0
  128. disdrodb/utils/time.py +35 -177
  129. disdrodb/utils/writer.py +20 -7
  130. disdrodb/utils/xarray.py +5 -4
  131. disdrodb/viz/__init__.py +13 -0
  132. disdrodb/viz/plots.py +398 -0
  133. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
  134. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
  135. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
  136. disdrodb/l1/encoding_attrs.py +0 -642
  137. disdrodb/l2/processing_options.py +0 -213
  138. disdrodb/l2/routines.py +0 -868
  139. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  140. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
  141. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
  142. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
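
The hunks below appear to come from the relocated L0 routines module, `disdrodb/{l0/routines.py → routines/l0.py}` (entry 104 above). The recurring change is structural: each `_generate_l0a` / `_generate_l0b` / `_generate_l0c` helper no longer opens its own file logger and wraps its body in try/except; it now defines a nested `core()` function and hands it to a shared `run_product_generation()` helper imported from `disdrodb.utils.routines`. A minimal sketch of that pattern follows; the `run_product_generation` body shown here is a hypothetical simplification for illustration, not the disdrodb implementation (which also handles folder partitioning, verbosity, and log summaries).

    import logging
    import os


    def run_product_generation(product, logs_dir, logs_filename, core_func, core_func_kwargs):
        # Hypothetical stand-in: own the logger lifecycle and error handling that each
        # _generate_* function previously re-implemented inline.
        logger_filepath = os.path.join(logs_dir, f"logs_{product}_{logs_filename}.log")
        logger = logging.getLogger(logger_filepath)
        handler = logging.FileHandler(logger_filepath)
        logger.addHandler(handler)
        try:
            logger.info(f"{product} processing of {logs_filename} has started.")
            core_func(logger=logger, **core_func_kwargs)
            logger.info(f"{product} processing of {logs_filename} has ended.")
        except Exception as e:
            logger.error(f"{type(e).__name__}: {e}")
        finally:
            handler.close()
        return logger_filepath

With this shape, the per-product functions only describe what a single file conversion does, while failure handling and log collection stay in one place.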
@@ -24,11 +24,7 @@ import os
 import time
 from typing import Optional
 
-import dask
-
-from disdrodb.api.checks import check_sensor_name
-
-# Directory
+from disdrodb.api.checks import check_measurement_intervals, check_sensor_name, check_station_inputs
 from disdrodb.api.create_directories import (
     create_l0_directory_structure,
     create_logs_directory,
@@ -40,7 +36,6 @@ from disdrodb.api.path import (
     define_l0a_filename,
     define_l0b_filename,
     define_l0c_filename,
-    define_metadata_filepath,
 )
 from disdrodb.api.search import get_required_product
 from disdrodb.configs import get_data_archive_dir, get_folder_partitioning, get_metadata_archive_dir
@@ -52,31 +47,21 @@ from disdrodb.l0.l0a_processing import (
     write_l0a,
 )
 from disdrodb.l0.l0b_nc_processing import sanitize_ds
-from disdrodb.l0.l0b_processing import (
-    create_l0b_from_l0a,
-    set_l0b_encodings,
-    write_l0b,
-)
-from disdrodb.l0.l0c_processing import (
-    create_daily_file,
-    get_files_per_days,
-    retrieve_possible_measurement_intervals,
-)
+from disdrodb.l0.l0b_processing import generate_l0b
+from disdrodb.l0.l0c_processing import TOLERANCE_SECONDS, create_l0c_datasets
 from disdrodb.metadata import read_station_metadata
+from disdrodb.utils.archiving import get_files_per_time_block
+from disdrodb.utils.dask import execute_tasks_safely
 from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel
 
 # Logger
 from disdrodb.utils.logger import (
-    close_logger,
-    create_logger_file,
     create_product_logs,
-    log_error,
     log_info,
+    # log_warning,
 )
-
-# log_warning,
+from disdrodb.utils.routines import run_product_generation, try_get_required_filepaths
 from disdrodb.utils.writer import write_product
-from disdrodb.utils.yaml import read_yaml
 
 logger = logging.getLogger(__name__)
 
@@ -90,8 +75,7 @@ def _generate_l0a(
     filepath,
     data_dir,
     logs_dir,
-    campaign_name,
-    station_name,
+    logs_filename,
     # Processing info
     reader,
     metadata,
@@ -101,225 +85,236 @@ def _generate_l0a(
     verbose,
     parallel,
 ):
-    """Generate L0A file from raw file."""
+    """Generate L0A file from raw txt file."""
     # Define product
     product = "L0A"
-
     # Define folder partitioning
     folder_partitioning = get_folder_partitioning()
 
-    # Retrieve sensor name
-    sensor_name = metadata["sensor_name"]
-
-    ##------------------------------------------------------------------------.
-    # Create file logger
-    filename = os.path.basename(filepath)
-    logger, logger_filepath = create_logger_file(
+    # Define product processing function
+    def core(
+        filepath,
+        reader,
+        metadata,
+        issue_dict,
+        # Archiving options
+        data_dir,
+        folder_partitioning,
+        # Processing options
+        verbose,
+        force,
+        logger,
+    ):
+        """Define L0A product processing."""
+        # Retrieve information from metadata
+        sensor_name = metadata["sensor_name"]
+        campaign_name = metadata["campaign_name"]
+        station_name = metadata["station_name"]
+        # Read raw data into L0A format
+        df = reader(filepath, logger=logger)
+        df = sanitize_df(df, sensor_name=sensor_name, verbose=verbose, issue_dict=issue_dict, logger=logger)
+
+        # Write L0A dataframe
+        filename = define_l0a_filename(df, campaign_name=campaign_name, station_name=station_name)
+        folder_path = define_file_folder_path(df, dir_path=data_dir, folder_partitioning=folder_partitioning)
+        out_path = os.path.join(folder_path, filename)
+        write_l0a(df, filepath=out_path, force=force, logger=logger, verbose=verbose)
+        # Return L0A dataframe
+        return df
+
+    # Define product processing function kwargs
+    core_func_kwargs = dict(  # noqa: C408
+        filepath=filepath,
+        reader=reader,
+        metadata=metadata,
+        issue_dict=issue_dict,
+        # Archiving options
+        data_dir=data_dir,
+        folder_partitioning=folder_partitioning,
+        # Processing options
+        verbose=verbose,
+        force=force,
+    )
+    # Run product generation
+    logger_filepath = run_product_generation(
+        product=product,
         logs_dir=logs_dir,
-        filename=filename,
+        logs_filename=logs_filename,
         parallel=parallel,
+        verbose=verbose,
+        folder_partitioning=folder_partitioning,
+        core_func=core,
+        core_func_kwargs=core_func_kwargs,
+        pass_logger=True,
     )
-
-    ##------------------------------------------------------------------------.
-    # Log start processing
-    msg = f"{product} processing of {filename} has started."
-    log_info(logger=logger, msg=msg, verbose=verbose)
-
-    ##------------------------------------------------------------------------.
-    ### - Read raw file into a dataframe and sanitize for L0A format
-    try:
-        df = reader(filepath, logger=logger)
-        df = sanitize_df(
-            df=df,
-            sensor_name=sensor_name,
-            verbose=verbose,
-            issue_dict=issue_dict,
-            logger=logger,
-        )
-
-        ##--------------------------------------------------------------------.
-        #### - Write to Parquet
-        filename = define_l0a_filename(df=df, campaign_name=campaign_name, station_name=station_name)
-        folder_path = define_file_folder_path(df, data_dir=data_dir, folder_partitioning=folder_partitioning)
-        filepath = os.path.join(folder_path, filename)
-        write_l0a(df=df, filepath=filepath, force=force, logger=logger, verbose=verbose)
-
-        ##--------------------------------------------------------------------.
-        # Clean environment
-        del df
-
-        # Log end processing
-        msg = f"{product} processing of {filename} has ended."
-        log_info(logger=logger, msg=msg, verbose=verbose)
-
-    # Otherwise log the error
-    except Exception as e:
-        error_type = str(type(e).__name__)
-        msg = f"{error_type}: {e}"
-        log_error(logger=logger, msg=msg, verbose=verbose)
-
-    # Close the file logger
-    close_logger(logger)
-
-    # Return the logger file path
     return logger_filepath
 
 
 @delayed_if_parallel
 @single_threaded_if_parallel
-def _generate_l0b(
+def _generate_l0b_from_nc(
     filepath,
     data_dir,
     logs_dir,
-    campaign_name,
-    station_name,
+    logs_filename,
     # Processing info
+    reader,
     metadata,
+    issue_dict,
     # Processing options
     force,
     verbose,
     parallel,
-    debugging_mode,
 ):
+    """Generate L0B file from raw netCDF file."""
     # Define product
     product = "L0B"
-
     # Define folder partitioning
     folder_partitioning = get_folder_partitioning()
 
-    # -----------------------------------------------------------------.
-    # Create file logger
-    filename = os.path.basename(filepath)
-    logger, logger_filepath = create_logger_file(
-        logs_dir=logs_dir,
-        filename=filename,
-        parallel=parallel,
-    )
-
-    ##------------------------------------------------------------------------.
-    # Log start processing
-    msg = f"{product} processing of {filename} has started."
-    log_info(logger=logger, msg=msg, verbose=verbose)
-
-    ##------------------------------------------------------------------------.
-    # Retrieve sensor name
-    sensor_name = metadata["sensor_name"]
-    check_sensor_name(sensor_name)
-
-    ##------------------------------------------------------------------------.
-    try:
-        # Read L0A Apache Parquet file
-        df = read_l0a_dataframe(filepath, logger=logger, verbose=verbose, debugging_mode=debugging_mode)
-
-        # -----------------------------------------------------------------.
-        # Create xarray Dataset
-        ds = create_l0b_from_l0a(df=df, metadata=metadata, logger=logger, verbose=verbose)
+    # Define product processing function
+    def core(
+        filepath,
+        reader,
+        metadata,
+        issue_dict,
+        # Dara archiving options
+        data_dir,
+        folder_partitioning,
+        # Processing options
+        verbose,
+        force,
+        logger,
+    ):
+        """Define L0B product processing."""
+        # Retrieve information from metadata
+        sensor_name = metadata["sensor_name"]
+        campaign_name = metadata["campaign_name"]
+        station_name = metadata["station_name"]
+
+        # Read raw netCDF and sanitize to L0B format
+        ds = reader(filepath, logger=logger)
+        ds = sanitize_ds(
+            ds=ds,
+            sensor_name=sensor_name,
+            metadata=metadata,
+            issue_dict=issue_dict,
+            verbose=verbose,
+            logger=logger,
+        )
 
-        # -----------------------------------------------------------------.
         # Write L0B netCDF4 dataset
         filename = define_l0b_filename(ds=ds, campaign_name=campaign_name, station_name=station_name)
-        folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
+        folder_path = define_file_folder_path(ds, dir_path=data_dir, folder_partitioning=folder_partitioning)
         filepath = os.path.join(folder_path, filename)
-        write_l0b(ds, filepath=filepath, force=force)
-
-        ##--------------------------------------------------------------------.
-        # Clean environment
-        del ds, df
-
-        # Log end processing
-        msg = f"{product} processing of {filename} has ended."
-        log_info(logger=logger, msg=msg, verbose=verbose)
-
-    # Otherwise log the error
-    except Exception as e:
-        error_type = str(type(e).__name__)
-        msg = f"{error_type}: {e}"
-        log_error(logger, msg, verbose=verbose)
-
-    # Close the file logger
-    close_logger(logger)
-
-    # Return the logger file path
+        write_product(ds, filepath=filepath, force=force)
+
+        # Return L0B dataset
+        return ds
+
+    # Define product processing function kwargs
+    core_func_kwargs = dict(  # noqa: C408
+        filepath=filepath,
+        reader=reader,
+        metadata=metadata,
+        issue_dict=issue_dict,
+        # Archiving options
+        data_dir=data_dir,
+        folder_partitioning=folder_partitioning,
+        # Processing options
+        verbose=verbose,
+        force=force,
+    )
+    # Run product generation
+    logger_filepath = run_product_generation(
+        product=product,
+        logs_dir=logs_dir,
+        logs_filename=logs_filename,
+        parallel=parallel,
+        verbose=verbose,
+        folder_partitioning=folder_partitioning,
+        core_func=core,
+        core_func_kwargs=core_func_kwargs,
+        pass_logger=True,
+    )
     return logger_filepath
 
 
-def _generate_l0b_from_nc(
+@delayed_if_parallel
+@single_threaded_if_parallel
+def _generate_l0b(
     filepath,
     data_dir,
     logs_dir,
-    campaign_name,
-    station_name,
+    logs_filename,
     # Processing info
-    reader,
     metadata,
-    issue_dict,
     # Processing options
     force,
     verbose,
     parallel,
+    debugging_mode,
 ):
-
-    # -----------------------------------------------------------------.
-    # Define product name
+    # Define product
    product = "L0B"
-
     # Define folder partitioning
     folder_partitioning = get_folder_partitioning()
 
-    # Retrieve sensor name
-    sensor_name = metadata["sensor_name"]
+    # Define product processing function
+    def core(
+        filepath,
+        metadata,
+        # Archiving options
+        data_dir,
+        folder_partitioning,
+        # Processing options
+        debugging_mode,
+        verbose,
+        force,
+        logger,
+    ):
+        """Define L0B product processing."""
+        # Retrieve information from metadata
+        campaign_name = metadata["campaign_name"]
+        station_name = metadata["station_name"]
 
-    # -----------------------------------------------------------------.
-    # Create file logger
-    filename = os.path.basename(filepath)
-    logger, logger_filepath = create_logger_file(
-        logs_dir=logs_dir,
-        filename=filename,
-        parallel=parallel,
-    )
-
-    ##------------------------------------------------------------------------.
-    # Log start processing
-    msg = f"{product} processing of {filename} has started."
-    log_info(logger=logger, msg=msg, verbose=verbose)
-
-    ##------------------------------------------------------------------------.
-    ### - Read raw netCDF and sanitize for L0B format
-    try:
-        ds = reader(filepath, logger=logger)
-        ds = sanitize_ds(
-            ds=ds,
-            sensor_name=sensor_name,
-            metadata=metadata,
-            issue_dict=issue_dict,
-            verbose=verbose,
-            logger=logger,
-        )
+        # Read L0A Apache Parquet file
+        df = read_l0a_dataframe(filepath, debugging_mode=debugging_mode)
+        # Create L0B xarray Dataset
+        ds = generate_l0b(df=df, metadata=metadata, logger=logger, verbose=verbose)
 
-        # -----------------------------------------------------------------.
         # Write L0B netCDF4 dataset
         filename = define_l0b_filename(ds=ds, campaign_name=campaign_name, station_name=station_name)
-        folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
+        folder_path = define_file_folder_path(ds, dir_path=data_dir, folder_partitioning=folder_partitioning)
         filepath = os.path.join(folder_path, filename)
-        write_l0b(ds, filepath=filepath, force=force)
-
-        ##--------------------------------------------------------------------.
-        # Clean environment
-        del ds
-
-        # Log end processing
-        msg = f"L0B processing of {filename} has ended."
-        log_info(logger=logger, msg=msg, verbose=verbose)
-
-    # Otherwise log the error
-    except Exception as e:
-        error_type = str(type(e).__name__)
-        msg = f"{error_type}: {e}"
-        log_error(logger, msg, verbose=verbose)
-
-    # Close the file logger
-    close_logger(logger)
-
+        write_product(ds, filepath=filepath, force=force)
+        # Return L0B dataset
+        return ds
+
+    # Define product processing function kwargs
+    core_func_kwargs = dict(  # noqa: C408
+        filepath=filepath,
+        metadata=metadata,
+        # Archiving options
+        data_dir=data_dir,
+        folder_partitioning=folder_partitioning,
+        # Processing options
+        debugging_mode=debugging_mode,
+        verbose=verbose,
+        force=force,
+    )
+    # Run product generation
+    logger_filepath = run_product_generation(
+        product=product,
+        logs_dir=logs_dir,
+        logs_filename=logs_filename,
+        parallel=parallel,
+        verbose=verbose,
+        folder_partitioning=folder_partitioning,
+        core_func=core,
+        core_func_kwargs=core_func_kwargs,
+        pass_logger=True,
+    )
     # Return the logger file path
     return logger_filepath
 
@@ -327,93 +322,93 @@ def _generate_l0b_from_nc(
 @delayed_if_parallel
 @single_threaded_if_parallel
 def _generate_l0c(
-    day,
-    filepaths,
+    event_info,
     data_dir,
     logs_dir,
-    metadata_filepath,
-    campaign_name,
-    station_name,
+    logs_filename,
+    # Processing info
+    metadata,
     # Processing options
     force,
     verbose,
     parallel,  # this is used only to initialize the correct logger !
 ):
-    # -----------------------------------------------------------------.
-    # Define product name
+    """Define L0C product processing."""
+    # Define product
     product = "L0C"
-
     # Define folder partitioning
     folder_partitioning = get_folder_partitioning()
 
-    # -----------------------------------------------------------------.
-    # Create file logger
-    logger, logger_filepath = create_logger_file(
-        logs_dir=logs_dir,
-        filename=day,
-        parallel=parallel,
-    )
-
-    ##------------------------------------------------------------------------.
-    # Log start processing
-    msg = f"{product} processing for {day} has started."
-    log_info(logger=logger, msg=msg, verbose=verbose)
-
-    ##------------------------------------------------------------------------.
-    ### Core computation
-    try:
-        # Retrieve measurement_intervals
-        # - TODO: in future available from dataset
-        metadata = read_yaml(metadata_filepath)
-        measurement_intervals = retrieve_possible_measurement_intervals(metadata)
+    # Define product processing function
+    def core(
+        event_info,
+        metadata,
+        # Archiving options
+        data_dir,
+        folder_partitioning,
+        # Processing options
+        verbose,
+        force,
+        logger,
+    ):
+        """Define L0C product processing."""
+        # Retrieve information from metadata
+        sensor_name = metadata["sensor_name"]
+        campaign_name = metadata["campaign_name"]
+        station_name = metadata["station_name"]
+        measurement_intervals = check_measurement_intervals(metadata["measurement_interval"])
 
         # Produce L0C datasets
-        dict_ds = create_daily_file(
-            day=day,
-            filepaths=filepaths,
+        dict_ds = create_l0c_datasets(
+            event_info=event_info,
             measurement_intervals=measurement_intervals,
+            sensor_name=sensor_name,
             ensure_variables_equality=True,
             logger=logger,
             verbose=verbose,
         )
 
         # Write a dataset for each sample interval
+        valid_datasets = []
         for ds in dict_ds.values():  # (sample_interval, ds)
             # Write L0C netCDF4 dataset
             if ds["time"].size > 1:
-                # Get sensor name from dataset
-                sensor_name = ds.attrs.get("sensor_name")
-                campaign_name = ds.attrs.get("campaign_name")
-                station_name = ds.attrs.get("station_name")
-
-                # Set encodings
-                ds = set_l0b_encodings(ds=ds, sensor_name=sensor_name)
-
-                # Define filepath
+                # Write L0C netCDF4 dataset
                 filename = define_l0c_filename(ds, campaign_name=campaign_name, station_name=station_name)
-                folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
+                folder_path = define_file_folder_path(ds, dir_path=data_dir, folder_partitioning=folder_partitioning)
                 filepath = os.path.join(folder_path, filename)
+                write_product(ds, filepath=filepath, force=force)
+                valid_datasets.append(ds)
+
+        # Return a valid L0C dataset (just for logging)
+        if len(valid_datasets) == 0:
+            return None  # can happen when e.g. for a day there is not data (but input filepaths of previous/next day)
+        return valid_datasets[0]
+
+    # Define product processing function kwargs
+    core_func_kwargs = dict(  # noqa: C408
+        event_info=event_info,
+        metadata=metadata,
+        # Archiving options
+        data_dir=data_dir,
+        folder_partitioning=folder_partitioning,
+        # Processing options
+        verbose=verbose,
+        force=force,
+    )
 
-                # Write to disk
-                write_product(ds, product=product, filepath=filepath, force=force)
-
-                # Clean environment
-                del ds
-
-        # Log end processing
-        msg = f"{product} processing for {day} has ended."
-        log_info(logger=logger, msg=msg, verbose=verbose)
-
-        ##--------------------------------------------------------------------.
-    # Otherwise log the error
-    except Exception as e:
-        error_type = str(type(e).__name__)
-        msg = f"{error_type}: {e}"
-        log_error(logger, msg, verbose=verbose)
-
-    # Close the file logger
-    close_logger(logger)
-
+    # Run product generation
+    logger_filepath = run_product_generation(
+        product=product,
+        logs_dir=logs_dir,
+        logs_filename=logs_filename,
+        parallel=parallel,
+        verbose=verbose,
+        folder_partitioning=folder_partitioning,
+        core_func=core,
+        core_func_kwargs=core_func_kwargs,
+        pass_logger=True,
+    )
     # Return the logger file path
     return logger_filepath
 
@@ -474,6 +469,15 @@ def run_l0a_station(
     data_archive_dir = get_data_archive_dir(data_archive_dir)
     metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir)
 
+    # Check valid data_source, campaign_name, and station_name
+    check_station_inputs(
+        metadata_archive_dir=metadata_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
+
+    # ------------------------------------------------------------------------.
     # Read metadata
     metadata = read_station_metadata(
         metadata_archive_dir=metadata_archive_dir,
@@ -503,11 +507,11 @@ def run_l0a_station(
     # Create directory structure
     data_dir = create_l0_directory_structure(
         data_archive_dir=data_archive_dir,
+        metadata_archive_dir=metadata_archive_dir,
         data_source=data_source,
         campaign_name=campaign_name,
-        metadata_archive_dir=metadata_archive_dir,
-        product=product,  # L0A or L0B
         station_name=station_name,
+        product=product,  # L0A or L0B
         force=force,
     )
 
@@ -571,8 +575,7 @@ def run_l0a_station(
             filepath=filepath,
             data_dir=data_dir,
             logs_dir=logs_dir,
-            campaign_name=campaign_name,
-            station_name=station_name,
+            logs_filename=os.path.basename(filepath),
             # Reader argument
             reader=reader,
             # Processing info
@@ -585,7 +588,7 @@ def run_l0a_station(
         )
         for filepath in filepaths
     ]
-    list_logs = dask.compute(*list_tasks) if parallel else list_tasks
+    list_logs = execute_tasks_safely(list_tasks=list_tasks, parallel=parallel, logs_dir=logs_dir)
 
     # -----------------------------------------------------------------.
     # Define product summary logs
@@ -652,7 +655,7 @@ def run_l0b_station(
         and multi-threading will be automatically exploited to speed up I/O tasks.
     debugging_mode : bool, optional
         If ``True``, the amount of data processed will be reduced.
-        Only the first 100 rows of 3 L0A files will be processed. The default value is ``False``.
+        Only 100 rows sampled from 3 L0A files will be processed. The default value is ``False``.
     remove_l0a: bool, optional
         Whether to remove the processed L0A files. The default value is ``False``.
     data_archive_dir : str, optional
@@ -669,6 +672,13 @@ def run_l0b_station(
     # Retrieve DISDRODB Metadata Archive directory
     metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir)
 
+    # Check valid data_source, campaign_name, and station_name
+    check_station_inputs(
+        metadata_archive_dir=metadata_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
     # -----------------------------------------------------------------.
     # Retrieve metadata
     metadata = read_station_metadata(
@@ -711,30 +721,19 @@ def run_l0b_station(
     )
 
     ##----------------------------------------------------------------.
-    # Get L0A files for the station
+    # List files to process
+    # - If no data available, print error message and return None
     required_product = get_required_product(product)
-    flag_not_available_data = False
-    try:
-        filepaths = find_files(
-            data_archive_dir=data_archive_dir,
-            data_source=data_source,
-            campaign_name=campaign_name,
-            station_name=station_name,
-            product=required_product,
-            debugging_mode=debugging_mode,
-        )
-    except Exception as e:
-        print(str(e))  # Case where no file paths available
-        flag_not_available_data = True
-
-    # -------------------------------------------------------------------------.
-    # If no data available, print error message and return None
-    if flag_not_available_data:
-        msg = (
-            f"{product} processing of {data_source} {campaign_name} {station_name}"
-            + f"has not been launched because of missing {required_product} data."
-        )
-        print(msg)
+    filepaths = try_get_required_filepaths(
+        data_archive_dir=data_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+        product=required_product,
+        # Processing options
+        debugging_mode=debugging_mode,
+    )
+    if filepaths is None:
         return
 
     ##----------------------------------------------------------------.
@@ -743,16 +742,13 @@ def run_l0b_station(
     # - If parallel=True, it does that in parallel using dask.bag
     # Settings npartitions=len(filepaths) enable to wait prior task on a core
     # finish before starting a new one.
-    # BUG: If debugging_mode=True and parallel=True a subtle bug can currently occur when
-    # two processes with a subsetted L0A files want to create the same L0B files !
     list_tasks = [
         _generate_l0b(
             filepath=filepath,
             data_dir=data_dir,
             logs_dir=logs_dir,
+            logs_filename=os.path.basename(filepath),
             metadata=metadata,
-            campaign_name=campaign_name,
-            station_name=station_name,
             force=force,
             verbose=verbose,
             debugging_mode=debugging_mode,
@@ -760,38 +756,8 @@ def run_l0b_station(
         )
         for filepath in filepaths
     ]
-    list_logs = dask.compute(*list_tasks) if parallel else list_tasks
-    # if not parallel:
-    #     list_logs = [
-    #         _generate_l0b(
-    #             filepath=filepath,
-    #             data_dir=data_dir,
-    #             logs_dir=logs_dir,
-    #             metadata=metadata,
-    #             campaign_name=campaign_name,
-    #             station_name=station_name,
-    #             force=force,
-    #             verbose=verbose,
-    #             debugging_mode=debugging_mode,
-    #             parallel=parallel,
-    #         )
-    #         for filepath in filepaths
-    #     ]
-
-    # else:
-    #     bag = db.from_sequence(filepaths, npartitions=len(filepaths))
-    #     list_logs = bag.map(
-    #         _generate_l0b,
-    #         data_dir=data_dir,
-    #         logs_dir=logs_dir,
-    #         metadata=metadata,
-    #         campaign_name=campaign_name,
-    #         station_name=station_name,
-    #         force=force,
-    #         verbose=verbose,
-    #         debugging_mode=debugging_mode,
-    #         parallel=parallel,
-    #     ).compute()
+
+    list_logs = execute_tasks_safely(list_tasks=list_tasks, parallel=parallel, logs_dir=logs_dir)
 
     # -----------------------------------------------------------------.
     # Define L0B summary logs
@@ -899,6 +865,23 @@ def run_l0c_station(
     # Retrieve DISDRODB Metadata Archive directory
     metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir)
 
+    # Check valid data_source, campaign_name, and station_name
+    check_station_inputs(
+        metadata_archive_dir=metadata_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
+
+    # -----------------------------------------------------------------.
+    # Retrieve metadata
+    metadata = read_station_metadata(
+        metadata_archive_dir=metadata_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
+
     # ------------------------------------------------------------------------.
     # Start processing
     t_i = time.time()
@@ -926,46 +909,26 @@ def run_l0c_station(
         force=force,
     )
 
-    # ------------------------------------------------------------------------.
-    # Define metadata filepath
-    metadata_filepath = define_metadata_filepath(
-        metadata_archive_dir=metadata_archive_dir,
+    # -------------------------------------------------------------------------.
+    # List files to process
+    # - If no data available, print error message and return None
+    required_product = get_required_product(product)
+    filepaths = try_get_required_filepaths(
+        data_archive_dir=data_archive_dir,
         data_source=data_source,
         campaign_name=campaign_name,
         station_name=station_name,
+        product=required_product,
+        # Processing options
+        debugging_mode=debugging_mode,
    )
-
-    # -------------------------------------------------------------------------.
-    # List files to process
-    required_product = get_required_product(product)
-    flag_not_available_data = False
-    try:
-        filepaths = find_files(
-            data_archive_dir=data_archive_dir,
-            data_source=data_source,
-            campaign_name=campaign_name,
-            station_name=station_name,
-            product=required_product,
-            # Processing options
-            debugging_mode=debugging_mode,
-        )
-    except Exception as e:
-        print(str(e))  # Case where no file paths available
-        flag_not_available_data = True
-
-    # -------------------------------------------------------------------------.
-    # If no data available, print error message and return None
-    if flag_not_available_data:
-        msg = (
-            f"{product} processing of {data_source} {campaign_name} {station_name}"
-            + f"has not been launched because of missing {required_product} data."
-        )
-        print(msg)
+    if filepaths is None:
         return
 
     # -------------------------------------------------------------------------.
-    # Retrieve dictionary with the required files for each day.
-    dict_days_files = get_files_per_days(filepaths)
+    # Retrieve dictionary with the required files per time block
+    # TODO: allow customizing this in config file, but risk of out of memory !
+    list_event_info = get_files_per_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)
 
     # -----------------------------------------------------------------.
     # Generate L0C files
@@ -973,21 +936,19 @@ def run_l0c_station(
     # - If parallel=True, it does that in parallel using dask.delayed
     list_tasks = [
         _generate_l0c(
-            day=day,
-            filepaths=filepaths,
+            event_info=event_info,
+            metadata=metadata,
             data_dir=data_dir,
             logs_dir=logs_dir,
-            metadata_filepath=metadata_filepath,
-            campaign_name=campaign_name,
-            station_name=station_name,
+            logs_filename=event_info["start_time"].strftime("%Y%m%dT%H%M%S"),
             # Processing options
             force=force,
             verbose=verbose,
             parallel=parallel,
         )
-        for day, filepaths in dict_days_files.items()
+        for event_info in list_event_info
     ]
-    list_logs = dask.compute(*list_tasks) if parallel else list_tasks
+    list_logs = execute_tasks_safely(list_tasks=list_tasks, parallel=parallel, logs_dir=logs_dir)
 
     # -----------------------------------------------------------------.
     # Define summary logs
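
Another change repeated across `run_l0a_station`, `run_l0b_station`, and `run_l0c_station` is that `list_logs = dask.compute(*list_tasks) if parallel else list_tasks` becomes a call to `execute_tasks_safely(list_tasks=..., parallel=..., logs_dir=...)` from `disdrodb/utils/dask.py`. A rough, hypothetical sketch of what such a wrapper can look like (the actual helper presumably also records scheduler failures under `logs_dir` rather than printing):

    import dask


    def execute_tasks_safely(list_tasks, parallel, logs_dir):
        # Hypothetical simplification: with parallel=True the list holds dask.delayed
        # objects that still need computing; otherwise it already holds log file paths.
        if not parallel:
            return list_tasks
        try:
            return list(dask.compute(*list_tasks))
        except Exception as e:
            # Assumed behaviour: report the failure instead of crashing the whole station run.
            print(f"Parallel execution failed ({type(e).__name__}: {e}); see logs in {logs_dir}")
            return []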