disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. disdrodb/__init__.py +68 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +177 -24
  7. disdrodb/api/configs.py +3 -3
  8. disdrodb/api/info.py +13 -13
  9. disdrodb/api/io.py +281 -22
  10. disdrodb/api/path.py +184 -195
  11. disdrodb/api/search.py +18 -9
  12. disdrodb/cli/disdrodb_create_summary.py +103 -0
  13. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  14. disdrodb/cli/disdrodb_run_l0.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
  19. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  20. disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
  21. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  22. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  23. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  24. disdrodb/configs.py +149 -4
  25. disdrodb/constants.py +61 -0
  26. disdrodb/data_transfer/download_data.py +127 -11
  27. disdrodb/etc/configs/attributes.yaml +339 -0
  28. disdrodb/etc/configs/encodings.yaml +473 -0
  29. disdrodb/etc/products/L1/global.yaml +13 -0
  30. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  31. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +22 -0
  33. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  34. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  35. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  38. disdrodb/etc/products/L2M/global.yaml +26 -0
  39. disdrodb/issue/writer.py +2 -0
  40. disdrodb/l0/__init__.py +13 -0
  41. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  42. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  43. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  44. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  45. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  46. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  48. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  49. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  50. disdrodb/l0/l0a_processing.py +37 -32
  51. disdrodb/l0/l0b_nc_processing.py +118 -8
  52. disdrodb/l0/l0b_processing.py +30 -65
  53. disdrodb/l0/l0c_processing.py +369 -259
  54. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
  55. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  56. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  58. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  59. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  63. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  66. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  67. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  69. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  71. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  72. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  73. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
  74. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  75. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  76. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  79. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  80. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  81. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  83. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
  84. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  85. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  86. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  87. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  88. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  89. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  90. disdrodb/l1/__init__.py +5 -0
  91. disdrodb/l1/fall_velocity.py +46 -0
  92. disdrodb/l1/filters.py +34 -20
  93. disdrodb/l1/processing.py +46 -45
  94. disdrodb/l1/resampling.py +77 -66
  95. disdrodb/l1_env/routines.py +18 -3
  96. disdrodb/l2/__init__.py +7 -0
  97. disdrodb/l2/empirical_dsd.py +58 -10
  98. disdrodb/l2/processing.py +268 -117
  99. disdrodb/metadata/checks.py +132 -125
  100. disdrodb/metadata/standards.py +3 -1
  101. disdrodb/psd/fitting.py +631 -345
  102. disdrodb/psd/models.py +9 -6
  103. disdrodb/routines/__init__.py +54 -0
  104. disdrodb/{l0/routines.py → routines/l0.py} +316 -355
  105. disdrodb/{l1/routines.py → routines/l1.py} +76 -116
  106. disdrodb/routines/l2.py +1019 -0
  107. disdrodb/{routines.py → routines/wrappers.py} +98 -10
  108. disdrodb/scattering/__init__.py +16 -4
  109. disdrodb/scattering/axis_ratio.py +61 -37
  110. disdrodb/scattering/permittivity.py +504 -0
  111. disdrodb/scattering/routines.py +746 -184
  112. disdrodb/summary/__init__.py +17 -0
  113. disdrodb/summary/routines.py +4196 -0
  114. disdrodb/utils/archiving.py +434 -0
  115. disdrodb/utils/attrs.py +68 -125
  116. disdrodb/utils/cli.py +5 -5
  117. disdrodb/utils/compression.py +30 -1
  118. disdrodb/utils/dask.py +121 -9
  119. disdrodb/utils/dataframe.py +61 -7
  120. disdrodb/utils/decorators.py +31 -0
  121. disdrodb/utils/directories.py +35 -15
  122. disdrodb/utils/encoding.py +37 -19
  123. disdrodb/{l2 → utils}/event.py +15 -173
  124. disdrodb/utils/logger.py +14 -7
  125. disdrodb/utils/manipulations.py +81 -0
  126. disdrodb/utils/routines.py +166 -0
  127. disdrodb/utils/subsetting.py +214 -0
  128. disdrodb/utils/time.py +35 -177
  129. disdrodb/utils/writer.py +20 -7
  130. disdrodb/utils/xarray.py +5 -4
  131. disdrodb/viz/__init__.py +13 -0
  132. disdrodb/viz/plots.py +398 -0
  133. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
  134. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
  135. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
  136. disdrodb/l1/encoding_attrs.py +0 -642
  137. disdrodb/l2/processing_options.py +0 -213
  138. disdrodb/l2/routines.py +0 -868
  139. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  140. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
  141. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
  142. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
disdrodb/{l2 → utils}/event.py RENAMED
@@ -15,110 +15,16 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Functions for event definition."""
-import dask
+
 import numpy as np
 import pandas as pd
-import xarray as xr
-
-from disdrodb.api.info import get_start_end_time_from_filepaths
-from disdrodb.utils.time import acronym_to_seconds, ensure_sorted_by_time
-
-
-@dask.delayed
-def _delayed_open_dataset(filepath):
-    with dask.config.set(scheduler="synchronous"):
-        ds = xr.open_dataset(filepath, chunks={}, autoclose=True, decode_timedelta=False, cache=False)
-    return ds
-
-
-def identify_events(
-    filepaths,
-    parallel=False,
-    min_n_drops=5,
-    neighbor_min_size=2,
-    neighbor_time_interval="5MIN",
-    intra_event_max_time_gap="6H",
-    event_min_duration="5MIN",
-    event_min_size=3,
-):
-    """Return a list of rainy events.
-
-    Rainy timesteps are defined when N > min_n_drops.
-    Any isolated rainy timesteps (based on neighborhood criteria) are removed.
-    Then, consecutive rainy timesteps are grouped into the same event if the time gap between them does not
-    exceed `intra_event_max_time_gap`. Finally, events that do not meet minimum size or duration
-    requirements are filtered out.
-
-    Parameters
-    ----------
-    filepaths : list
-        List of L1C file paths.
-    parallel : bool
-        Whether to load the files in parallel.
-        Set parallel=True only in a multiprocessing environment.
-        The default is False.
-    neighbor_time_interval : str
-        The time interval around a given timestep defining the neighborhood.
-        Only timesteps that fall within this time interval before or after a timestep are considered neighbors.
-    neighbor_min_size : int, optional
-        The minimum number of neighboring timesteps required within `neighbor_time_interval` for a
-        timestep to be considered non-isolated. Isolated timesteps are removed!
-        - If `neighbor_min_size=0`, then no timestep is considered isolated and no filtering occurs.
-        - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
-        - If `neighbor_min_size=2`, the timestep must have at least two timesteps within `neighbor_time_interval`.
-        Defaults to 1.
-    intra_event_max_time_gap : str
-        The maximum time interval between two timesteps to be considered part of the same event.
-        This parameter is used to group timesteps into events.
-    event_min_duration : str
-        The minimum duration an event must span. Events shorter than this duration are discarded.
-    event_min_size : int, optional
-        The minimum number of valid timesteps required for an event. Defaults to 1.
 
-    Returns
-    -------
-    list of dict
-        A list of events, where each event is represented as a dictionary with keys:
-        - "start_time": np.datetime64, start time of the event
-        - "end_time": np.datetime64, end time of the event
-        - "duration": np.timedelta64, duration of the event
-        - "n_timesteps": int, number of valid timesteps in the event
-    """
-    # Open datasets in parallel
-    if parallel:
-        list_ds = dask.compute([_delayed_open_dataset(filepath) for filepath in filepaths])[0]
-    else:
-        list_ds = [xr.open_dataset(filepath, chunks={}, cache=False, decode_timedelta=False) for filepath in filepaths]
-    # Subset datasets to the requested variables
-    variables = ["time", "N"]
-    list_ds = [ds[variables] for ds in list_ds]
-    # Concatenate datasets
-    ds = xr.concat(list_ds, dim="time", compat="no_conflicts", combine_attrs="override")
-    # Read the required variables into memory
-    ds = ds.compute()
-    # Close the files on disk
-    _ = [ds.close() for ds in list_ds]
-    del list_ds
-    # Sort dataset by time
-    ds = ensure_sorted_by_time(ds)
-    # Define candidate timesteps to group into events
-    idx_valid = ds["N"].data > min_n_drops
-    timesteps = ds["time"].data[idx_valid]
-    # Define event list
-    event_list = group_timesteps_into_event(
-        timesteps=timesteps,
-        neighbor_min_size=neighbor_min_size,
-        neighbor_time_interval=neighbor_time_interval,
-        intra_event_max_time_gap=intra_event_max_time_gap,
-        event_min_duration=event_min_duration,
-        event_min_size=event_min_size,
-    )
-    return event_list
+from disdrodb.utils.time import temporal_resolution_to_seconds
 
 
 def group_timesteps_into_event(
     timesteps,
-    intra_event_max_time_gap,
+    event_max_time_gap,
     event_min_size=0,
     event_min_duration="0S",
     neighbor_min_size=0,
@@ -130,7 +36,7 @@ def group_timesteps_into_event(
     This function groups valid candidate timesteps into events by considering how they cluster
     in time. Any isolated timesteps (based on neighborhood criteria) are first removed. Then,
     consecutive timesteps are grouped into the same event if the time gap between them does not
-    exceed `intra_event_max_time_gap`. Finally, events that do not meet minimum size or duration
+    exceed `event_max_time_gap`. Finally, events that do not meet minimum size or duration
     requirements are filtered out.
 
     Please note that neighbor_min_size and neighbor_time_interval are very sensitive to the
@@ -150,7 +56,7 @@ def group_timesteps_into_event(
         - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
         - If `neighbor_min_size=2`, the timestep must have at least two timesteps within `neighbor_time_interval`.
         Defaults to 1.
-    intra_event_max_time_gap : str
+    event_max_time_gap : str
         The maximum time interval between two timesteps to be considered part of the same event.
         This parameter is used to group timesteps into events.
     event_min_duration : str
@@ -168,9 +74,9 @@ def group_timesteps_into_event(
         - "n_timesteps": int, number of valid timesteps in the event
     """
     # Retrieve datetime arguments
-    neighbor_time_interval = pd.Timedelta(acronym_to_seconds(neighbor_time_interval), unit="seconds")
-    intra_event_max_time_gap = pd.Timedelta(acronym_to_seconds(intra_event_max_time_gap), unit="seconds")
-    event_min_duration = pd.Timedelta(acronym_to_seconds(event_min_duration), unit="seconds")
+    neighbor_time_interval = pd.Timedelta(temporal_resolution_to_seconds(neighbor_time_interval), unit="seconds")
+    event_max_time_gap = pd.Timedelta(temporal_resolution_to_seconds(event_max_time_gap), unit="seconds")
+    event_min_duration = pd.Timedelta(temporal_resolution_to_seconds(event_min_duration), unit="seconds")
 
     # Remove isolated timesteps
     timesteps = remove_isolated_timesteps(
@@ -180,8 +86,8 @@ def group_timesteps_into_event(
     )
 
     # Group timesteps into events
-    # - If two timesteps are separated by less than intra_event_max_time_gap, they belong to the same event
-    events = group_timesteps_into_events(timesteps, intra_event_max_time_gap)
+    # - If two timesteps are separated by less than event_max_time_gap, they belong to the same event
+    events = group_timesteps_into_events(timesteps, event_max_time_gap)
 
     # Define the list of events
     event_list = [
@@ -270,7 +176,7 @@ def remove_isolated_timesteps(timesteps, neighbor_min_size, neighbor_time_interv
     return non_isolated_timesteps
 
 
-def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
+def group_timesteps_into_events(timesteps, event_max_time_gap):
     """
     Group valid timesteps into events based on a maximum allowed dry interval.
 
@@ -278,7 +184,7 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
     ----------
     timesteps : array-like of np.datetime64
         Sorted array of valid timesteps.
-    intra_event_max_time_gap : np.timedelta64
+    event_max_time_gap : np.timedelta64
         Maximum time interval allowed between consecutive valid timesteps for them
         to be considered part of the same event.
 
@@ -297,9 +203,9 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
     # Compute differences between consecutive timesteps
     diffs = np.diff(timesteps)
 
-    # Identify the indices where the gap is larger than intra_event_max_time_gap
+    # Identify the indices where the gap is larger than event_max_time_gap
     # These indices represent boundaries between events
-    break_indices = np.where(diffs > intra_event_max_time_gap)[0] + 1
+    break_indices = np.where(diffs > event_max_time_gap)[0] + 1
 
     # Split the timesteps at the identified break points
     events = np.split(timesteps, break_indices)
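
The two changed lines above only rename a variable; the underlying technique is a vectorized gap split. A self-contained sketch of the same logic (array values are illustrative, not disdrodb data):

```python
import numpy as np

# Four timesteps: two close together, then a six-hour dry gap, then two more.
timesteps = np.array(
    ["2024-01-01T00:00", "2024-01-01T00:01", "2024-01-01T06:00", "2024-01-01T06:05"],
    dtype="datetime64[s]",
)
event_max_time_gap = np.timedelta64(10, "m")

# Indices where the gap between consecutive timesteps exceeds the threshold
# mark the first timestep of a new event.
diffs = np.diff(timesteps)
break_indices = np.where(diffs > event_max_time_gap)[0] + 1

# Two events: [00:00, 00:01] and [06:00, 06:05]
events = np.split(timesteps, break_indices)
```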
@@ -311,7 +217,7 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
     #     current_t = timesteps[i]
     #     previous_t = timesteps[i - 1]
 
-    #     if current_t - previous_t <= intra_event_max_time_gap:
+    #     if current_t - previous_t <= event_max_time_gap:
     #         current_event.append(current_t)
     #     else:
     #         events.append(current_event)
@@ -322,67 +228,3 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
 
 
 ####-----------------------------------------------------------------------------------.
-
-
-def get_events_info(list_events, filepaths, accumulation_interval, rolling):
-    """
-    Provide information about the required files for each event.
-
-    For each event in `list_events`, this function identifies the file paths from `filepaths` that
-    overlap with the event period, adjusted by the `accumulation_interval`. The event period is
-    extended backward or forward based on the `rolling` parameter.
-
-    Parameters
-    ----------
-    list_events : list of dict
-        List of events, where each event is a dictionary containing at least 'start_time' and 'end_time'
-        keys with `numpy.datetime64` values.
-    filepaths : list of str
-        List of file paths corresponding to data files.
-    accumulation_interval : numpy.timedelta64 or int
-        Time interval to adjust the event period for accumulation. If an integer is provided, it is
-        assumed to be in seconds.
-    rolling : bool
-        If True, adjust the event period backward by `accumulation_interval` (rolling backward).
-        If False, adjust forward (aggregate forward).
-
-    Returns
-    -------
-    list of dict
-        A list where each element is a dictionary containing:
-        - 'start_time': Adjusted start time of the event (`numpy.datetime64`).
-        - 'end_time': Adjusted end time of the event (`numpy.datetime64`).
-        - 'filepaths': List of file paths overlapping with the adjusted event period.
-
-    """
-    # Ensure accumulation_interval is numpy.timedelta64
-    if not isinstance(accumulation_interval, np.timedelta64):
-        accumulation_interval = np.timedelta64(accumulation_interval, "s")
-
-    # Retrieve file start_time and end_time
-    files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)
-
-    # Retrieve information for each event
-    event_info = []
-    for event_dict in list_events:
-        # Retrieve event time period
-        event_start_time = event_dict["start_time"]
-        event_end_time = event_dict["end_time"]
-
-        # Add buffer to account for the accumulation interval
-        if rolling:  # backward
-            event_start_time = event_start_time - np.array(accumulation_interval, dtype="m8[s]")
-        else:  # aggregate forward
-            event_end_time = event_end_time + np.array(accumulation_interval, dtype="m8[s]")
-
-        # Derive event filepaths
-        overlaps = (files_start_time <= event_end_time) & (files_end_time >= event_start_time)
-        event_filepaths = np.array(filepaths)[overlaps].tolist()
-
-        # Create dictionary
-        if len(event_filepaths) > 0:
-            event_info.append(
-                {"start_time": event_start_time, "end_time": event_end_time, "filepaths": event_filepaths},
-            )
-
-    return event_info
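
With `identify_events` and `get_events_info` removed from this module, callers are left with `group_timesteps_into_event` itself. A hypothetical call with the renamed keyword, reusing the defaults the removed `identify_events` carried (the timesteps array is illustrative):

```python
import numpy as np

from disdrodb.utils.event import group_timesteps_into_event

# Rainy timesteps (illustrative); in practice these come from thresholding
# drop counts, as the removed identify_events used to do.
start = np.datetime64("2024-01-01T00:00:00")
timesteps = start + np.arange(120) * np.timedelta64(60, "s")

event_list = group_timesteps_into_event(
    timesteps=timesteps,
    event_max_time_gap="6H",       # formerly intra_event_max_time_gap
    event_min_duration="5MIN",
    event_min_size=3,
    neighbor_min_size=2,
    neighbor_time_interval="5MIN",
)
# Each event is a dict with "start_time", "end_time", "duration", "n_timesteps".
```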
disdrodb/utils/logger.py CHANGED
@@ -42,7 +42,7 @@ def create_logger_file(logs_dir, filename, parallel):
     format_type = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
     handler.setFormatter(logging.Formatter(format_type))
     logger.addHandler(handler)
-    logger.setLevel(logging.DEBUG)
+    logger.setLevel(logging.INFO)
 
     # Define logger filepath
     # - LogCaptureHandler of pytest does not have baseFilename attribute --> So set None
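
The hunk above raises the per-file log threshold from DEBUG to INFO. A standalone illustration of what that change means (not disdrodb code; the logger name is made up):

```python
import logging

logger = logging.getLogger("example")  # illustrative logger name
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)

logger.debug("row-level parsing detail")    # now suppressed
logger.info("L0A processing has started.")  # still emitted
```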
@@ -164,9 +164,16 @@ def _define_station_summary_log_file(list_logs, summary_filepath):
 
 
 def _define_station_problem_log_file(list_logs, problem_filepath):
-    # - Copy the log of files with warnings and errors
-    list_keywords = ["ERROR"]  # "WARNING"
-    list_patterns = ["ValueError: Less than 5 timesteps available for day"]
+    # Copy the log of files with errors
+    list_keywords = ["ERROR"]
+    # Exclude lines with the following patterns
+    list_patterns = [
+        # Caused by no data left after L2E and L2M filtering
+        "No timesteps with rain rate",
+        "No timesteps with N",
+        "No timesteps with Nbins",
+    ]
+    # Compile keywords to search for
     re_keyword = re.compile("|".join(list_keywords))
     # Compile patterns to ignore, escaping any special regex characters
     re_patterns = re.compile("|".join(map(re.escape, list_patterns))) if list_patterns else None
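
The net effect of the new filter: a log line lands in the problem log only if it matches a keyword and none of the excluded patterns. A standalone sketch with made-up log lines:

```python
import re

list_keywords = ["ERROR"]
# Excluded: expected "no data" conditions from L2E/L2M filtering, not real problems
list_patterns = ["No timesteps with rain rate"]

re_keyword = re.compile("|".join(list_keywords))
re_patterns = re.compile("|".join(map(re.escape, list_patterns)))

log_lines = [
    "ERROR - ValueError: No timesteps with rain rate above threshold.",  # excluded
    "ERROR - KeyError: 'raw_drop_number'",                               # kept
    "INFO - L2E processing has ended.",                                  # no keyword
]
problems = [line for line in log_lines if re_keyword.search(line) and not re_patterns.search(line)]
assert problems == ["ERROR - KeyError: 'raw_drop_number'"]
```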
@@ -221,7 +228,7 @@ def create_product_logs(
 
     The logs directory structure is the following:
     /logs
-    - /files/<product_acronym>/<station> (same structure as data ... a log for each processed file)
+    - /files/<product_name>/<station> (same structure as data ... a log for each processed file)
     - /summary
        --> SUMMARY.<PRODUCT_ACRONYM>.<CAMPAIGN_NAME>.<STATION_NAME>.log
     - /problems
@@ -269,7 +276,7 @@
         # Product options
         **product_kwargs,
     )
-    list_logs = list_files(logs_dir, glob_pattern="*", recursive=True)
+    list_logs = list_files(logs_dir, recursive=True)
 
     # --------------------------------------------------------.
     # LogCaptureHandler of pytest does not have baseFilename attribute, so it returns None
@@ -332,5 +339,5 @@
 
     # --------------------------------------------------------.
     # Remove /problems directory if empty!
-    if len(os.listdir(logs_problem_dir)) == 0:
+    if len(list_files(logs_problem_dir, glob_pattern="*.log")) == 0:
        os.rmdir(logs_problem_dir)
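
A sketch of what the revised emptiness check amounts to, using the standard library instead of disdrodb's `list_files` helper (the helper's exact globbing behavior is assumed here):

```python
import glob
import os

logs_problem_dir = "/tmp/logs/problems"  # illustrative path

# Remove the directory only when no problem log files remain.
# Note: os.rmdir still raises OSError if any other file is present.
if len(glob.glob(os.path.join(logs_problem_dir, "**", "*.log"), recursive=True)) == 0:
    os.rmdir(logs_problem_dir)
```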
disdrodb/utils/manipulations.py ADDED
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Functions helping with DISDRODB product manipulations."""
+
+import numpy as np
+
+from disdrodb.constants import DIAMETER_DIMENSION
+from disdrodb.utils.xarray import unstack_datarray_dimension
+
+
+def get_diameter_bin_edges(ds):
+    """Retrieve diameter bin edges."""
+    bin_edges = np.append(ds["diameter_bin_lower"].compute().data, ds["diameter_bin_upper"].compute().data[-1])
+    return bin_edges
+
+
+def convert_from_decibel(x):
+    """Convert dB to unit."""
+    return np.power(10.0, 0.1 * x)  # 10**(x/10)
+
+
+def convert_to_decibel(x):
+    """Convert unit to dB."""
+    return 10 * np.log10(x)
+
+
+def unstack_radar_variables(ds):
+    """Unstack radar variables."""
+    from disdrodb.scattering import RADAR_VARIABLES
+
+    for var in RADAR_VARIABLES:
+        if var in ds:
+            ds_unstack = unstack_datarray_dimension(ds[var], dim="frequency", prefix="", suffix="_")
+            ds.update(ds_unstack)
+            ds = ds.drop_vars(var)
+    if "frequency" in ds.dims:
+        ds = ds.drop_dims("frequency")
+    return ds
+
+
+def get_diameter_coords_dict_from_bin_edges(diameter_bin_edges):
+    """Get dictionary with all relevant diameter coordinates."""
+    if np.size(diameter_bin_edges) < 2:
+        raise ValueError("Expecting at least 2 values defining bin edges.")
+    diameter_bin_center = diameter_bin_edges[:-1] + np.diff(diameter_bin_edges) / 2
+    diameter_bin_width = np.diff(diameter_bin_edges)
+    diameter_bin_lower = diameter_bin_edges[:-1]
+    diameter_bin_upper = diameter_bin_edges[1:]
+    coords_dict = {
+        "diameter_bin_center": (DIAMETER_DIMENSION, diameter_bin_center),
+        "diameter_bin_width": (DIAMETER_DIMENSION, diameter_bin_width),
+        "diameter_bin_lower": (DIAMETER_DIMENSION, diameter_bin_lower),
+        "diameter_bin_upper": (DIAMETER_DIMENSION, diameter_bin_upper),
+    }
+    return coords_dict
+
+
+def resample_drop_number_concentration(drop_number_concentration, diameter_bin_edges, method="linear"):
+    """Resample a drop number concentration N(D) DataArray to high-resolution diameter bins."""
+    diameters_bin_center = diameter_bin_edges[:-1] + np.diff(diameter_bin_edges) / 2
+
+    da = drop_number_concentration.interp(coords={"diameter_bin_center": diameters_bin_center}, method=method)
+    coords_dict = get_diameter_coords_dict_from_bin_edges(diameter_bin_edges)
+    da = da.assign_coords(coords_dict)
+    return da
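
A quick check of the helpers above (numbers illustrative): the decibel conversions round-trip, and a vector of bin edges expands into the four per-bin diameter coordinates:

```python
import numpy as np

from disdrodb.utils.manipulations import (
    convert_from_decibel,
    convert_to_decibel,
    get_diameter_coords_dict_from_bin_edges,
)

z = 200.0                                # reflectivity in linear units (mm^6 m^-3)
dbz = convert_to_decibel(z)              # 10 * log10(200) ≈ 23.01 dB
assert np.isclose(convert_from_decibel(dbz), z)

edges = np.array([0.0, 0.2, 0.4, 0.6])   # diameter bin edges [mm]
coords = get_diameter_coords_dict_from_bin_edges(edges)
# coords["diameter_bin_center"] == (DIAMETER_DIMENSION, array([0.1, 0.3, 0.5]))
# coords["diameter_bin_width"]  == (DIAMETER_DIMENSION, array([0.2, 0.2, 0.2]))
```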
disdrodb/utils/routines.py ADDED
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Utilities for DISDRODB processing routines."""
+import os
+import shutil
+import tempfile
+
+from disdrodb.api.io import find_files
+from disdrodb.api.path import define_file_folder_path, define_temporal_resolution
+from disdrodb.utils.logger import (
+    close_logger,
+    create_logger_file,
+    log_error,
+    log_info,
+)
+
+
+def is_possible_product(accumulation_interval, sample_interval, rolling):
+    """Assess whether production is possible given the requested accumulation interval and source sample interval."""
+    # Avoid rolling product generation at the source sample interval
+    if rolling and accumulation_interval == sample_interval:
+        return False
+    # Avoid product generation if the accumulation interval is less than the sample interval
+    if accumulation_interval < sample_interval:
+        return False
+    # Avoid product generation if the accumulation interval is not a multiple of the sample interval
+    return accumulation_interval % sample_interval == 0
+
+
+def try_get_required_filepaths(
+    product,
+    data_archive_dir,
+    data_source,
+    campaign_name,
+    station_name,
+    debugging_mode,
+    **product_kwargs,
+):
+    """Try to retrieve the required filepaths for a product, or return None if unavailable."""
+    try:
+        filepaths = find_files(
+            data_archive_dir=data_archive_dir,
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+            product=product,
+            debugging_mode=debugging_mode,
+            **product_kwargs,
+        )
+        return filepaths
+    # If no files are available, print an informative message
+    except Exception as e:
+        temporal_resolution = ""
+        if "sample_interval" in product_kwargs and "rolling" in product_kwargs:
+            temporal_resolution = define_temporal_resolution(
+                seconds=product_kwargs["sample_interval"],
+                rolling=product_kwargs["rolling"],
+            )
+        print(str(e))
+        msg = (
+            f"{product} processing of {data_source} {campaign_name} {station_name} "
+            f"has not been launched because of missing {product} {temporal_resolution} data."
+        )
+        print(msg)
+        return None
+
+
+def run_product_generation(
+    product: str,
+    logs_dir: str,
+    logs_filename: str,
+    parallel: bool,
+    verbose: bool,
+    folder_partitioning: str,
+    core_func: callable,
+    core_func_kwargs: dict,
+    pass_logger=False,
+):
+    """
+    Generic wrapper for DISDRODB product generation.
+
+    Parameters
+    ----------
+    product : str
+        Product name (e.g., "L0A", "L0B", ...).
+
+    logs_dir : str
+        Logs directory.
+    logs_filename : str
+        Logs filename.
+    parallel : bool
+        Parallel flag (for the logger).
+    verbose : bool
+        Verbose logging flag.
+    folder_partitioning : str
+        Partitioning scheme.
+    core_func : callable
+        Function doing the product-specific work, called as ``core_func(**core_func_kwargs)``.
+        Must return an xarray.Dataset or pandas.DataFrame (used to determine the log subdirectory).
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Initialize log file
+        logger, tmp_logger_filepath = create_logger_file(
+            logs_dir=tmpdir,
+            filename=logs_filename,
+            parallel=parallel,
+        )
+
+        # Inform that product creation has started
+        log_info(logger, f"{product} processing of {logs_filename} has started.", verbose=verbose)
+
+        # Initialize object
+        obj = None  # if still None afterwards, the product creation failed
+
+        # Add logger to core_func_kwargs if specified
+        if pass_logger:
+            core_func_kwargs["logger"] = logger
+
+        # Try product creation
+        try:
+            # Run product creation
+            obj = core_func(**core_func_kwargs)
+
+            # Inform that product creation has ended
+            log_info(logger, f"{product} processing of {logs_filename} has ended.", verbose=verbose)
+
+        # Report the error if one occurred
+        except Exception as e:
+            log_error(logger, f"{type(e).__name__}: {e}", verbose=verbose)
+
+        finally:
+            # Close logger
+            close_logger(logger)
+
+        # Move log file to the final logs directory
+        success_flag = obj is not None
+        if success_flag:  # and "time" in obj and len(obj["time"]) > 0:
+            logs_dir = define_file_folder_path(obj, dir_path=logs_dir, folder_partitioning=folder_partitioning)
+            os.makedirs(logs_dir, exist_ok=True)
+            if tmp_logger_filepath is not None:  # (when running pytest, tmp_logger_filepath is None)
+                logger_filepath = os.path.join(logs_dir, os.path.basename(tmp_logger_filepath))
+                shutil.move(tmp_logger_filepath, logger_filepath)
+        else:
+            logger_filepath = None
+
+        # Free memory
+        del obj
+
+        # Return logger filepath
+        return logger_filepath
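
To make the three rules in `is_possible_product` concrete, here is how they play out for a station sampling every 30 seconds (intervals in seconds, matching how the function compares them):

```python
from disdrodb.utils.routines import is_possible_product

assert is_possible_product(accumulation_interval=60, sample_interval=30, rolling=False)       # exact multiple
assert not is_possible_product(accumulation_interval=30, sample_interval=30, rolling=True)    # rolling at source resolution
assert not is_possible_product(accumulation_interval=10, sample_interval=30, rolling=False)   # finer than the source
assert not is_possible_product(accumulation_interval=45, sample_interval=30, rolling=False)   # not a multiple
```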