disdrodb-0.1.3-py3-none-any.whl → disdrodb-0.1.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/checks.py +70 -47
  4. disdrodb/api/configs.py +0 -2
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/info.py +3 -3
  7. disdrodb/api/io.py +48 -8
  8. disdrodb/api/path.py +116 -133
  9. disdrodb/api/search.py +12 -3
  10. disdrodb/cli/disdrodb_create_summary.py +113 -0
  11. disdrodb/cli/disdrodb_create_summary_station.py +11 -1
  12. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
  14. disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
  15. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  16. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  17. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  18. disdrodb/constants.py +1 -1
  19. disdrodb/data_transfer/download_data.py +123 -7
  20. disdrodb/etc/products/L1/global.yaml +1 -1
  21. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  22. disdrodb/etc/products/L2E/global.yaml +1 -1
  23. disdrodb/etc/products/L2M/GAMMA_GS_ND_MAE.yaml +6 -0
  24. disdrodb/etc/products/L2M/GAMMA_ML.yaml +1 -1
  25. disdrodb/etc/products/L2M/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  26. disdrodb/etc/products/L2M/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  27. disdrodb/etc/products/L2M/LOGNORMAL_ML.yaml +8 -0
  28. disdrodb/etc/products/L2M/global.yaml +11 -3
  29. disdrodb/issue/writer.py +2 -0
  30. disdrodb/l0/check_configs.py +49 -16
  31. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  32. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  33. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  34. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  35. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  36. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  37. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  38. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  39. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  40. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  41. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  42. disdrodb/l0/l0a_processing.py +10 -5
  43. disdrodb/l0/l0b_nc_processing.py +10 -6
  44. disdrodb/l0/l0b_processing.py +92 -72
  45. disdrodb/l0/l0c_processing.py +369 -251
  46. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +8 -1
  47. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  48. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  49. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  50. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  51. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  52. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  54. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  55. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  56. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  58. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  59. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +5 -14
  60. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  61. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  62. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  63. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  66. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  67. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  69. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  70. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  73. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  75. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  76. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  79. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
  80. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PANGASA.py +232 -0
  81. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  82. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +120 -0
  83. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +7 -25
  84. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  85. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  86. disdrodb/l1/beard_model.py +31 -129
  87. disdrodb/l1/fall_velocity.py +156 -57
  88. disdrodb/l1/filters.py +25 -28
  89. disdrodb/l1/processing.py +12 -14
  90. disdrodb/l1_env/routines.py +46 -17
  91. disdrodb/l2/empirical_dsd.py +6 -0
  92. disdrodb/l2/processing.py +3 -3
  93. disdrodb/metadata/checks.py +132 -125
  94. disdrodb/metadata/geolocation.py +0 -2
  95. disdrodb/psd/fitting.py +180 -210
  96. disdrodb/psd/models.py +1 -1
  97. disdrodb/routines/__init__.py +54 -0
  98. disdrodb/{l0/routines.py → routines/l0.py} +288 -418
  99. disdrodb/{l1/routines.py → routines/l1.py} +60 -92
  100. disdrodb/{l2/routines.py → routines/l2.py} +284 -485
  101. disdrodb/{routines.py → routines/wrappers.py} +100 -7
  102. disdrodb/scattering/axis_ratio.py +95 -85
  103. disdrodb/scattering/permittivity.py +24 -0
  104. disdrodb/scattering/routines.py +56 -36
  105. disdrodb/summary/routines.py +147 -45
  106. disdrodb/utils/archiving.py +434 -0
  107. disdrodb/utils/attrs.py +2 -0
  108. disdrodb/utils/cli.py +5 -5
  109. disdrodb/utils/dask.py +62 -1
  110. disdrodb/utils/decorators.py +31 -0
  111. disdrodb/utils/encoding.py +10 -1
  112. disdrodb/{l2 → utils}/event.py +1 -66
  113. disdrodb/utils/logger.py +1 -1
  114. disdrodb/utils/manipulations.py +22 -12
  115. disdrodb/utils/routines.py +166 -0
  116. disdrodb/utils/time.py +5 -293
  117. disdrodb/utils/xarray.py +3 -0
  118. disdrodb/viz/plots.py +109 -15
  119. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/METADATA +3 -2
  120. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/RECORD +124 -96
  121. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/entry_points.txt +1 -0
  122. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/licenses/LICENSE +0 -0
  124. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/top_level.txt +0 -0
disdrodb/{l2 → utils}/event.py RENAMED
@@ -19,8 +19,7 @@
 import numpy as np
 import pandas as pd
 
-from disdrodb.api.info import get_start_end_time_from_filepaths
-from disdrodb.utils.time import ensure_timedelta_seconds_interval, temporal_resolution_to_seconds
+from disdrodb.utils.time import temporal_resolution_to_seconds
 
 
 def group_timesteps_into_event(
@@ -229,67 +228,3 @@ def group_timesteps_into_events(timesteps, event_max_time_gap):
 
 
 ####-----------------------------------------------------------------------------------.
-
-
-def get_files_partitions(list_partitions, filepaths, sample_interval, accumulation_interval, rolling):  # noqa: ARG001
-    """
-    Provide information about the required files for each event.
-
-    For each event in `list_partitions`, this function identifies the file paths from `filepaths` that
-    overlap with the event period, adjusted by the `accumulation_interval`. The event period is
-    extended backward or forward based on the `rolling` parameter.
-
-    Parameters
-    ----------
-    list_partitions : list of dict
-        List of events, where each event is a dictionary containing at least 'start_time' and 'end_time'
-        keys with `numpy.datetime64` values.
-    filepaths : list of str
-        List of file paths corresponding to data files.
-    sample_interval : numpy.timedelta64 or int
-        The sample interval of the input dataset.
-    accumulation_interval : numpy.timedelta64 or int
-        Time interval to adjust the event period for accumulation. If an integer is provided, it is
-        assumed to be in seconds.
-    rolling : bool
-        If True, adjust the event period backward by `accumulation_interval` (rolling backward).
-        If False, adjust forward (aggregate forward).
-
-    Returns
-    -------
-    list of dict
-        A list where each element is a dictionary containing:
-        - 'start_time': Adjusted start time of the event (`numpy.datetime64`).
-        - 'end_time': Adjusted end time of the event (`numpy.datetime64`).
-        - 'filepaths': List of file paths overlapping with the adjusted event period.
-
-    """
-    # Ensure sample_interval and accumulation_interval is numpy.timedelta64
-    accumulation_interval = ensure_timedelta_seconds_interval(accumulation_interval)
-    sample_interval = ensure_timedelta_seconds_interval(sample_interval)
-
-    # Retrieve file start_time and end_time
-    files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)
-
-    # Retrieve information for each event
-    event_info = []
-    for event_dict in list_partitions:
-        # Retrieve event time period
-        event_start_time = event_dict["start_time"]
-        event_end_time = event_dict["end_time"]
-
-        # Adapt event_end_time if accumulation interval different from sample interval
-        if sample_interval != accumulation_interval:
-            event_end_time = event_end_time + accumulation_interval
-
-        # Derive event filepaths
-        overlaps = (files_start_time <= event_end_time) & (files_end_time >= event_start_time)
-        event_filepaths = np.array(filepaths)[overlaps].tolist()
-
-        # Create dictionary
-        if len(event_filepaths) > 0:
-            event_info.append(
-                {"start_time": event_start_time, "end_time": event_end_time, "filepaths": event_filepaths},
-            )
-
-    return event_info
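
A quick, self-contained illustration of the interval-overlap test documented in the removed get_files_partitions above (toy timestamps, not package code):

import numpy as np

# A file is selected when its [start, end] period intersects the event period,
# here extended by a 5-minute accumulation interval as described in the docstring.
files_start_time = np.array(["2024-01-01T00:00", "2024-01-01T06:00", "2024-01-02T00:00"], dtype="datetime64[s]")
files_end_time = np.array(["2024-01-01T05:59", "2024-01-01T11:59", "2024-01-02T05:59"], dtype="datetime64[s]")
event_start_time = np.datetime64("2024-01-01T04:00")
event_end_time = np.datetime64("2024-01-01T07:00") + np.timedelta64(300, "s")  # extended by accumulation_interval

overlaps = (files_start_time <= event_end_time) & (files_end_time >= event_start_time)
print(overlaps)  # -> [ True  True False]
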
disdrodb/utils/logger.py CHANGED
@@ -42,7 +42,7 @@ def create_logger_file(logs_dir, filename, parallel):
     format_type = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
     handler.setFormatter(logging.Formatter(format_type))
     logger.addHandler(handler)
-    logger.setLevel(logging.DEBUG)
+    logger.setLevel(logging.INFO)
 
     # Define logger filepath
     # - LogCaptureHandler of pytest does not have baseFilename attribute --> So set None
disdrodb/utils/manipulations.py CHANGED
@@ -20,6 +20,7 @@
 
 import numpy as np
 
+from disdrodb.constants import DIAMETER_DIMENSION
 from disdrodb.utils.xarray import unstack_datarray_dimension
 
 
@@ -53,19 +54,28 @@ def unstack_radar_variables(ds):
     return ds
 
 
-def resample_drop_number_concentration(da, diameter_bin_edges, method="linear"):
-    """Resample drop number concentration N(D) DataArray to high resolution diameter bins."""
-    diameters_bin_center = diameter_bin_edges[:-1] + np.diff(diameter_bin_edges) / 2
-
-    da = da.interp(coords={"diameter_bin_center": diameters_bin_center}, method=method)
+def get_diameter_coords_dict_from_bin_edges(diameter_bin_edges):
+    """Get dictionary with all relevant diameter coordinates."""
+    if np.size(diameter_bin_edges) < 2:
+        raise ValueError("Expecting at least 2 values defining bin edges.")
+    diameter_bin_center = diameter_bin_edges[:-1] + np.diff(diameter_bin_edges) / 2
     diameter_bin_width = np.diff(diameter_bin_edges)
     diameter_bin_lower = diameter_bin_edges[:-1]
     diameter_bin_upper = diameter_bin_edges[1:]
-    da = da.assign_coords(
-        {
-            "diameter_bin_width": ("diameter_bin_center", diameter_bin_width),
-            "diameter_bin_lower": ("diameter_bin_center", diameter_bin_lower),
-            "diameter_bin_upper": ("diameter_bin_center", diameter_bin_upper),
-        },
-    )
+    coords_dict = {
+        "diameter_bin_center": (DIAMETER_DIMENSION, diameter_bin_center),
+        "diameter_bin_width": (DIAMETER_DIMENSION, diameter_bin_width),
+        "diameter_bin_lower": (DIAMETER_DIMENSION, diameter_bin_lower),
+        "diameter_bin_upper": (DIAMETER_DIMENSION, diameter_bin_upper),
+    }
+    return coords_dict
+
+
+def resample_drop_number_concentration(drop_number_concentration, diameter_bin_edges, method="linear"):
+    """Resample drop number concentration N(D) DataArray to high resolution diameter bins."""
+    diameters_bin_center = diameter_bin_edges[:-1] + np.diff(diameter_bin_edges) / 2
+
+    da = drop_number_concentration.interp(coords={"diameter_bin_center": diameters_bin_center}, method=method)
+    coords_dict = get_diameter_coords_dict_from_bin_edges(diameter_bin_edges)
+    da = da.assign_coords(coords_dict)
     return da
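
A minimal usage sketch of the reworked helpers (illustrative values only; the module path disdrodb.utils.manipulations is inferred from the file list, and DIAMETER_DIMENSION is assumed to name the "diameter_bin_center" dimension):

import numpy as np
import xarray as xr

from disdrodb.utils.manipulations import resample_drop_number_concentration  # assumed module path

# Hypothetical N(D) on a coarse diameter grid [mm] (values are made up)
coarse_edges = np.array([0.0, 0.5, 1.0, 1.5, 2.0])
coarse_centers = coarse_edges[:-1] + np.diff(coarse_edges) / 2
nd = xr.DataArray(
    [120.0, 80.0, 30.0, 5.0],
    dims="diameter_bin_center",
    coords={"diameter_bin_center": coarse_centers},
    name="drop_number_concentration",
)

# Interpolate onto finer bins; the width/lower/upper coordinates are rebuilt
# from the new edges via get_diameter_coords_dict_from_bin_edges.
fine_edges = np.arange(0.0, 2.01, 0.1)
nd_fine = resample_drop_number_concentration(nd, diameter_bin_edges=fine_edges, method="linear")
print(nd_fine["diameter_bin_width"].to_numpy()[:3])  # ~0.1 mm wide bins
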
disdrodb/utils/routines.py ADDED
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Utilities for DISDRODB processing routines."""
+import os
+import shutil
+import tempfile
+
+from disdrodb.api.io import find_files
+from disdrodb.api.path import define_file_folder_path, define_temporal_resolution
+from disdrodb.utils.logger import (
+    close_logger,
+    create_logger_file,
+    log_error,
+    log_info,
+)
+
+
+def is_possible_product(accumulation_interval, sample_interval, rolling):
+    """Assess if production is possible given the requested accumulation interval and source sample_interval."""
+    # Avoid rolling product generation at source sample interval
+    if rolling and accumulation_interval == sample_interval:
+        return False
+    # Avoid product generation if the accumulation_interval is less than the sample interval
+    if accumulation_interval < sample_interval:
+        return False
+    # Avoid producti generation if accumulation_interval is not multiple of sample_interval
+    return accumulation_interval % sample_interval == 0
+
+
+def try_get_required_filepaths(
+    product,
+    data_archive_dir,
+    data_source,
+    campaign_name,
+    station_name,
+    debugging_mode,
+    **product_kwargs,
+):
+    """Try to retrieve required filepaths for a product, or return None if unavailable."""
+    try:
+        filepaths = find_files(
+            data_archive_dir=data_archive_dir,
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+            product=product,
+            debugging_mode=debugging_mode,
+            **product_kwargs,
+        )
+        return filepaths
+    # If no files available, print informative message
+    except Exception as e:
+        temporal_resolution = ""
+        if "sample_interval" in product_kwargs and "rolling" in product_kwargs:
+            temporal_resolution = define_temporal_resolution(
+                seconds=product_kwargs["sample_interval"],
+                rolling=product_kwargs["rolling"],
+            )
+        print(str(e))
+        msg = (
+            f"{product} processing of {data_source} {campaign_name} {station_name} "
+            f"has not been launched because of missing {product} {temporal_resolution} data."
+        )
+        print(msg)
+        return None
+
+
+def run_product_generation(
+    product: str,
+    logs_dir: str,
+    logs_filename: str,
+    parallel: bool,
+    verbose: bool,
+    folder_partitioning: str,
+    core_func: callable,
+    core_func_kwargs: dict,
+    pass_logger=False,
+):
+    """
+    Generic wrapper for DISDRODB product generation.
+
+    Parameters
+    ----------
+    product : str
+        Product name (e.g., "L0A", "L0B", ...).
+
+    logs_dir : str
+        Logs directory.
+    logs_filename : str
+        Logs filename.
+    parallel : bool
+        Parallel flag (for logger).
+    verbose : bool
+        Verbose logging flag.
+    folder_partitioning : str
+        Partitioning scheme.
+    core_func : callable
+        Function with signature `core_func(logger)` that does the product-specific work.
+        Must return an xarray.Dataset or pandas.DataFrame (used to determine log subdir).
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Initialize log file
+        logger, tmp_logger_filepath = create_logger_file(
+            logs_dir=tmpdir,
+            filename=logs_filename,
+            parallel=parallel,
+        )
+
+        # Inform that product creation has started
+        log_info(logger, f"{product} processing of {logs_filename} has started.", verbose=verbose)
+
+        # Initialize object
+        obj = None  # if None, means the product creation failed
+
+        # Add logger to core_func_kwargs if specified
+        if pass_logger:
+            core_func_kwargs["logger"] = logger
+
+        # Try product creation
+        try:
+            # Run product creation
+            obj = core_func(**core_func_kwargs)
+
+            # Inform that product creation has ended
+            log_info(logger, f"{product} processing of {logs_filename} has ended.", verbose=verbose)
+
+        # Report error if the case
+        except Exception as e:
+            log_error(logger, f"{type(e).__name__}: {e}", verbose=verbose)
+
+        finally:
+            # Close logger
+            close_logger(logger)
+
+        # Move log file to final logs directory
+        success_flag = obj is not None
+        if success_flag:  # and "time" in obj and len(obj["time"]) > 0:
+            logs_dir = define_file_folder_path(obj, dir_path=logs_dir, folder_partitioning=folder_partitioning)
+            os.makedirs(logs_dir, exist_ok=True)
+            if tmp_logger_filepath is not None:  # (when running pytest, tmp_logger_filepath is None)
+                logger_filepath = os.path.join(logs_dir, os.path.basename(tmp_logger_filepath))
+                shutil.move(tmp_logger_filepath, logger_filepath)
+        else:
+            logger_filepath = None
+
+        # Free memory
+        del obj
+
+        # Return logger filepath
+        return logger_filepath
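
A hypothetical invocation sketch of the new wrapper (the paths, names and toy core function below are invented for illustration; only run_product_generation's signature is taken from the diff above):

import xarray as xr

from disdrodb.utils.routines import run_product_generation  # module path implied by the file list


def core_func(filepath, logger=None):
    """Toy product-specific step: open the input file and return the dataset."""
    ds = xr.open_dataset(filepath)
    # ... product-specific processing and writing would happen here ...
    return ds


logger_filepath = run_product_generation(
    product="L1",
    logs_dir="/tmp/DISDRODB/logs/L1",          # hypothetical logs directory
    logs_filename="L1.EXAMPLE_STATION",        # hypothetical log file name
    parallel=False,
    verbose=True,
    folder_partitioning="year/month",          # assumed partitioning string
    core_func=core_func,
    core_func_kwargs={"filepath": "/tmp/DISDRODB/L0C/example.nc"},  # hypothetical input
    pass_logger=True,                          # injects `logger` into core_func_kwargs
)

On success the wrapper moves the temporary log file under a partitioned subfolder derived from the returned object (presumably its time coverage) and returns that path; on failure it logs the exception and returns None.
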
disdrodb/utils/time.py CHANGED
@@ -29,6 +29,7 @@ from disdrodb.utils.xarray import define_fill_value_dictionary
 
 logger = logging.getLogger(__name__)
 
+
 ####------------------------------------------------------------------------------------.
 #### Sampling Interval Acronyms
 
@@ -61,7 +62,7 @@ def seconds_to_temporal_resolution(seconds):
     return temporal_resolution
 
 
-def get_resampling_information(temporal_resolution):
+def get_sampling_information(temporal_resolution):
     """
     Extract resampling information from the temporal_resolution string.
 
@@ -126,7 +127,7 @@ def temporal_resolution_to_seconds(temporal_resolution):
     seconds
         Duration in seconds.
     """
-    seconds, _ = get_resampling_information(temporal_resolution)
+    seconds, _ = get_sampling_information(temporal_resolution)
    return seconds
 
 
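
The rename only touches the helper's name; as a rough round-trip check (the exact acronym grammar lives in get_sampling_information and is not reproduced here; the "5MIN" style is inferred from the L2E product configs):

from disdrodb.utils.time import seconds_to_temporal_resolution, temporal_resolution_to_seconds

acronym = seconds_to_temporal_resolution(300)            # expected to yield a "5MIN"-style acronym
assert temporal_resolution_to_seconds(acronym) == 300    # inverse mapping, now via get_sampling_information
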
@@ -401,8 +402,8 @@ def ensure_sample_interval_in_seconds(sample_interval): # noqa: PLR0911
     )
 
 
-def ensure_timedelta_seconds_interval(interval):
-    """Return interval as numpy.timedelta64 in seconds."""
+def ensure_timedelta_seconds(interval):
+    """Return an a scalar value/array in seconds or timedelta object as numpy.timedelta64 in seconds."""
     if isinstance(interval, (xr.DataArray, np.ndarray)):
         return ensure_sample_interval_in_seconds(interval).astype("m8[s]")
     return np.array(ensure_sample_interval_in_seconds(interval), dtype="m8[s]")
@@ -512,292 +513,3 @@ def infer_sample_interval(ds, robust=False, verbose=False, logger=None):
         )
         log_warning(logger=logger, msg=msg, verbose=verbose)
     return int(sample_interval)
-
-
-####---------------------------------------------------------------------------------
-#### Timesteps regularization
-
-
-def get_problematic_timestep_indices(timesteps, sample_interval):
-    """Identify timesteps with missing previous or following timesteps."""
-    previous_time = timesteps - pd.Timedelta(seconds=sample_interval)
-    next_time = timesteps + pd.Timedelta(seconds=sample_interval)
-    idx_previous_missing = np.where(~np.isin(previous_time, timesteps))[0][1:]
-    idx_next_missing = np.where(~np.isin(next_time, timesteps))[0][:-1]
-    idx_isolated_missing = np.intersect1d(idx_previous_missing, idx_next_missing)
-    idx_previous_missing = idx_previous_missing[np.isin(idx_previous_missing, idx_isolated_missing, invert=True)]
-    idx_next_missing = idx_next_missing[np.isin(idx_next_missing, idx_isolated_missing, invert=True)]
-    return idx_previous_missing, idx_next_missing, idx_isolated_missing
-
-
-def regularize_timesteps(ds, sample_interval, robust=False, add_quality_flag=True, logger=None, verbose=True):
-    """Ensure timesteps match with the sample_interval.
-
-    This function:
-    - drop dataset indices with duplicated timesteps,
-    - but does not add missing timesteps to the dataset.
-    """
-    # Check sorted by time and sort if necessary
-    ds = ensure_sorted_by_time(ds)
-
-    # Convert time to pandas.DatetimeIndex for easier manipulation
-    times = pd.to_datetime(ds["time"].to_numpy())
-
-    # Determine the start and end times
-    start_time = times[0].floor(f"{sample_interval}s")
-    end_time = times[-1].ceil(f"{sample_interval}s")
-
-    # Create the expected time grid
-    expected_times = pd.date_range(start=start_time, end=end_time, freq=f"{sample_interval}s")
-
-    # Convert to numpy arrays
-    times = times.to_numpy(dtype="M8[s]")
-    expected_times = expected_times.to_numpy(dtype="M8[s]")
-
-    # Map original times to the nearest expected times
-    # Calculate the difference between original times and expected times
-    time_deltas = np.abs(times - expected_times[:, None]).astype(int)
-
-    # Find the index of the closest expected time for each original time
-    nearest_indices = np.argmin(time_deltas, axis=0)
-    adjusted_times = expected_times[nearest_indices]
-
-    # Check for duplicates in adjusted times
-    unique_times, counts = np.unique(adjusted_times, return_counts=True)
-    duplicates = unique_times[counts > 1]
-
-    # Initialize time quality flag
-    # - 0 when ok or just rounded to closest 00
-    # - 1 if previous timestep is missing
-    # - 2 if next timestep is missing
-    # - 3 if previous and next timestep is missing
-    # - 4 if solved duplicated timesteps
-    # - 5 if needed to drop duplicated timesteps and select the last
-    flag_previous_missing = 1
-    flag_next_missing = 2
-    flag_isolated_timestep = 3
-    flag_solved_duplicated_timestep = 4
-    flag_dropped_duplicated_timestep = 5
-    qc_flag = np.zeros(adjusted_times.shape)
-
-    # Initialize list with the duplicated timesteps index to drop
-    # - We drop the first occurrence because is likely the shortest interval
-    idx_to_drop = []
-
-    # Attempt to resolve for duplicates
-    if duplicates.size > 0:
-        # Handle duplicates
-        for dup_time in duplicates:
-            # Indices of duplicates
-            dup_indices = np.where(adjusted_times == dup_time)[0]
-            n_duplicates = len(dup_indices)
-            # Define previous and following timestep
-            prev_time = dup_time - pd.Timedelta(seconds=sample_interval)
-            next_time = dup_time + pd.Timedelta(seconds=sample_interval)
-            # Try to find missing slots before and after
-            # - If more than 3 duplicates, impossible to solve !
-            count_solved = 0
-            # If the previous timestep is available, set that one
-            if n_duplicates == 2:
-                if prev_time not in adjusted_times:
-                    adjusted_times[dup_indices[0]] = prev_time
-                    qc_flag[dup_indices[0]] = flag_solved_duplicated_timestep
-                    count_solved += 1
-                elif next_time not in adjusted_times:
-                    adjusted_times[dup_indices[-1]] = next_time
-                    qc_flag[dup_indices[-1]] = flag_solved_duplicated_timestep
-                    count_solved += 1
-                else:
-                    pass
-            elif n_duplicates == 3:
-                if prev_time not in adjusted_times:
-                    adjusted_times[dup_indices[0]] = prev_time
-                    qc_flag[dup_indices[0]] = flag_solved_duplicated_timestep
-                    count_solved += 1
-                if next_time not in adjusted_times:
-                    adjusted_times[dup_indices[-1]] = next_time
-                    qc_flag[dup_indices[-1]] = flag_solved_duplicated_timestep
-                    count_solved += 1
-            if count_solved != n_duplicates - 1:
-                idx_to_drop = np.append(idx_to_drop, dup_indices[0:-1])
-                qc_flag[dup_indices[-1]] = flag_dropped_duplicated_timestep
-                msg = (
-                    f"Cannot resolve {n_duplicates} duplicated timesteps "
-                    f"(after trailing seconds correction) around {dup_time}."
-                )
-                log_warning(logger=logger, msg=msg, verbose=verbose)
-                if robust:
-                    raise ValueError(msg)
-
-    # Update the time coordinate (Convert to ns for xarray compatibility)
-    ds = ds.assign_coords({"time": adjusted_times.astype("datetime64[ns]")})
-
-    # Update quality flag values for next and previous timestep is missing
-    if add_quality_flag:
-        idx_previous_missing, idx_next_missing, idx_isolated_missing = get_problematic_timestep_indices(
-            adjusted_times,
-            sample_interval,
-        )
-        qc_flag[idx_previous_missing] = np.maximum(qc_flag[idx_previous_missing], flag_previous_missing)
-        qc_flag[idx_next_missing] = np.maximum(qc_flag[idx_next_missing], flag_next_missing)
-        qc_flag[idx_isolated_missing] = np.maximum(qc_flag[idx_isolated_missing], flag_isolated_timestep)
-
-        # If the first timestep is at 00:00 and currently flagged as previous missing (1), reset to 0
-        # first_time = pd.to_datetime(adjusted_times[0]).time()
-        # first_expected_time = pd.Timestamp("00:00:00").time()
-        # if first_time == first_expected_time and qc_flag[0] == flag_previous_missing:
-        #     qc_flag[0] = 0
-
-        # # If the last timestep is flagged and currently flagged as next missing (2), reset it to 0
-        # last_time = pd.to_datetime(adjusted_times[-1]).time()
-        # last_time_expected = (pd.Timestamp("00:00:00") - pd.Timedelta(30, unit="seconds")).time()
-        # # Check if adding one interval would go beyond the end_time
-        # if last_time == last_time_expected and qc_flag[-1] == flag_next_missing:
-        #     qc_flag[-1] = 0
-
-        # Assign time quality flag coordinate
-        ds["time_qc"] = xr.DataArray(qc_flag, dims="time")
-        ds = ds.set_coords("time_qc")
-
-    # Drop duplicated timesteps
-    # - Using ds = ds.drop_isel({"time": idx_to_drop.astype(int)}) raise:
-    #   --> pandas.errors.InvalidIndexError: Reindexing only valid with uniquely valued Index objects
-    #   --> https://github.com/pydata/xarray/issues/6605
-    if len(idx_to_drop) > 0:
-        idx_to_drop = idx_to_drop.astype(int)
-        idx_valid_timesteps = np.arange(0, ds["time"].size)
-        idx_valid_timesteps = np.delete(idx_valid_timesteps, idx_to_drop)
-        ds = ds.isel(time=idx_valid_timesteps)
-    # Return dataset
-    return ds
-
-
-####---------------------------------------------------------------------------------
-#### Time blocks
-
-
-def check_freq(freq: str) -> None:
-    """Check validity of freq argument."""
-    valid_freq = ["none", "year", "season", "quarter", "month", "day", "hour"]
-    if not isinstance(freq, str):
-        raise TypeError("'freq' must be a string.")
-    if freq not in valid_freq:
-        raise ValueError(
-            f"'freq' '{freq}' is not possible. Must be one of: {valid_freq}.",
-        )
-    return freq
-
-
-def generate_time_blocks(start_time: np.datetime64, end_time: np.datetime64, freq: str) -> np.ndarray:  # noqa: PLR0911
-    """Generate time blocks between `start_time` and `end_time` for a given frequency.
-
-    Parameters
-    ----------
-    start_time : numpy.datetime64
-        Inclusive start of the overall time range.
-    end_time : numpy.datetime64
-        Inclusive end of the overall time range.
-    freq : str
-        Frequency specifier. Accepted values are:
-        - 'none' : return a single block [start_time, end_time]
-        - 'day' : split into daily blocks
-        - 'month' : split into calendar months
-        - 'quarter' : split into calendar quarters
-        - 'year' : split into calendar years
-        - 'season' : split into meteorological seasons (MAM, JJA, SON, DJF)
-
-    Returns
-    -------
-    numpy.ndarray
-        Array of shape (n, 2) with dtype datetime64[s], where each row is [block_start, block_end].
-
-    """
-    freq = check_freq(freq)
-    if freq == "none":
-        return np.array([[start_time, end_time]], dtype="datetime64[s]")
-
-    if freq == "hour":
-        periods = pd.period_range(start=start_time, end=end_time, freq="h")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "day":
-        periods = pd.period_range(start=start_time, end=end_time, freq="d")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "month":
-        periods = pd.period_range(start=start_time, end=end_time, freq="M")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "year":
-        periods = pd.period_range(start=start_time, end=end_time, freq="Y")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "quarter":
-        periods = pd.period_range(start=start_time, end=end_time, freq="Q")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.floor("s").to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "season":
-        # Fiscal quarter frequency ending in Feb → seasons DJF, MAM, JJA, SON
-        periods = pd.period_range(start=start_time, end=end_time, freq="Q-FEB")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-    raise NotImplementedError(f"Frequency '{freq}' is not implemented.")
disdrodb/utils/xarray.py CHANGED
@@ -106,6 +106,7 @@ def xr_get_last_valid_idx(da_condition, dim, fill_value=None):
 def _check_coord_handling(coord_handling):
     if coord_handling not in {"keep", "drop", "unstack"}:
         raise ValueError("coord_handling must be one of 'keep', 'drop', or 'unstack'.")
+    return coord_handling
 
 
 def _unstack_coordinates(xr_obj, dim, prefix, suffix):
@@ -163,6 +164,8 @@ def unstack_datarray_dimension(da, dim, coord_handling="keep", prefix="", suffix
     """
     # Retrieve DataArray name
     name = da.name
+    coord_handling = _check_coord_handling(coord_handling)
+
     # Unstack variables
     ds = da.to_dataset(dim=dim)
     rename_dict = {dim_value: f"{prefix}{name}{suffix}{dim_value}" for dim_value in list(ds.data_vars)}