disdrodb 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +4 -0
- disdrodb/_version.py +2 -2
- disdrodb/api/checks.py +70 -47
- disdrodb/api/configs.py +0 -2
- disdrodb/api/create_directories.py +0 -2
- disdrodb/api/info.py +3 -3
- disdrodb/api/io.py +48 -8
- disdrodb/api/path.py +116 -133
- disdrodb/api/search.py +12 -3
- disdrodb/cli/disdrodb_create_summary.py +113 -0
- disdrodb/cli/disdrodb_create_summary_station.py +11 -1
- disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
- disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
- disdrodb/cli/disdrodb_run_l1_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
- disdrodb/constants.py +1 -1
- disdrodb/data_transfer/download_data.py +123 -7
- disdrodb/etc/products/L1/global.yaml +1 -1
- disdrodb/etc/products/L2E/5MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +1 -1
- disdrodb/etc/products/L2M/GAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +1 -1
- disdrodb/etc/products/L2M/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/LOGNORMAL_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/LOGNORMAL_ML.yaml +8 -0
- disdrodb/etc/products/L2M/global.yaml +11 -3
- disdrodb/issue/writer.py +2 -0
- disdrodb/l0/check_configs.py +49 -16
- disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
- disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
- disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
- disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
- disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
- disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
- disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
- disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
- disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
- disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
- disdrodb/l0/l0a_processing.py +10 -5
- disdrodb/l0/l0b_nc_processing.py +10 -6
- disdrodb/l0/l0b_processing.py +92 -72
- disdrodb/l0/l0c_processing.py +369 -251
- disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +8 -1
- disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
- disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
- disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
- disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
- disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
- disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
- disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
- disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +5 -14
- disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
- disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
- disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
- disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
- disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
- disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
- disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
- disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
- disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PANGASA.py +232 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
- disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +7 -25
- disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
- disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
- disdrodb/l1/beard_model.py +31 -129
- disdrodb/l1/fall_velocity.py +156 -57
- disdrodb/l1/filters.py +25 -28
- disdrodb/l1/processing.py +12 -14
- disdrodb/l1_env/routines.py +46 -17
- disdrodb/l2/empirical_dsd.py +6 -0
- disdrodb/l2/processing.py +3 -3
- disdrodb/metadata/checks.py +132 -125
- disdrodb/metadata/geolocation.py +0 -2
- disdrodb/psd/fitting.py +180 -210
- disdrodb/psd/models.py +1 -1
- disdrodb/routines/__init__.py +54 -0
- disdrodb/{l0/routines.py → routines/l0.py} +288 -418
- disdrodb/{l1/routines.py → routines/l1.py} +60 -92
- disdrodb/{l2/routines.py → routines/l2.py} +284 -485
- disdrodb/{routines.py → routines/wrappers.py} +100 -7
- disdrodb/scattering/axis_ratio.py +95 -85
- disdrodb/scattering/permittivity.py +24 -0
- disdrodb/scattering/routines.py +56 -36
- disdrodb/summary/routines.py +147 -45
- disdrodb/utils/archiving.py +434 -0
- disdrodb/utils/attrs.py +2 -0
- disdrodb/utils/cli.py +5 -5
- disdrodb/utils/dask.py +62 -1
- disdrodb/utils/decorators.py +31 -0
- disdrodb/utils/encoding.py +10 -1
- disdrodb/{l2 → utils}/event.py +1 -66
- disdrodb/utils/logger.py +1 -1
- disdrodb/utils/manipulations.py +22 -12
- disdrodb/utils/routines.py +166 -0
- disdrodb/utils/time.py +5 -293
- disdrodb/utils/xarray.py +3 -0
- disdrodb/viz/plots.py +109 -15
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/METADATA +3 -2
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/RECORD +124 -96
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/entry_points.txt +1 -0
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/top_level.txt +0 -0
disdrodb/{l2 → utils}/event.py
RENAMED
@@ -19,8 +19,7 @@
 import numpy as np
 import pandas as pd

-from disdrodb.
-from disdrodb.utils.time import ensure_timedelta_seconds_interval, temporal_resolution_to_seconds
+from disdrodb.utils.time import temporal_resolution_to_seconds


 def group_timesteps_into_event(
@@ -229,67 +228,3 @@ def group_timesteps_into_events(timesteps, event_max_time_gap):


 ####-----------------------------------------------------------------------------------.
-
-
-def get_files_partitions(list_partitions, filepaths, sample_interval, accumulation_interval, rolling):  # noqa: ARG001
-    """
-    Provide information about the required files for each event.
-
-    For each event in `list_partitions`, this function identifies the file paths from `filepaths` that
-    overlap with the event period, adjusted by the `accumulation_interval`. The event period is
-    extended backward or forward based on the `rolling` parameter.
-
-    Parameters
-    ----------
-    list_partitions : list of dict
-        List of events, where each event is a dictionary containing at least 'start_time' and 'end_time'
-        keys with `numpy.datetime64` values.
-    filepaths : list of str
-        List of file paths corresponding to data files.
-    sample_interval : numpy.timedelta64 or int
-        The sample interval of the input dataset.
-    accumulation_interval : numpy.timedelta64 or int
-        Time interval to adjust the event period for accumulation. If an integer is provided, it is
-        assumed to be in seconds.
-    rolling : bool
-        If True, adjust the event period backward by `accumulation_interval` (rolling backward).
-        If False, adjust forward (aggregate forward).
-
-    Returns
-    -------
-    list of dict
-        A list where each element is a dictionary containing:
-        - 'start_time': Adjusted start time of the event (`numpy.datetime64`).
-        - 'end_time': Adjusted end time of the event (`numpy.datetime64`).
-        - 'filepaths': List of file paths overlapping with the adjusted event period.
-
-    """
-    # Ensure sample_interval and accumulation_interval is numpy.timedelta64
-    accumulation_interval = ensure_timedelta_seconds_interval(accumulation_interval)
-    sample_interval = ensure_timedelta_seconds_interval(sample_interval)
-
-    # Retrieve file start_time and end_time
-    files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)
-
-    # Retrieve information for each event
-    event_info = []
-    for event_dict in list_partitions:
-        # Retrieve event time period
-        event_start_time = event_dict["start_time"]
-        event_end_time = event_dict["end_time"]
-
-        # Adapt event_end_time if accumulation interval different from sample interval
-        if sample_interval != accumulation_interval:
-            event_end_time = event_end_time + accumulation_interval
-
-        # Derive event filepaths
-        overlaps = (files_start_time <= event_end_time) & (files_end_time >= event_start_time)
-        event_filepaths = np.array(filepaths)[overlaps].tolist()
-
-        # Create dictionary
-        if len(event_filepaths) > 0:
-            event_info.append(
-                {"start_time": event_start_time, "end_time": event_end_time, "filepaths": event_filepaths},
-            )
-
-    return event_info
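
The removed `get_files_partitions` helper selects, for each event window, the files whose time coverage intersects it. A minimal standalone sketch of that interval-overlap test (the times below are invented for illustration):

    import numpy as np

    files_start_time = np.array(["2021-01-01T00:00", "2021-01-01T01:00"], dtype="datetime64[s]")
    files_end_time = np.array(["2021-01-01T00:59", "2021-01-01T01:59"], dtype="datetime64[s]")
    event_start_time = np.datetime64("2021-01-01T00:30")
    event_end_time = np.datetime64("2021-01-01T00:45")

    # A file overlaps the event if it starts before the event ends and ends after the event starts
    overlaps = (files_start_time <= event_end_time) & (files_end_time >= event_start_time)
    # -> array([ True, False])
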
disdrodb/utils/logger.py
CHANGED
@@ -42,7 +42,7 @@ def create_logger_file(logs_dir, filename, parallel):
     format_type = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
     handler.setFormatter(logging.Formatter(format_type))
     logger.addHandler(handler)
-    logger.setLevel(logging.
+    logger.setLevel(logging.INFO)

     # Define logger filepath
     # - LogCaptureHandler of pytest does not have baseFilename attribute --> So set None
disdrodb/utils/manipulations.py
CHANGED
@@ -20,6 +20,7 @@

 import numpy as np

+from disdrodb.constants import DIAMETER_DIMENSION
 from disdrodb.utils.xarray import unstack_datarray_dimension


@@ -53,19 +54,28 @@ def unstack_radar_variables(ds):
     return ds


-def
-    """
-
-
-
+def get_diameter_coords_dict_from_bin_edges(diameter_bin_edges):
+    """Get dictionary with all relevant diameter coordinates."""
+    if np.size(diameter_bin_edges) < 2:
+        raise ValueError("Expecting at least 2 values defining bin edges.")
+    diameter_bin_center = diameter_bin_edges[:-1] + np.diff(diameter_bin_edges) / 2
     diameter_bin_width = np.diff(diameter_bin_edges)
     diameter_bin_lower = diameter_bin_edges[:-1]
     diameter_bin_upper = diameter_bin_edges[1:]
-
-
-
-
-
-
-
+    coords_dict = {
+        "diameter_bin_center": (DIAMETER_DIMENSION, diameter_bin_center),
+        "diameter_bin_width": (DIAMETER_DIMENSION, diameter_bin_width),
+        "diameter_bin_lower": (DIAMETER_DIMENSION, diameter_bin_lower),
+        "diameter_bin_upper": (DIAMETER_DIMENSION, diameter_bin_upper),
+    }
+    return coords_dict
+
+
+def resample_drop_number_concentration(drop_number_concentration, diameter_bin_edges, method="linear"):
+    """Resample drop number concentration N(D) DataArray to high resolution diameter bins."""
+    diameters_bin_center = diameter_bin_edges[:-1] + np.diff(diameter_bin_edges) / 2
+
+    da = drop_number_concentration.interp(coords={"diameter_bin_center": diameters_bin_center}, method=method)
+    coords_dict = get_diameter_coords_dict_from_bin_edges(diameter_bin_edges)
+    da = da.assign_coords(coords_dict)
     return da
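
A minimal standalone sketch of what the new `resample_drop_number_concentration` helper does, using plain xarray rather than disdrodb (the toy N(D) values and bin edges are invented for illustration):

    import numpy as np
    import xarray as xr

    # Toy drop number concentration N(D) on three coarse diameter bins
    coarse_edges = np.array([0.0, 0.5, 1.0, 2.0])
    coarse_centers = coarse_edges[:-1] + np.diff(coarse_edges) / 2
    nd = xr.DataArray(
        [1000.0, 400.0, 50.0],
        dims="diameter_bin_center",
        coords={"diameter_bin_center": coarse_centers},
    )

    # Interpolate N(D) to finer bin centers, as the helper does internally;
    # the real function also re-attaches bin width/lower/upper coordinates
    # via get_diameter_coords_dict_from_bin_edges.
    fine_edges = np.arange(0.0, 2.1, 0.25)
    fine_centers = fine_edges[:-1] + np.diff(fine_edges) / 2
    nd_fine = nd.interp(coords={"diameter_bin_center": fine_centers}, method="linear")
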
disdrodb/utils/routines.py
ADDED
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Utilities for DISDRODB processing routines."""
+import os
+import shutil
+import tempfile
+
+from disdrodb.api.io import find_files
+from disdrodb.api.path import define_file_folder_path, define_temporal_resolution
+from disdrodb.utils.logger import (
+    close_logger,
+    create_logger_file,
+    log_error,
+    log_info,
+)
+
+
+def is_possible_product(accumulation_interval, sample_interval, rolling):
+    """Assess if production is possible given the requested accumulation interval and source sample_interval."""
+    # Avoid rolling product generation at source sample interval
+    if rolling and accumulation_interval == sample_interval:
+        return False
+    # Avoid product generation if the accumulation_interval is less than the sample interval
+    if accumulation_interval < sample_interval:
+        return False
+    # Avoid producti generation if accumulation_interval is not multiple of sample_interval
+    return accumulation_interval % sample_interval == 0
+
+
+def try_get_required_filepaths(
+    product,
+    data_archive_dir,
+    data_source,
+    campaign_name,
+    station_name,
+    debugging_mode,
+    **product_kwargs,
+):
+    """Try to retrieve required filepaths for a product, or return None if unavailable."""
+    try:
+        filepaths = find_files(
+            data_archive_dir=data_archive_dir,
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+            product=product,
+            debugging_mode=debugging_mode,
+            **product_kwargs,
+        )
+        return filepaths
+    # If no files available, print informative message
+    except Exception as e:
+        temporal_resolution = ""
+        if "sample_interval" in product_kwargs and "rolling" in product_kwargs:
+            temporal_resolution = define_temporal_resolution(
+                seconds=product_kwargs["sample_interval"],
+                rolling=product_kwargs["rolling"],
+            )
+        print(str(e))
+        msg = (
+            f"{product} processing of {data_source} {campaign_name} {station_name} "
+            f"has not been launched because of missing {product} {temporal_resolution} data."
+        )
+        print(msg)
+        return None
+
+
+def run_product_generation(
+    product: str,
+    logs_dir: str,
+    logs_filename: str,
+    parallel: bool,
+    verbose: bool,
+    folder_partitioning: str,
+    core_func: callable,
+    core_func_kwargs: dict,
+    pass_logger=False,
+):
+    """
+    Generic wrapper for DISDRODB product generation.
+
+    Parameters
+    ----------
+    product : str
+        Product name (e.g., "L0A", "L0B", ...).
+
+    logs_dir : str
+        Logs directory.
+    logs_filename : str
+        Logs filename.
+    parallel : bool
+        Parallel flag (for logger).
+    verbose : bool
+        Verbose logging flag.
+    folder_partitioning : str
+        Partitioning scheme.
+    core_func : callable
+        Function with signature `core_func(logger)` that does the product-specific work.
+        Must return an xarray.Dataset or pandas.DataFrame (used to determine log subdir).
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Initialize log file
+        logger, tmp_logger_filepath = create_logger_file(
+            logs_dir=tmpdir,
+            filename=logs_filename,
+            parallel=parallel,
+        )
+
+        # Inform that product creation has started
+        log_info(logger, f"{product} processing of {logs_filename} has started.", verbose=verbose)
+
+        # Initialize object
+        obj = None  # if None, means the product creation failed
+
+        # Add logger to core_func_kwargs if specified
+        if pass_logger:
+            core_func_kwargs["logger"] = logger
+
+        # Try product creation
+        try:
+            # Run product creation
+            obj = core_func(**core_func_kwargs)
+
+            # Inform that product creation has ended
+            log_info(logger, f"{product} processing of {logs_filename} has ended.", verbose=verbose)
+
+        # Report error if the case
+        except Exception as e:
+            log_error(logger, f"{type(e).__name__}: {e}", verbose=verbose)
+
+        finally:
+            # Close logger
+            close_logger(logger)
+
+        # Move log file to final logs directory
+        success_flag = obj is not None
+        if success_flag:  # and "time" in obj and len(obj["time"]) > 0:
+            logs_dir = define_file_folder_path(obj, dir_path=logs_dir, folder_partitioning=folder_partitioning)
+            os.makedirs(logs_dir, exist_ok=True)
+            if tmp_logger_filepath is not None:  # (when running pytest, tmp_logger_filepath is None)
+                logger_filepath = os.path.join(logs_dir, os.path.basename(tmp_logger_filepath))
+                shutil.move(tmp_logger_filepath, logger_filepath)
+        else:
+            logger_filepath = None
+
+        # Free memory
+        del obj
+
+        # Return logger filepath
+        return logger_filepath
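
The `is_possible_product` rules above can be read off directly; assuming both intervals are given as plain integer seconds, a quick usage sketch:

    from disdrodb.utils.routines import is_possible_product

    is_possible_product(accumulation_interval=300, sample_interval=60, rolling=False)  # True: multiple of the source interval
    is_possible_product(accumulation_interval=30, sample_interval=60, rolling=False)   # False: finer than the source interval
    is_possible_product(accumulation_interval=60, sample_interval=60, rolling=True)    # False: rolling at the native interval
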
disdrodb/utils/time.py
CHANGED
@@ -29,6 +29,7 @@ from disdrodb.utils.xarray import define_fill_value_dictionary

 logger = logging.getLogger(__name__)

+
 ####------------------------------------------------------------------------------------.
 #### Sampling Interval Acronyms

@@ -61,7 +62,7 @@ def seconds_to_temporal_resolution(seconds):
     return temporal_resolution


-def
+def get_sampling_information(temporal_resolution):
     """
     Extract resampling information from the temporal_resolution string.

@@ -126,7 +127,7 @@ def temporal_resolution_to_seconds(temporal_resolution):
     seconds
         Duration in seconds.
     """
-    seconds, _ =
+    seconds, _ = get_sampling_information(temporal_resolution)
     return seconds


@@ -401,8 +402,8 @@ def ensure_sample_interval_in_seconds(sample_interval):  # noqa: PLR0911
     )


-def
-    """Return
+def ensure_timedelta_seconds(interval):
+    """Return an a scalar value/array in seconds or timedelta object as numpy.timedelta64 in seconds."""
     if isinstance(interval, (xr.DataArray, np.ndarray)):
         return ensure_sample_interval_in_seconds(interval).astype("m8[s]")
     return np.array(ensure_sample_interval_in_seconds(interval), dtype="m8[s]")
@@ -512,292 +513,3 @@ def infer_sample_interval(ds, robust=False, verbose=False, logger=None):
     )
     log_warning(logger=logger, msg=msg, verbose=verbose)
     return int(sample_interval)
-
-
-####---------------------------------------------------------------------------------
-#### Timesteps regularization
-
-
-def get_problematic_timestep_indices(timesteps, sample_interval):
-    """Identify timesteps with missing previous or following timesteps."""
-    previous_time = timesteps - pd.Timedelta(seconds=sample_interval)
-    next_time = timesteps + pd.Timedelta(seconds=sample_interval)
-    idx_previous_missing = np.where(~np.isin(previous_time, timesteps))[0][1:]
-    idx_next_missing = np.where(~np.isin(next_time, timesteps))[0][:-1]
-    idx_isolated_missing = np.intersect1d(idx_previous_missing, idx_next_missing)
-    idx_previous_missing = idx_previous_missing[np.isin(idx_previous_missing, idx_isolated_missing, invert=True)]
-    idx_next_missing = idx_next_missing[np.isin(idx_next_missing, idx_isolated_missing, invert=True)]
-    return idx_previous_missing, idx_next_missing, idx_isolated_missing
-
-
-def regularize_timesteps(ds, sample_interval, robust=False, add_quality_flag=True, logger=None, verbose=True):
-    """Ensure timesteps match with the sample_interval.
-
-    This function:
-    - drop dataset indices with duplicated timesteps,
-    - but does not add missing timesteps to the dataset.
-    """
-    # Check sorted by time and sort if necessary
-    ds = ensure_sorted_by_time(ds)
-
-    # Convert time to pandas.DatetimeIndex for easier manipulation
-    times = pd.to_datetime(ds["time"].to_numpy())
-
-    # Determine the start and end times
-    start_time = times[0].floor(f"{sample_interval}s")
-    end_time = times[-1].ceil(f"{sample_interval}s")
-
-    # Create the expected time grid
-    expected_times = pd.date_range(start=start_time, end=end_time, freq=f"{sample_interval}s")
-
-    # Convert to numpy arrays
-    times = times.to_numpy(dtype="M8[s]")
-    expected_times = expected_times.to_numpy(dtype="M8[s]")
-
-    # Map original times to the nearest expected times
-    # Calculate the difference between original times and expected times
-    time_deltas = np.abs(times - expected_times[:, None]).astype(int)
-
-    # Find the index of the closest expected time for each original time
-    nearest_indices = np.argmin(time_deltas, axis=0)
-    adjusted_times = expected_times[nearest_indices]
-
-    # Check for duplicates in adjusted times
-    unique_times, counts = np.unique(adjusted_times, return_counts=True)
-    duplicates = unique_times[counts > 1]
-
-    # Initialize time quality flag
-    # - 0 when ok or just rounded to closest 00
-    # - 1 if previous timestep is missing
-    # - 2 if next timestep is missing
-    # - 3 if previous and next timestep is missing
-    # - 4 if solved duplicated timesteps
-    # - 5 if needed to drop duplicated timesteps and select the last
-    flag_previous_missing = 1
-    flag_next_missing = 2
-    flag_isolated_timestep = 3
-    flag_solved_duplicated_timestep = 4
-    flag_dropped_duplicated_timestep = 5
-    qc_flag = np.zeros(adjusted_times.shape)
-
-    # Initialize list with the duplicated timesteps index to drop
-    # - We drop the first occurrence because is likely the shortest interval
-    idx_to_drop = []
-
-    # Attempt to resolve for duplicates
-    if duplicates.size > 0:
-        # Handle duplicates
-        for dup_time in duplicates:
-            # Indices of duplicates
-            dup_indices = np.where(adjusted_times == dup_time)[0]
-            n_duplicates = len(dup_indices)
-            # Define previous and following timestep
-            prev_time = dup_time - pd.Timedelta(seconds=sample_interval)
-            next_time = dup_time + pd.Timedelta(seconds=sample_interval)
-            # Try to find missing slots before and after
-            # - If more than 3 duplicates, impossible to solve !
-            count_solved = 0
-            # If the previous timestep is available, set that one
-            if n_duplicates == 2:
-                if prev_time not in adjusted_times:
-                    adjusted_times[dup_indices[0]] = prev_time
-                    qc_flag[dup_indices[0]] = flag_solved_duplicated_timestep
-                    count_solved += 1
-                elif next_time not in adjusted_times:
-                    adjusted_times[dup_indices[-1]] = next_time
-                    qc_flag[dup_indices[-1]] = flag_solved_duplicated_timestep
-                    count_solved += 1
-                else:
-                    pass
-            elif n_duplicates == 3:
-                if prev_time not in adjusted_times:
-                    adjusted_times[dup_indices[0]] = prev_time
-                    qc_flag[dup_indices[0]] = flag_solved_duplicated_timestep
-                    count_solved += 1
-                if next_time not in adjusted_times:
-                    adjusted_times[dup_indices[-1]] = next_time
-                    qc_flag[dup_indices[-1]] = flag_solved_duplicated_timestep
-                    count_solved += 1
-            if count_solved != n_duplicates - 1:
-                idx_to_drop = np.append(idx_to_drop, dup_indices[0:-1])
-                qc_flag[dup_indices[-1]] = flag_dropped_duplicated_timestep
-                msg = (
-                    f"Cannot resolve {n_duplicates} duplicated timesteps "
-                    f"(after trailing seconds correction) around {dup_time}."
-                )
-                log_warning(logger=logger, msg=msg, verbose=verbose)
-                if robust:
-                    raise ValueError(msg)
-
-    # Update the time coordinate (Convert to ns for xarray compatibility)
-    ds = ds.assign_coords({"time": adjusted_times.astype("datetime64[ns]")})
-
-    # Update quality flag values for next and previous timestep is missing
-    if add_quality_flag:
-        idx_previous_missing, idx_next_missing, idx_isolated_missing = get_problematic_timestep_indices(
-            adjusted_times,
-            sample_interval,
-        )
-        qc_flag[idx_previous_missing] = np.maximum(qc_flag[idx_previous_missing], flag_previous_missing)
-        qc_flag[idx_next_missing] = np.maximum(qc_flag[idx_next_missing], flag_next_missing)
-        qc_flag[idx_isolated_missing] = np.maximum(qc_flag[idx_isolated_missing], flag_isolated_timestep)
-
-        # If the first timestep is at 00:00 and currently flagged as previous missing (1), reset to 0
-        # first_time = pd.to_datetime(adjusted_times[0]).time()
-        # first_expected_time = pd.Timestamp("00:00:00").time()
-        # if first_time == first_expected_time and qc_flag[0] == flag_previous_missing:
-        #     qc_flag[0] = 0
-
-        # # If the last timestep is flagged and currently flagged as next missing (2), reset it to 0
-        # last_time = pd.to_datetime(adjusted_times[-1]).time()
-        # last_time_expected = (pd.Timestamp("00:00:00") - pd.Timedelta(30, unit="seconds")).time()
-        # # Check if adding one interval would go beyond the end_time
-        # if last_time == last_time_expected and qc_flag[-1] == flag_next_missing:
-        #     qc_flag[-1] = 0
-
-        # Assign time quality flag coordinate
-        ds["time_qc"] = xr.DataArray(qc_flag, dims="time")
-        ds = ds.set_coords("time_qc")
-
-    # Drop duplicated timesteps
-    # - Using ds = ds.drop_isel({"time": idx_to_drop.astype(int)}) raise:
-    #   --> pandas.errors.InvalidIndexError: Reindexing only valid with uniquely valued Index objects
-    #   --> https://github.com/pydata/xarray/issues/6605
-    if len(idx_to_drop) > 0:
-        idx_to_drop = idx_to_drop.astype(int)
-        idx_valid_timesteps = np.arange(0, ds["time"].size)
-        idx_valid_timesteps = np.delete(idx_valid_timesteps, idx_to_drop)
-        ds = ds.isel(time=idx_valid_timesteps)
-    # Return dataset
-    return ds
-
-
-####---------------------------------------------------------------------------------
-#### Time blocks
-
-
-def check_freq(freq: str) -> None:
-    """Check validity of freq argument."""
-    valid_freq = ["none", "year", "season", "quarter", "month", "day", "hour"]
-    if not isinstance(freq, str):
-        raise TypeError("'freq' must be a string.")
-    if freq not in valid_freq:
-        raise ValueError(
-            f"'freq' '{freq}' is not possible. Must be one of: {valid_freq}.",
-        )
-    return freq
-
-
-def generate_time_blocks(start_time: np.datetime64, end_time: np.datetime64, freq: str) -> np.ndarray:  # noqa: PLR0911
-    """Generate time blocks between `start_time` and `end_time` for a given frequency.
-
-    Parameters
-    ----------
-    start_time : numpy.datetime64
-        Inclusive start of the overall time range.
-    end_time : numpy.datetime64
-        Inclusive end of the overall time range.
-    freq : str
-        Frequency specifier. Accepted values are:
-        - 'none' : return a single block [start_time, end_time]
-        - 'day' : split into daily blocks
-        - 'month' : split into calendar months
-        - 'quarter' : split into calendar quarters
-        - 'year' : split into calendar years
-        - 'season' : split into meteorological seasons (MAM, JJA, SON, DJF)
-
-    Returns
-    -------
-    numpy.ndarray
-        Array of shape (n, 2) with dtype datetime64[s], where each row is [block_start, block_end].
-
-    """
-    freq = check_freq(freq)
-    if freq == "none":
-        return np.array([[start_time, end_time]], dtype="datetime64[s]")
-
-    if freq == "hour":
-        periods = pd.period_range(start=start_time, end=end_time, freq="h")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "day":
-        periods = pd.period_range(start=start_time, end=end_time, freq="d")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "month":
-        periods = pd.period_range(start=start_time, end=end_time, freq="M")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "year":
-        periods = pd.period_range(start=start_time, end=end_time, freq="Y")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "quarter":
-        periods = pd.period_range(start=start_time, end=end_time, freq="Q")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.floor("s").to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "season":
-        # Fiscal quarter frequency ending in Feb → seasons DJF, MAM, JJA, SON
-        periods = pd.period_range(start=start_time, end=end_time, freq="Q-FEB")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-    raise NotImplementedError(f"Frequency '{freq}' is not implemented.")
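
One non-obvious detail in the removed `generate_time_blocks`: the 'season' branch relies on a pandas fiscal-quarter frequency anchored on February, so each period spans a meteorological season (DJF, MAM, JJA, SON). A small standalone illustration with invented dates:

    import pandas as pd

    periods = pd.period_range(start="2021-01-15", end="2021-12-15", freq="Q-FEB")
    [(p.start_time, p.end_time) for p in periods]
    # Periods run Dec-Feb, Mar-May, Jun-Aug, Sep-Nov
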
disdrodb/utils/xarray.py
CHANGED
@@ -106,6 +106,7 @@ def xr_get_last_valid_idx(da_condition, dim, fill_value=None):
 def _check_coord_handling(coord_handling):
     if coord_handling not in {"keep", "drop", "unstack"}:
         raise ValueError("coord_handling must be one of 'keep', 'drop', or 'unstack'.")
+    return coord_handling


 def _unstack_coordinates(xr_obj, dim, prefix, suffix):
@@ -163,6 +164,8 @@ def unstack_datarray_dimension(da, dim, coord_handling="keep", prefix="", suffix
     """
     # Retrieve DataArray name
     name = da.name
+    coord_handling = _check_coord_handling(coord_handling)
+
     # Unstack variables
     ds = da.to_dataset(dim=dim)
     rename_dict = {dim_value: f"{prefix}{name}{suffix}{dim_value}" for dim_value in list(ds.data_vars)}