disdrodb 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- disdrodb/__init__.py +64 -34
- disdrodb/_config.py +5 -4
- disdrodb/_version.py +16 -3
- disdrodb/accessor/__init__.py +20 -0
- disdrodb/accessor/methods.py +125 -0
- disdrodb/api/checks.py +139 -9
- disdrodb/api/configs.py +4 -2
- disdrodb/api/info.py +10 -10
- disdrodb/api/io.py +237 -18
- disdrodb/api/path.py +81 -75
- disdrodb/api/search.py +6 -6
- disdrodb/cli/disdrodb_create_summary_station.py +91 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
- disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
- disdrodb/configs.py +149 -4
- disdrodb/constants.py +61 -0
- disdrodb/data_transfer/download_data.py +145 -14
- disdrodb/etc/configs/attributes.yaml +339 -0
- disdrodb/etc/configs/encodings.yaml +473 -0
- disdrodb/etc/products/L1/global.yaml +13 -0
- disdrodb/etc/products/L2E/10MIN.yaml +12 -0
- disdrodb/etc/products/L2E/1MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +22 -0
- disdrodb/etc/products/L2M/10MIN.yaml +12 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +26 -0
- disdrodb/l0/__init__.py +13 -0
- disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +44 -3
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +41 -1
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
- disdrodb/l0/l0a_processing.py +30 -30
- disdrodb/l0/l0b_nc_processing.py +108 -2
- disdrodb/l0/l0b_processing.py +4 -4
- disdrodb/l0/l0c_processing.py +5 -13
- disdrodb/l0/manuals/SWS250.pdf +0 -0
- disdrodb/l0/manuals/VPF730.pdf +0 -0
- disdrodb/l0/manuals/VPF750.pdf +0 -0
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
- disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +105 -0
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +128 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
- disdrodb/l0/readers/{PARSIVEL → PARSIVEL2}/KIT/BURKINA_FASO.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → NCAR/FARM_PARSIVEL2.py} +43 -70
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +29 -12
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
- disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
- disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +31 -14
- disdrodb/l0/routines.py +105 -14
- disdrodb/l1/__init__.py +5 -0
- disdrodb/l1/filters.py +34 -20
- disdrodb/l1/processing.py +45 -44
- disdrodb/l1/resampling.py +77 -66
- disdrodb/l1/routines.py +35 -42
- disdrodb/l1_env/routines.py +18 -3
- disdrodb/l2/__init__.py +7 -0
- disdrodb/l2/empirical_dsd.py +58 -10
- disdrodb/l2/event.py +27 -120
- disdrodb/l2/processing.py +267 -116
- disdrodb/l2/routines.py +618 -254
- disdrodb/metadata/standards.py +3 -1
- disdrodb/psd/fitting.py +463 -144
- disdrodb/psd/models.py +8 -5
- disdrodb/routines.py +3 -3
- disdrodb/scattering/__init__.py +16 -4
- disdrodb/scattering/axis_ratio.py +56 -36
- disdrodb/scattering/permittivity.py +486 -0
- disdrodb/scattering/routines.py +701 -159
- disdrodb/summary/__init__.py +17 -0
- disdrodb/summary/routines.py +4120 -0
- disdrodb/utils/attrs.py +68 -125
- disdrodb/utils/compression.py +30 -1
- disdrodb/utils/dask.py +59 -8
- disdrodb/utils/dataframe.py +63 -9
- disdrodb/utils/directories.py +49 -17
- disdrodb/utils/encoding.py +33 -19
- disdrodb/utils/logger.py +13 -6
- disdrodb/utils/manipulations.py +71 -0
- disdrodb/utils/subsetting.py +214 -0
- disdrodb/utils/time.py +165 -19
- disdrodb/utils/writer.py +20 -7
- disdrodb/utils/xarray.py +85 -4
- disdrodb/viz/__init__.py +13 -0
- disdrodb/viz/plots.py +327 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/RECORD +127 -87
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
- disdrodb/l1/encoding_attrs.py +0 -635
- disdrodb/l2/processing_options.py +0 -213
- /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
disdrodb/l2/empirical_dsd.py
CHANGED
@@ -23,8 +23,8 @@ Infinite values should be removed beforehand or otherwise are propagated through
 import numpy as np
 import xarray as xr
 
-from disdrodb import DIAMETER_DIMENSION, VELOCITY_DIMENSION
 from disdrodb.api.checks import check_sensor_name
+from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
 from disdrodb.utils.xarray import (
     remove_diameter_coordinates,
     remove_velocity_coordinates,
@@ -66,7 +66,7 @@ def get_drop_average_velocity(drop_number):
     ----------
     drop_number : xarray.DataArray
         Array of drop counts \\( n(D,v) \\) per diameter (and velocity, if available) bins
-        over the
+        over the measurement interval.
         The DataArray must have the ``velocity_bin_center`` coordinate.
 
     Returns
@@ -80,6 +80,7 @@ def get_drop_average_velocity(drop_number):
         dim=VELOCITY_DIMENSION,
         skipna=False,
     )
+    average_velocity.name = "average_velocity"
     return average_velocity
 
 
@@ -138,6 +139,9 @@ def _compute_qc_bins_metrics(arr):
     return output
 
 
+BINS_METRICS = ["Nbins", "Nbins_missing", "Nbins_missing_fraction", "Nbins_missing_consecutive"]
+
+
 def compute_qc_bins_metrics(ds):
     """
     Compute quality-control metrics for drop-count bins along the diameter dimension.
@@ -191,11 +195,19 @@ def compute_qc_bins_metrics(ds):
     )
 
     # Assign meaningful labels to the qc 'metric' dimension
-
-    ds_qc_bins = da_qc_bins.assign_coords(metric=variables).to_dataset(dim="metric")
+    ds_qc_bins = da_qc_bins.assign_coords(metric=BINS_METRICS).to_dataset(dim="metric")
     return ds_qc_bins
 
 
+def add_bins_metrics(ds):
+    """Add bin metrics if missing."""
+    bins_metrics = BINS_METRICS
+    if not np.all(np.isin(bins_metrics, list(ds.data_vars))):
+        # Add bins statistics
+        ds.update(compute_qc_bins_metrics(ds))
+    return ds
+
+
 ####-------------------------------------------------------------------------------------------------------------------.
 #### DSD Spectrum, Concentration, Moments
 
@@ -252,7 +264,7 @@ def get_drop_number_concentration(drop_number, velocity, diameter_bin_width, sam
         Width of each diameter bin \\( \\Delta D \\) in millimeters (mm).
     drop_number : xarray.DataArray
         Array of drop counts \\( n(D) or n(D,v) \\) per diameter (and velocity if available)
-        bins over the
+        bins over the measurement interval.
     sample_interval : float or xarray.DataArray
         Time over which the drops are counted \\( \\Delta t \\) in seconds (s).
     sampling_area : float or xarray.DataArray
@@ -277,7 +289,7 @@ def get_drop_number_concentration(drop_number, velocity, diameter_bin_width, sam
     - \\( n(D,v) \\): Number of drops counted in diameter (and velocity) bins.
     - \\( A_{\text{eff}}(D) \\): Effective sampling area of the sensor for diameter \\( D \\) in square meters (m²).
     - \\( \\Delta D \\): Diameter bin width in millimeters (mm).
-    - \\( \\Delta t \\):
+    - \\( \\Delta t \\): Measurement interval in seconds (s).
     - \\( v(D) \\): Fall velocity of drops in diameter bin \\( D \\) in meters per second (m/s).
 
     The effective sampling area \\( A_{\text{eff}}(D) \\) depends on the sensor and may vary with drop diameter.
@@ -919,8 +931,7 @@ def get_min_max_diameter(drop_counts):
     return min_drop_diameter, max_drop_diameter
 
 
-def get_mode_diameter(drop_number_concentration, diameter):
-    """Get raindrop diameter with highest occurrence."""
+def _get_mode_diameter(drop_number_concentration, diameter):
     # If all NaN, set to 0 otherwise argmax fail when all NaN data
     idx_all_nan_mask = np.isnan(drop_number_concentration).all(dim=DIAMETER_DIMENSION)
     drop_number_concentration = drop_number_concentration.where(~idx_all_nan_mask, 0)
@@ -939,6 +950,43 @@ def get_mode_diameter(drop_number_concentration, diameter):
     return diameter_mode
 
 
+def get_mode_diameter(
+    drop_number_concentration,
+    diameter,
+):
+    """Get raindrop diameter with highest occurrence.
+
+    Parameters
+    ----------
+    drop_number_concentration : xarray.DataArray
+        The drop number concentration N(D) for each diameter bin, typically in units of
+        number per cubic meter per millimeter (m⁻³·mm⁻¹).
+    diameter : xarray.DataArray
+        The equivalent volume diameters D of the drops in each bin, in meters (m).
+
+    Returns
+    -------
+    xarray.DataArray
+        The diameter with the highest drop number concentration.
+    """
+    # Use map_blocks if working with Dask arrays
+    if hasattr(drop_number_concentration.data, "chunks"):
+        # Define the template for output
+        template = remove_diameter_coordinates(drop_number_concentration.isel({DIAMETER_DIMENSION: 0}))
+        diameter_mode = xr.map_blocks(
+            _get_mode_diameter,
+            drop_number_concentration,
+            kwargs={"diameter": diameter.compute()},
+            template=template,
+        )
+    else:
+        diameter_mode = _get_mode_diameter(
+            drop_number_concentration=drop_number_concentration,
+            diameter=diameter,
+        )
+    return diameter_mode
+
+
 ####-------------------------------------------------------------------------------------------------------------------.
 #### Mass Distribution Diameters
 
@@ -1369,7 +1417,7 @@ def get_normalized_intercept_parameter_from_moments(moment_3, moment_4):
         [m⁻³·mm³] (number per cubic meter times diameter cubed).
 
     moment_4 : float or array-like
-        The
+        The fourth moment of the drop size distribution, \\( M_3 \\), in units of
         [m⁻³·mm4].
 
     Returns
@@ -1581,7 +1629,7 @@ def get_kinetic_energy_variables_from_drop_number(
     - \\( D_i \\) is the diameter of bin \\( i \\).
     - \\( v_j \\) is the velocity of bin \\( j \\).
     - \\( A \\) is the sampling area.
-    - \\( \\Delta t \\) is the
+    - \\( \\Delta t \\) is the measurement interval in seconds.
     - \\( R \\) is the rainfall rate in mm/hr.
 
     """
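
Note on the new get_mode_diameter wrapper above: it dispatches between a direct eager call and lazy block-wise evaluation with xarray.map_blocks, depending on whether the underlying array is Dask-backed. The following is a minimal, self-contained sketch of that dispatch pattern, not the package's exact code: the data, dimension names, and the simplified mode_diameter stand-in for _get_mode_diameter are made up, and the chunked branch requires dask to be installed.

import numpy as np
import xarray as xr

# Synthetic N(D): 100 timesteps x 32 diameter bins (made-up sizes).
diameter = xr.DataArray(np.linspace(0.1, 8.0, 32), dims="diameter_bin_center")
n_d = xr.DataArray(np.random.rand(100, 32), dims=("time", "diameter_bin_center"))
n_d = n_d.chunk({"time": 25})  # Dask-backed, to exercise the map_blocks branch


def mode_diameter(n_d, diameter):
    """Simplified stand-in for _get_mode_diameter: argmax over the diameter dimension."""
    # Guard all-NaN slices before argmax, then mask them back to NaN.
    all_nan = np.isnan(n_d).all(dim="diameter_bin_center")
    idx = n_d.where(~all_nan, 0).argmax(dim="diameter_bin_center")
    return diameter.isel(diameter_bin_center=idx).where(~all_nan)


if hasattr(n_d.data, "chunks"):  # Dask array -> lazy, block-wise evaluation
    # The template declares the dims/shape/dtype of the lazy output.
    template = n_d.isel(diameter_bin_center=0, drop=True)
    mode = xr.map_blocks(mode_diameter, n_d, kwargs={"diameter": diameter}, template=template)
    mode = mode.compute()
else:  # in-memory NumPy array -> direct eager call
    mode = mode_diameter(n_d, diameter)

The template argument tells map_blocks the dimensions and dtype of each output block, which is why the diff builds it by selecting a single diameter bin and stripping the diameter coordinates.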
disdrodb/l2/event.py
CHANGED
@@ -15,110 +15,17 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Functions for event definition."""
-
+
 import numpy as np
 import pandas as pd
-import xarray as xr
 
 from disdrodb.api.info import get_start_end_time_from_filepaths
-from disdrodb.utils.time import
-
-
-@dask.delayed
-def _delayed_open_dataset(filepath):
-    with dask.config.set(scheduler="synchronous"):
-        ds = xr.open_dataset(filepath, chunks={}, autoclose=True, decode_timedelta=False, cache=False)
-    return ds
-
-
-def identify_events(
-    filepaths,
-    parallel=False,
-    min_n_drops=5,
-    neighbor_min_size=2,
-    neighbor_time_interval="5MIN",
-    intra_event_max_time_gap="6H",
-    event_min_duration="5MIN",
-    event_min_size=3,
-):
-    """Return a list of rainy events.
-
-    Rainy timesteps are defined when N > min_n_drops.
-    Any rainy isolated timesteps (based on neighborhood criteria) is removed.
-    Then, consecutive rainy timesteps are grouped into the same event if the time gap between them does not
-    exceed `intra_event_max_time_gap`. Finally, events that do not meet minimum size or duration
-    requirements are filtered out.
-
-    Parameters
-    ----------
-    filepaths: list
-        List of L1C file paths.
-    parallel: bool
-        Whether to load the files in parallel.
-        Set parallel=True only in a multiprocessing environment.
-        The default is False.
-    neighbor_time_interval : str
-        The time interval around a given a timestep defining the neighborhood.
-        Only timesteps that fall within this time interval before or after a timestep are considered neighbors.
-    neighbor_min_size : int, optional
-        The minimum number of neighboring timesteps required within `neighbor_time_interval` for a
-        timestep to be considered non-isolated. Isolated timesteps are removed !
-        - If `neighbor_min_size=0, then no timestep is considered isolated and no filtering occurs.
-        - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
-        - If `neighbor_min_size=2`, the timestep must have at least two timesteps within `neighbor_time_interval`.
-        Defaults to 1.
-    intra_event_max_time_gap: str
-        The maximum time interval between two timesteps to be considered part of the same event.
-        This parameters is used to group timesteps into events !
-    event_min_duration : str
-        The minimum duration an event must span. Events shorter than this duration are discarded.
-    event_min_size : int, optional
-        The minimum number of valid timesteps required for an event. Defaults to 1.
-
-    Returns
-    -------
-    list of dict
-        A list of events, where each event is represented as a dictionary with keys:
-        - "start_time": np.datetime64, start time of the event
-        - "end_time": np.datetime64, end time of the event
-        - "duration": np.timedelta64, duration of the event
-        - "n_timesteps": int, number of valid timesteps in the event
-    """
-    # Open datasets in parallel
-    if parallel:
-        list_ds = dask.compute([_delayed_open_dataset(filepath) for filepath in filepaths])[0]
-    else:
-        list_ds = [xr.open_dataset(filepath, chunks={}, cache=False, decode_timedelta=False) for filepath in filepaths]
-    # Filter dataset for requested variables
-    variables = ["time", "N"]
-    list_ds = [ds[variables] for ds in list_ds]
-    # Concat datasets
-    ds = xr.concat(list_ds, dim="time", compat="no_conflicts", combine_attrs="override")
-    # Read in memory the variable needed
-    ds = ds.compute()
-    # Close file on disk
-    _ = [ds.close() for ds in list_ds]
-    del list_ds
-    # Sort dataset by time
-    ds = ensure_sorted_by_time(ds)
-    # Define candidate timesteps to group into events
-    idx_valid = ds["N"].data > min_n_drops
-    timesteps = ds["time"].data[idx_valid]
-    # Define event list
-    event_list = group_timesteps_into_event(
-        timesteps=timesteps,
-        neighbor_min_size=neighbor_min_size,
-        neighbor_time_interval=neighbor_time_interval,
-        intra_event_max_time_gap=intra_event_max_time_gap,
-        event_min_duration=event_min_duration,
-        event_min_size=event_min_size,
-    )
-    return event_list
+from disdrodb.utils.time import ensure_timedelta_seconds_interval, temporal_resolution_to_seconds
 
 
 def group_timesteps_into_event(
     timesteps,
-    intra_event_max_time_gap,
+    event_max_time_gap,
     event_min_size=0,
     event_min_duration="0S",
     neighbor_min_size=0,
@@ -130,7 +37,7 @@ def group_timesteps_into_event(
     This function groups valid candidate timesteps into events by considering how they cluster
     in time. Any isolated timesteps (based on neighborhood criteria) are first removed. Then,
     consecutive timesteps are grouped into the same event if the time gap between them does not
-    exceed `intra_event_max_time_gap`. Finally, events that do not meet minimum size or duration
+    exceed `event_max_time_gap`. Finally, events that do not meet minimum size or duration
     requirements are filtered out.
 
     Please note that neighbor_min_size and neighbor_time_interval are very sensitive to the
@@ -150,7 +57,7 @@
         - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
         - If `neighbor_min_size=2`, the timestep must have at least two timesteps within `neighbor_time_interval`.
         Defaults to 1.
-    intra_event_max_time_gap: str
+    event_max_time_gap: str
         The maximum time interval between two timesteps to be considered part of the same event.
         This parameters is used to group timesteps into events !
     event_min_duration : str
@@ -168,9 +75,9 @@
         - "n_timesteps": int, number of valid timesteps in the event
     """
     # Retrieve datetime arguments
-    neighbor_time_interval = pd.Timedelta(
-
-    event_min_duration = pd.Timedelta(
+    neighbor_time_interval = pd.Timedelta(temporal_resolution_to_seconds(neighbor_time_interval), unit="seconds")
+    event_max_time_gap = pd.Timedelta(temporal_resolution_to_seconds(event_max_time_gap), unit="seconds")
+    event_min_duration = pd.Timedelta(temporal_resolution_to_seconds(event_min_duration), unit="seconds")
 
     # Remove isolated timesteps
     timesteps = remove_isolated_timesteps(
@@ -180,8 +87,8 @@
     )
 
     # Group timesteps into events
-    # - If two timesteps are separated by less than intra_event_max_time_gap, are considered the same event
-    events = group_timesteps_into_events(timesteps, intra_event_max_time_gap)
+    # - If two timesteps are separated by less than event_max_time_gap, are considered the same event
+    events = group_timesteps_into_events(timesteps, event_max_time_gap)
 
     # Define list of event
     event_list = [
@@ -270,7 +177,7 @@ def remove_isolated_timesteps(timesteps, neighbor_min_size, neighbor_time_interv
     return non_isolated_timesteps
 
 
-def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
+def group_timesteps_into_events(timesteps, event_max_time_gap):
     """
     Group valid timesteps into events based on a maximum allowed dry interval.
 
@@ -278,7 +185,7 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
     ----------
     timesteps : array-like of np.datetime64
         Sorted array of valid timesteps.
-    intra_event_max_time_gap : np.timedelta64
+    event_max_time_gap : np.timedelta64
         Maximum time interval allowed between consecutive valid timesteps for them
         to be considered part of the same event.
 
@@ -297,9 +204,9 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
     # Compute differences between consecutive timesteps
    diffs = np.diff(timesteps)
 
-    # Identify the indices where the gap is larger than intra_event_max_time_gap
+    # Identify the indices where the gap is larger than event_max_time_gap
     # These indices represent boundaries between events
-    break_indices = np.where(diffs > intra_event_max_time_gap)[0] + 1
+    break_indices = np.where(diffs > event_max_time_gap)[0] + 1
 
     # Split the timesteps at the identified break points
     events = np.split(timesteps, break_indices)
@@ -311,7 +218,7 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
     # current_t = timesteps[i]
     # previous_t = timesteps[i - 1]
 
-    # if current_t - previous_t <= intra_event_max_time_gap:
+    # if current_t - previous_t <= event_max_time_gap:
     #     current_event.append(current_t)
     # else:
     #     events.append(current_event)
@@ -324,21 +231,23 @@ def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
 ####-----------------------------------------------------------------------------------.
 
 
-def get_events_info(list_events, filepaths, accumulation_interval, rolling):
+def get_files_partitions(list_partitions, filepaths, sample_interval, accumulation_interval, rolling):  # noqa: ARG001
     """
     Provide information about the required files for each event.
 
-    For each event in `list_events`, this function identifies the file paths from `filepaths` that
+    For each event in `list_partitions`, this function identifies the file paths from `filepaths` that
     overlap with the event period, adjusted by the `accumulation_interval`. The event period is
     extended backward or forward based on the `rolling` parameter.
 
     Parameters
    ----------
-    list_events : list of dict
+    list_partitions : list of dict
         List of events, where each event is a dictionary containing at least 'start_time' and 'end_time'
         keys with `numpy.datetime64` values.
     filepaths : list of str
         List of file paths corresponding to data files.
+    sample_interval : numpy.timedelta64 or int
+        The sample interval of the input dataset.
     accumulation_interval : numpy.timedelta64 or int
         Time interval to adjust the event period for accumulation. If an integer is provided, it is
         assumed to be in seconds.
@@ -355,25 +264,23 @@ def get_events_info(list_events, filepaths, accumulation_interval, rolling):
         - 'filepaths': List of file paths overlapping with the adjusted event period.
 
     """
-    # Ensure accumulation_interval is numpy.timedelta64
-
-
+    # Ensure sample_interval and accumulation_interval is numpy.timedelta64
+    accumulation_interval = ensure_timedelta_seconds_interval(accumulation_interval)
+    sample_interval = ensure_timedelta_seconds_interval(sample_interval)
 
     # Retrieve file start_time and end_time
     files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)
 
     # Retrieve information for each event
     event_info = []
-    for event_dict in list_events:
+    for event_dict in list_partitions:
         # Retrieve event time period
         event_start_time = event_dict["start_time"]
         event_end_time = event_dict["end_time"]
 
-        #
-        if
-
-        else:  # aggregate forward
-            event_end_time = event_end_time + np.array(accumulation_interval, dtype="m8[s]")
+        # Adapt event_end_time if accumulation interval different from sample interval
+        if sample_interval != accumulation_interval:
+            event_end_time = event_end_time + accumulation_interval
 
         # Derive event filepaths
         overlaps = (files_start_time <= event_end_time) & (files_end_time >= event_start_time)