disdrodb 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. disdrodb/__init__.py +64 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +139 -9
  7. disdrodb/api/configs.py +4 -2
  8. disdrodb/api/info.py +10 -10
  9. disdrodb/api/io.py +237 -18
  10. disdrodb/api/path.py +81 -75
  11. disdrodb/api/search.py +6 -6
  12. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  13. disdrodb/cli/disdrodb_run_l0.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
  19. disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
  20. disdrodb/configs.py +149 -4
  21. disdrodb/constants.py +61 -0
  22. disdrodb/data_transfer/download_data.py +145 -14
  23. disdrodb/etc/configs/attributes.yaml +339 -0
  24. disdrodb/etc/configs/encodings.yaml +473 -0
  25. disdrodb/etc/products/L1/global.yaml +13 -0
  26. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  27. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  28. disdrodb/etc/products/L2E/global.yaml +22 -0
  29. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  30. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  31. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  33. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/global.yaml +26 -0
  35. disdrodb/l0/__init__.py +13 -0
  36. disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
  37. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  38. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  39. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  40. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  41. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
  42. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
  43. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +44 -3
  44. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +41 -1
  45. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  46. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  47. disdrodb/l0/l0a_processing.py +30 -30
  48. disdrodb/l0/l0b_nc_processing.py +108 -2
  49. disdrodb/l0/l0b_processing.py +4 -4
  50. disdrodb/l0/l0c_processing.py +5 -13
  51. disdrodb/l0/manuals/SWS250.pdf +0 -0
  52. disdrodb/l0/manuals/VPF730.pdf +0 -0
  53. disdrodb/l0/manuals/VPF750.pdf +0 -0
  54. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  55. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  56. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  57. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +105 -0
  58. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +128 -0
  59. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  62. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  63. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  64. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  65. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  66. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  67. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  68. disdrodb/l0/readers/{PARSIVEL → PARSIVEL2}/KIT/BURKINA_FASO.py +1 -1
  69. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  70. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  71. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → NCAR/FARM_PARSIVEL2.py} +43 -70
  72. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  73. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  75. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  76. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +29 -12
  77. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +69 -0
  78. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  79. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  80. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  81. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  82. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  83. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  84. disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +31 -14
  85. disdrodb/l0/routines.py +105 -14
  86. disdrodb/l1/__init__.py +5 -0
  87. disdrodb/l1/filters.py +34 -20
  88. disdrodb/l1/processing.py +45 -44
  89. disdrodb/l1/resampling.py +77 -66
  90. disdrodb/l1/routines.py +35 -42
  91. disdrodb/l1_env/routines.py +18 -3
  92. disdrodb/l2/__init__.py +7 -0
  93. disdrodb/l2/empirical_dsd.py +58 -10
  94. disdrodb/l2/event.py +27 -120
  95. disdrodb/l2/processing.py +267 -116
  96. disdrodb/l2/routines.py +618 -254
  97. disdrodb/metadata/standards.py +3 -1
  98. disdrodb/psd/fitting.py +463 -144
  99. disdrodb/psd/models.py +8 -5
  100. disdrodb/routines.py +3 -3
  101. disdrodb/scattering/__init__.py +16 -4
  102. disdrodb/scattering/axis_ratio.py +56 -36
  103. disdrodb/scattering/permittivity.py +486 -0
  104. disdrodb/scattering/routines.py +701 -159
  105. disdrodb/summary/__init__.py +17 -0
  106. disdrodb/summary/routines.py +4120 -0
  107. disdrodb/utils/attrs.py +68 -125
  108. disdrodb/utils/compression.py +30 -1
  109. disdrodb/utils/dask.py +59 -8
  110. disdrodb/utils/dataframe.py +63 -9
  111. disdrodb/utils/directories.py +49 -17
  112. disdrodb/utils/encoding.py +33 -19
  113. disdrodb/utils/logger.py +13 -6
  114. disdrodb/utils/manipulations.py +71 -0
  115. disdrodb/utils/subsetting.py +214 -0
  116. disdrodb/utils/time.py +165 -19
  117. disdrodb/utils/writer.py +20 -7
  118. disdrodb/utils/xarray.py +85 -4
  119. disdrodb/viz/__init__.py +13 -0
  120. disdrodb/viz/plots.py +327 -0
  121. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
  122. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/RECORD +127 -87
  123. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
  124. disdrodb/l1/encoding_attrs.py +0 -635
  125. disdrodb/l2/processing_options.py +0 -213
  126. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  127. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
  128. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
  129. {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
disdrodb/l2/empirical_dsd.py CHANGED
@@ -23,8 +23,8 @@ Infinite values should be removed beforehand or otherwise are propagated through
 import numpy as np
 import xarray as xr
 
-from disdrodb import DIAMETER_DIMENSION, VELOCITY_DIMENSION
 from disdrodb.api.checks import check_sensor_name
+from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
 from disdrodb.utils.xarray import (
     remove_diameter_coordinates,
     remove_velocity_coordinates,
@@ -66,7 +66,7 @@ def get_drop_average_velocity(drop_number):
     ----------
     drop_number : xarray.DataArray
         Array of drop counts \\( n(D,v) \\) per diameter (and velocity, if available) bins
-        over the time integration period.
+        over the measurement interval.
         The DataArray must have the ``velocity_bin_center`` coordinate.
 
     Returns
@@ -80,6 +80,7 @@ def get_drop_average_velocity(drop_number):
         dim=VELOCITY_DIMENSION,
         skipna=False,
     )
+    average_velocity.name = "average_velocity"
     return average_velocity
 
 
@@ -138,6 +139,9 @@ def _compute_qc_bins_metrics(arr):
     return output
 
 
+BINS_METRICS = ["Nbins", "Nbins_missing", "Nbins_missing_fraction", "Nbins_missing_consecutive"]
+
+
 def compute_qc_bins_metrics(ds):
     """
     Compute quality-control metrics for drop-count bins along the diameter dimension.
@@ -191,11 +195,19 @@ def compute_qc_bins_metrics(ds):
     )
 
     # Assign meaningful labels to the qc 'metric' dimension
-    variables = ["Nbins", "Nbins_missing", "Nbins_missing_fraction", "Nbins_missing_consecutive"]
-    ds_qc_bins = da_qc_bins.assign_coords(metric=variables).to_dataset(dim="metric")
+    ds_qc_bins = da_qc_bins.assign_coords(metric=BINS_METRICS).to_dataset(dim="metric")
     return ds_qc_bins
 
 
+def add_bins_metrics(ds):
+    """Add bin metrics if missing."""
+    bins_metrics = BINS_METRICS
+    if not np.all(np.isin(bins_metrics, list(ds.data_vars))):
+        # Add bins statistics
+        ds.update(compute_qc_bins_metrics(ds))
+    return ds
+
+
 ####-------------------------------------------------------------------------------------------------------------------.
 #### DSD Spectrum, Concentration, Moments
 
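The new add_bins_metrics helper applies a compute-only-if-missing pattern against ds.data_vars. A minimal, self-contained sketch of the same pattern; the "temp_mean" metric and compute_metrics stub are hypothetical stand-ins for disdrodb's QC variables:

import numpy as np
import xarray as xr

METRICS = ["temp_mean"]  # hypothetical metric names

def compute_metrics(ds):
    # Toy stand-in for compute_qc_bins_metrics: one reduced variable.
    return xr.Dataset({"temp_mean": ds["temp"].mean(dim="time")})

def add_metrics(ds):
    # (Re)compute only when at least one metric variable is absent,
    # mirroring add_bins_metrics above.
    if not np.all(np.isin(METRICS, list(ds.data_vars))):
        ds.update(compute_metrics(ds))
    return ds

ds = xr.Dataset({"temp": ("time", np.arange(5.0))})
ds = add_metrics(ds)  # computes and attaches "temp_mean"
ds = add_metrics(ds)  # no-op: metric already present
print(float(ds["temp_mean"]))  # 2.0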
@@ -252,7 +264,7 @@ def get_drop_number_concentration(drop_number, velocity, diameter_bin_width, sam
         Width of each diameter bin \\( \\Delta D \\) in millimeters (mm).
     drop_number : xarray.DataArray
         Array of drop counts \\( n(D) or n(D,v) \\) per diameter (and velocity if available)
-        bins over the time integration period.
+        bins over the measurement interval.
     sample_interval : float or xarray.DataArray
         Time over which the drops are counted \\( \\Delta t \\) in seconds (s).
     sampling_area : float or xarray.DataArray
@@ -277,7 +289,7 @@ def get_drop_number_concentration(drop_number, velocity, diameter_bin_width, sam
     - \\( n(D,v) \\): Number of drops counted in diameter (and velocity) bins.
     - \\( A_{\text{eff}}(D) \\): Effective sampling area of the sensor for diameter \\( D \\) in square meters (m²).
     - \\( \\Delta D \\): Diameter bin width in millimeters (mm).
-    - \\( \\Delta t \\): Time integration period in seconds (s).
+    - \\( \\Delta t \\): Measurement interval in seconds (s).
     - \\( v(D) \\): Fall velocity of drops in diameter bin \\( D \\) in meters per second (m/s).
 
     The effective sampling area \\( A_{\text{eff}}(D) \\) depends on the sensor and may vary with drop diameter.
@@ -919,8 +931,7 @@ def get_min_max_diameter(drop_counts):
     return min_drop_diameter, max_drop_diameter
 
 
-def get_mode_diameter(drop_number_concentration, diameter):
-    """Get raindrop diameter with highest occurrence."""
+def _get_mode_diameter(drop_number_concentration, diameter):
     # If all NaN, set to 0 otherwise argmax fail when all NaN data
     idx_all_nan_mask = np.isnan(drop_number_concentration).all(dim=DIAMETER_DIMENSION)
     drop_number_concentration = drop_number_concentration.where(~idx_all_nan_mask, 0)
@@ -939,6 +950,43 @@ def get_mode_diameter(drop_number_concentration, diameter):
     return diameter_mode
 
 
+def get_mode_diameter(
+    drop_number_concentration,
+    diameter,
+):
+    """Get raindrop diameter with highest occurrence.
+
+    Parameters
+    ----------
+    drop_number_concentration : xarray.DataArray
+        The drop number concentration N(D) for each diameter bin, typically in units of
+        number per cubic meter per millimeter (m⁻³·mm⁻¹).
+    diameter : xarray.DataArray
+        The equivalent volume diameters D of the drops in each bin, in meters (m).
+
+    Returns
+    -------
+    xarray.DataArray
+        The diameter with the highest drop number concentration.
+    """
+    # Use map_blocks if working with Dask arrays
+    if hasattr(drop_number_concentration.data, "chunks"):
+        # Define the template for output
+        template = remove_diameter_coordinates(drop_number_concentration.isel({DIAMETER_DIMENSION: 0}))
+        diameter_mode = xr.map_blocks(
+            _get_mode_diameter,
+            drop_number_concentration,
+            kwargs={"diameter": diameter.compute()},
+            template=template,
+        )
+    else:
+        diameter_mode = _get_mode_diameter(
+            drop_number_concentration=drop_number_concentration,
+            diameter=diameter,
+        )
+    return diameter_mode
+
+
 ####-------------------------------------------------------------------------------------------------------------------.
 #### Mass Distribution Diameters
 
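The new get_mode_diameter wrapper dispatches between an eager call and xr.map_blocks when the underlying array is Dask-backed. A minimal sketch of the same dispatch pattern, assuming dask is installed; the _mode_x kernel is a toy idxmax stand-in for _get_mode_diameter:

import numpy as np
import xarray as xr

def _mode_x(da):
    # Eager kernel: "x" coordinate value where da is maximal along "x".
    return da.idxmax(dim="x").rename("mode_x")

da = xr.DataArray(
    np.random.rand(4, 10),
    dims=("time", "x"),
    coords={"x": np.linspace(0.1, 1.0, 10)},
).chunk({"time": 2})  # Dask-backed input

if hasattr(da.data, "chunks"):
    # Template mirrors the expected output (dims, dtype, chunks) so the
    # graph can be built without evaluating the kernel.
    template = da.isel(x=0, drop=True).rename("mode_x")
    result = xr.map_blocks(_mode_x, da, template=template)
else:
    # Eager path for NumPy-backed inputs.
    result = _mode_x(da)

print(result.compute().values)  # one mode per time step

As in the diff above, per-call arguments (diameter in disdrodb's case) are computed eagerly before being passed through kwargs.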
@@ -1369,7 +1417,7 @@ def get_normalized_intercept_parameter_from_moments(moment_3, moment_4):
         [m⁻³·mm³] (number per cubic meter times diameter cubed).
 
     moment_4 : float or array-like
-        The foruth moment of the drop size distribution, \\( M_3 \\), in units of
+        The fourth moment of the drop size distribution, \\( M_3 \\), in units of
         [m⁻³·mm4].
 
     Returns
@@ -1581,7 +1629,7 @@ def get_kinetic_energy_variables_from_drop_number(
     - \\( D_i \\) is the diameter of bin \\( i \\).
     - \\( v_j \\) is the velocity of bin \\( j \\).
     - \\( A \\) is the sampling area.
-    - \\( \\Delta t \\) is the time integration period in seconds.
+    - \\( \\Delta t \\) is the measurement interval in seconds.
     - \\( R \\) is the rainfall rate in mm/hr.
 
     """
disdrodb/l2/event.py CHANGED
@@ -15,110 +15,17 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Functions for event definition."""
-import dask
+
 import numpy as np
 import pandas as pd
-import xarray as xr
 
 from disdrodb.api.info import get_start_end_time_from_filepaths
-from disdrodb.utils.time import acronym_to_seconds, ensure_sorted_by_time
-
-
-@dask.delayed
-def _delayed_open_dataset(filepath):
-    with dask.config.set(scheduler="synchronous"):
-        ds = xr.open_dataset(filepath, chunks={}, autoclose=True, decode_timedelta=False, cache=False)
-    return ds
-
-
-def identify_events(
-    filepaths,
-    parallel=False,
-    min_n_drops=5,
-    neighbor_min_size=2,
-    neighbor_time_interval="5MIN",
-    intra_event_max_time_gap="6H",
-    event_min_duration="5MIN",
-    event_min_size=3,
-):
-    """Return a list of rainy events.
-
-    Rainy timesteps are defined when N > min_n_drops.
-    Any rainy isolated timesteps (based on neighborhood criteria) is removed.
-    Then, consecutive rainy timesteps are grouped into the same event if the time gap between them does not
-    exceed `intra_event_max_time_gap`. Finally, events that do not meet minimum size or duration
-    requirements are filtered out.
-
-    Parameters
-    ----------
-    filepaths: list
-        List of L1C file paths.
-    parallel: bool
-        Whether to load the files in parallel.
-        Set parallel=True only in a multiprocessing environment.
-        The default is False.
-    neighbor_time_interval : str
-        The time interval around a given a timestep defining the neighborhood.
-        Only timesteps that fall within this time interval before or after a timestep are considered neighbors.
-    neighbor_min_size : int, optional
-        The minimum number of neighboring timesteps required within `neighbor_time_interval` for a
-        timestep to be considered non-isolated. Isolated timesteps are removed !
-        - If `neighbor_min_size=0, then no timestep is considered isolated and no filtering occurs.
-        - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
-        - If `neighbor_min_size=2`, the timestep must have at least two timesteps within `neighbor_time_interval`.
-        Defaults to 1.
-    intra_event_max_time_gap: str
-        The maximum time interval between two timesteps to be considered part of the same event.
-        This parameters is used to group timesteps into events !
-    event_min_duration : str
-        The minimum duration an event must span. Events shorter than this duration are discarded.
-    event_min_size : int, optional
-        The minimum number of valid timesteps required for an event. Defaults to 1.
-
-    Returns
-    -------
-    list of dict
-        A list of events, where each event is represented as a dictionary with keys:
-        - "start_time": np.datetime64, start time of the event
-        - "end_time": np.datetime64, end time of the event
-        - "duration": np.timedelta64, duration of the event
-        - "n_timesteps": int, number of valid timesteps in the event
-    """
-    # Open datasets in parallel
-    if parallel:
-        list_ds = dask.compute([_delayed_open_dataset(filepath) for filepath in filepaths])[0]
-    else:
-        list_ds = [xr.open_dataset(filepath, chunks={}, cache=False, decode_timedelta=False) for filepath in filepaths]
-    # Filter dataset for requested variables
-    variables = ["time", "N"]
-    list_ds = [ds[variables] for ds in list_ds]
-    # Concat datasets
-    ds = xr.concat(list_ds, dim="time", compat="no_conflicts", combine_attrs="override")
-    # Read in memory the variable needed
-    ds = ds.compute()
-    # Close file on disk
-    _ = [ds.close() for ds in list_ds]
-    del list_ds
-    # Sort dataset by time
-    ds = ensure_sorted_by_time(ds)
-    # Define candidate timesteps to group into events
-    idx_valid = ds["N"].data > min_n_drops
-    timesteps = ds["time"].data[idx_valid]
-    # Define event list
-    event_list = group_timesteps_into_event(
-        timesteps=timesteps,
-        neighbor_min_size=neighbor_min_size,
-        neighbor_time_interval=neighbor_time_interval,
-        intra_event_max_time_gap=intra_event_max_time_gap,
-        event_min_duration=event_min_duration,
-        event_min_size=event_min_size,
-    )
-    return event_list
+from disdrodb.utils.time import ensure_timedelta_seconds_interval, temporal_resolution_to_seconds
 
 
 def group_timesteps_into_event(
     timesteps,
-    intra_event_max_time_gap,
+    event_max_time_gap,
     event_min_size=0,
     event_min_duration="0S",
     neighbor_min_size=0,
@@ -130,7 +37,7 @@ def group_timesteps_into_event(
     This function groups valid candidate timesteps into events by considering how they cluster
     in time. Any isolated timesteps (based on neighborhood criteria) are first removed. Then,
     consecutive timesteps are grouped into the same event if the time gap between them does not
-    exceed `intra_event_max_time_gap`. Finally, events that do not meet minimum size or duration
+    exceed `event_max_time_gap`. Finally, events that do not meet minimum size or duration
     requirements are filtered out.
 
     Please note that neighbor_min_size and neighbor_time_interval are very sensitive to the
@@ -150,7 +57,7 @@
         - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
         - If `neighbor_min_size=2`, the timestep must have at least two timesteps within `neighbor_time_interval`.
         Defaults to 1.
-    intra_event_max_time_gap: str
+    event_max_time_gap: str
         The maximum time interval between two timesteps to be considered part of the same event.
         This parameters is used to group timesteps into events !
     event_min_duration : str
@@ -168,9 +75,9 @@
         - "n_timesteps": int, number of valid timesteps in the event
     """
     # Retrieve datetime arguments
-    neighbor_time_interval = pd.Timedelta(acronym_to_seconds(neighbor_time_interval), unit="seconds")
-    intra_event_max_time_gap = pd.Timedelta(acronym_to_seconds(intra_event_max_time_gap), unit="seconds")
-    event_min_duration = pd.Timedelta(acronym_to_seconds(event_min_duration), unit="seconds")
+    neighbor_time_interval = pd.Timedelta(temporal_resolution_to_seconds(neighbor_time_interval), unit="seconds")
+    event_max_time_gap = pd.Timedelta(temporal_resolution_to_seconds(event_max_time_gap), unit="seconds")
+    event_min_duration = pd.Timedelta(temporal_resolution_to_seconds(event_min_duration), unit="seconds")
 
     # Remove isolated timesteps
     timesteps = remove_isolated_timesteps(
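temporal_resolution_to_seconds (which replaces acronym_to_seconds) converts resolution strings such as "5MIN" or "6H" into seconds before they are wrapped in pd.Timedelta. A hypothetical re-implementation sketching the expected behavior; disdrodb's own parser may accept additional units:

import re
import pandas as pd

def to_seconds(resolution: str) -> int:
    # Hypothetical parser: "<integer><unit>" with unit in S, MIN, H, D.
    match = re.fullmatch(r"(\d+)(S|MIN|H|D)", resolution)
    if match is None:
        raise ValueError(f"Unrecognized temporal resolution: {resolution!r}")
    value, unit = int(match.group(1)), match.group(2)
    return value * {"S": 1, "MIN": 60, "H": 3600, "D": 86400}[unit]

print(to_seconds("5MIN"), to_seconds("6H"))            # 300 21600
print(pd.Timedelta(to_seconds("6H"), unit="seconds"))  # 0 days 06:00:00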
@@ -180,8 +87,8 @@
     )
 
     # Group timesteps into events
-    # - If two timesteps are separated by less than intra_event_max_time_gap, are considered the same event
-    events = group_timesteps_into_events(timesteps, intra_event_max_time_gap)
+    # - If two timesteps are separated by less than event_max_time_gap, are considered the same event
+    events = group_timesteps_into_events(timesteps, event_max_time_gap)
 
     # Define list of event
     event_list = [
@@ -270,7 +177,7 @@ def remove_isolated_timesteps(timesteps, neighbor_min_size, neighbor_time_interv
     return non_isolated_timesteps
 
 
-def group_timesteps_into_events(timesteps, intra_event_max_time_gap):
+def group_timesteps_into_events(timesteps, event_max_time_gap):
    """
    Group valid timesteps into events based on a maximum allowed dry interval.
 
@@ -278,7 +185,7 @@
    ----------
    timesteps : array-like of np.datetime64
        Sorted array of valid timesteps.
-   intra_event_max_time_gap : np.timedelta64
+   event_max_time_gap : np.timedelta64
        Maximum time interval allowed between consecutive valid timesteps for them
        to be considered part of the same event.
 
@@ -297,9 +204,9 @@
    # Compute differences between consecutive timesteps
    diffs = np.diff(timesteps)
 
-   # Identify the indices where the gap is larger than intra_event_max_time_gap
+   # Identify the indices where the gap is larger than event_max_time_gap
    # These indices represent boundaries between events
-   break_indices = np.where(diffs > intra_event_max_time_gap)[0] + 1
+   break_indices = np.where(diffs > event_max_time_gap)[0] + 1
 
    # Split the timesteps at the identified break points
    events = np.split(timesteps, break_indices)
@@ -311,7 +218,7 @@
    #     current_t = timesteps[i]
    #     previous_t = timesteps[i - 1]
 
-   #     if current_t - previous_t <= intra_event_max_time_gap:
+   #     if current_t - previous_t <= event_max_time_gap:
    #         current_event.append(current_t)
    #     else:
    #         events.append(current_event)
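The vectorized grouping above (the commented-out loop documents the equivalent scalar logic) can be exercised in isolation; a self-contained sketch with toy timestamps and a 6-hour gap threshold:

import numpy as np

timesteps = np.array(
    ["2024-01-01T00:00", "2024-01-01T00:05", "2024-01-01T00:10",
     "2024-01-01T08:00", "2024-01-01T08:05"],
    dtype="datetime64[s]",
)
event_max_time_gap = np.timedelta64(6 * 3600, "s")  # 6 hours

# Break wherever the gap to the previous timestep exceeds the threshold
diffs = np.diff(timesteps)
break_indices = np.where(diffs > event_max_time_gap)[0] + 1
events = np.split(timesteps, break_indices)

print(len(events))                  # 2 events
print(events[0][0], events[0][-1])  # start/end of the first event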
@@ -324,21 +231,23 @@
 ####-----------------------------------------------------------------------------------.
 
 
-def get_events_info(list_events, filepaths, accumulation_interval, rolling):
+def get_files_partitions(list_partitions, filepaths, sample_interval, accumulation_interval, rolling):  # noqa: ARG001
     """
     Provide information about the required files for each event.
 
-    For each event in `list_events`, this function identifies the file paths from `filepaths` that
+    For each event in `list_partitions`, this function identifies the file paths from `filepaths` that
     overlap with the event period, adjusted by the `accumulation_interval`. The event period is
     extended backward or forward based on the `rolling` parameter.
 
     Parameters
     ----------
-    list_events : list of dict
+    list_partitions : list of dict
         List of events, where each event is a dictionary containing at least 'start_time' and 'end_time'
         keys with `numpy.datetime64` values.
     filepaths : list of str
         List of file paths corresponding to data files.
+    sample_interval : numpy.timedelta64 or int
+        The sample interval of the input dataset.
     accumulation_interval : numpy.timedelta64 or int
         Time interval to adjust the event period for accumulation. If an integer is provided, it is
         assumed to be in seconds.
@@ -355,25 +264,23 @@
         - 'filepaths': List of file paths overlapping with the adjusted event period.
 
     """
-    # Ensure accumulation_interval is numpy.timedelta64
-    if not isinstance(accumulation_interval, np.timedelta64):
-        accumulation_interval = np.timedelta64(accumulation_interval, "s")
+    # Ensure sample_interval and accumulation_interval is numpy.timedelta64
+    accumulation_interval = ensure_timedelta_seconds_interval(accumulation_interval)
+    sample_interval = ensure_timedelta_seconds_interval(sample_interval)
 
     # Retrieve file start_time and end_time
     files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)
 
     # Retrieve information for each event
     event_info = []
-    for event_dict in list_events:
+    for event_dict in list_partitions:
         # Retrieve event time period
         event_start_time = event_dict["start_time"]
         event_end_time = event_dict["end_time"]
 
-        # Add buffer to account for accumulation interval
-        if rolling:  # backward
-            event_start_time = event_start_time - np.array(accumulation_interval, dtype="m8[s]")
-        else:  # aggregate forward
-            event_end_time = event_end_time + np.array(accumulation_interval, dtype="m8[s]")
+        # Adapt event_end_time if accumulation interval different from sample interval
+        if sample_interval != accumulation_interval:
+            event_end_time = event_end_time + accumulation_interval
 
         # Derive event filepaths
         overlaps = (files_start_time <= event_end_time) & (files_end_time >= event_start_time)
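The overlaps mask above is the standard interval-overlap test: intervals [s1, e1] and [s2, e2] overlap iff s1 <= e2 and e1 >= s2. A self-contained sketch with toy file and event times:

import numpy as np

files_start_time = np.array(["2024-01-01T00:00", "2024-01-01T06:00"], dtype="datetime64[s]")
files_end_time = np.array(["2024-01-01T05:59", "2024-01-01T11:59"], dtype="datetime64[s]")
event_start_time = np.datetime64("2024-01-01T04:00")
event_end_time = np.datetime64("2024-01-01T05:00")

# A file overlaps the event iff it starts before the event ends
# and ends after the event starts.
overlaps = (files_start_time <= event_end_time) & (files_end_time >= event_start_time)
print(overlaps)  # [ True False]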