disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. disdrodb/__init__.py +68 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +177 -24
  7. disdrodb/api/configs.py +3 -3
  8. disdrodb/api/info.py +13 -13
  9. disdrodb/api/io.py +281 -22
  10. disdrodb/api/path.py +184 -195
  11. disdrodb/api/search.py +18 -9
  12. disdrodb/cli/disdrodb_create_summary.py +103 -0
  13. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  14. disdrodb/cli/disdrodb_run_l0.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
  19. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  20. disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
  21. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  22. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  23. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  24. disdrodb/configs.py +149 -4
  25. disdrodb/constants.py +61 -0
  26. disdrodb/data_transfer/download_data.py +127 -11
  27. disdrodb/etc/configs/attributes.yaml +339 -0
  28. disdrodb/etc/configs/encodings.yaml +473 -0
  29. disdrodb/etc/products/L1/global.yaml +13 -0
  30. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  31. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +22 -0
  33. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  34. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  35. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  38. disdrodb/etc/products/L2M/global.yaml +26 -0
  39. disdrodb/issue/writer.py +2 -0
  40. disdrodb/l0/__init__.py +13 -0
  41. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  42. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  43. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  44. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  45. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  46. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  48. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  49. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  50. disdrodb/l0/l0a_processing.py +37 -32
  51. disdrodb/l0/l0b_nc_processing.py +118 -8
  52. disdrodb/l0/l0b_processing.py +30 -65
  53. disdrodb/l0/l0c_processing.py +369 -259
  54. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
  55. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  56. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  58. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  59. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  60. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  61. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  63. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  66. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  67. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  69. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  71. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  72. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  73. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
  74. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  75. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  76. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  79. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  80. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  81. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  83. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
  84. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  85. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  86. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  87. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
  88. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  89. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  90. disdrodb/l1/__init__.py +5 -0
  91. disdrodb/l1/fall_velocity.py +46 -0
  92. disdrodb/l1/filters.py +34 -20
  93. disdrodb/l1/processing.py +46 -45
  94. disdrodb/l1/resampling.py +77 -66
  95. disdrodb/l1_env/routines.py +18 -3
  96. disdrodb/l2/__init__.py +7 -0
  97. disdrodb/l2/empirical_dsd.py +58 -10
  98. disdrodb/l2/processing.py +268 -117
  99. disdrodb/metadata/checks.py +132 -125
  100. disdrodb/metadata/standards.py +3 -1
  101. disdrodb/psd/fitting.py +631 -345
  102. disdrodb/psd/models.py +9 -6
  103. disdrodb/routines/__init__.py +54 -0
  104. disdrodb/{l0/routines.py → routines/l0.py} +316 -355
  105. disdrodb/{l1/routines.py → routines/l1.py} +76 -116
  106. disdrodb/routines/l2.py +1019 -0
  107. disdrodb/{routines.py → routines/wrappers.py} +98 -10
  108. disdrodb/scattering/__init__.py +16 -4
  109. disdrodb/scattering/axis_ratio.py +61 -37
  110. disdrodb/scattering/permittivity.py +504 -0
  111. disdrodb/scattering/routines.py +746 -184
  112. disdrodb/summary/__init__.py +17 -0
  113. disdrodb/summary/routines.py +4196 -0
  114. disdrodb/utils/archiving.py +434 -0
  115. disdrodb/utils/attrs.py +68 -125
  116. disdrodb/utils/cli.py +5 -5
  117. disdrodb/utils/compression.py +30 -1
  118. disdrodb/utils/dask.py +121 -9
  119. disdrodb/utils/dataframe.py +61 -7
  120. disdrodb/utils/decorators.py +31 -0
  121. disdrodb/utils/directories.py +35 -15
  122. disdrodb/utils/encoding.py +37 -19
  123. disdrodb/{l2 → utils}/event.py +15 -173
  124. disdrodb/utils/logger.py +14 -7
  125. disdrodb/utils/manipulations.py +81 -0
  126. disdrodb/utils/routines.py +166 -0
  127. disdrodb/utils/subsetting.py +214 -0
  128. disdrodb/utils/time.py +35 -177
  129. disdrodb/utils/writer.py +20 -7
  130. disdrodb/utils/xarray.py +5 -4
  131. disdrodb/viz/__init__.py +13 -0
  132. disdrodb/viz/plots.py +398 -0
  133. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
  134. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
  135. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
  136. disdrodb/l1/encoding_attrs.py +0 -642
  137. disdrodb/l2/processing_options.py +0 -213
  138. disdrodb/l2/routines.py +0 -868
  139. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  140. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
  141. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
  142. {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
disdrodb/l1/processing.py CHANGED
@@ -16,22 +16,19 @@
  # -----------------------------------------------------------------------------.
  """Core functions for DISDRODB L1 production."""

-
  import xarray as xr

- from disdrodb import DIAMETER_DIMENSION, VELOCITY_DIMENSION
- from disdrodb.l1.encoding_attrs import get_attrs_dict, get_encoding_dict
+ from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
  from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity
  from disdrodb.l1.filters import define_spectrum_mask, filter_diameter_bins, filter_velocity_bins
  from disdrodb.l1.resampling import add_sample_interval
  from disdrodb.l1_env.routines import load_env_dataset
  from disdrodb.l2.empirical_dsd import (  # TODO: maybe move out of L2
-     compute_qc_bins_metrics,
+     add_bins_metrics,
      get_min_max_diameter,
  )
- from disdrodb.utils.attrs import set_attrs
- from disdrodb.utils.encoding import set_encodings
  from disdrodb.utils.time import ensure_sample_interval_in_seconds, infer_sample_interval
+ from disdrodb.utils.writer import finalize_product


@@ -51,7 +48,7 @@ def generate_l1(
      small_velocity_threshold=2.5,  # 3
      maintain_smallest_drops=True,
  ):
-     """Generate the DISDRODB L1 dataset from the DISDRODB L0C dataset.
+     """Generate DISDRODB L1 Dataset from DISDRODB L0C Dataset.

      Parameters
      ----------
@@ -86,19 +83,19 @@ def generate_l1(
      Returns
      -------
      xarray.Dataset
-         DISRODB L1 dataset.
+         DISDRODB L1 dataset.
      """
-     # Take as input an L0 !
-
      # Retrieve source attributes
      attrs = ds.attrs.copy()

      # Determine if the velocity dimension is available
      has_velocity_dimension = VELOCITY_DIMENSION in ds.dims

-     # Initialize L2 dataset
-     ds_l1 = xr.Dataset()
+     # Retrieve sensor_name
+     # - If not present, don't drop Parsivels first two bins
+     sensor_name = attrs.get("sensor_name", "")

+     # ---------------------------------------------------------------------------
      # Retrieve sample interval
      # --> sample_interval is a coordinate of L0C products
      if "sample_interval" in ds:
@@ -107,39 +104,52 @@ def generate_l1(
          # This line is not called in the DISDRODB processing chain !
          sample_interval = infer_sample_interval(ds, verbose=False)

-     # Re-add sample interval as coordinate (in seconds)
-     ds = add_sample_interval(ds, sample_interval=sample_interval)
-
      # ---------------------------------------------------------------------------
      # Retrieve ENV dataset or take defaults
      # --> Used only for Beard fall velocity currently !
      ds_env = load_env_dataset(ds)

+     # ---------------------------------------------------------------------------
+     # Initialize L1 dataset
+     ds_l1 = xr.Dataset()
+
+     # Add raw_drop_number variable to L1 dataset
+     ds_l1["raw_drop_number"] = ds["raw_drop_number"]
+
+     # Add sample interval as coordinate (in seconds)
+     ds_l1 = add_sample_interval(ds_l1, sample_interval=sample_interval)
+
+     # Add L0C coordinates that might got lost
+     if "time_qc" in ds_l1:
+         ds_l1 = ds_l1.assign_coords({"time_qc": ds["time_qc"]})
+
      # -------------------------------------------------------------------------------------------
      # Filter dataset by diameter and velocity bins
+     if sensor_name in ["PARSIVEL", "PARSIVEL2"]:
+         # - Remove first two bins because never reports data !
+         # - If not removed, can alter e.g. L2M model fitting
+         ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=0.312)  # it includes the 0.2495-0.3745 bin
+
      # - Filter diameter bins
-     ds = filter_diameter_bins(ds=ds, minimum_diameter=minimum_diameter, maximum_diameter=maximum_diameter)
+     ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=minimum_diameter, maximum_diameter=maximum_diameter)
      # - Filter velocity bins
      if has_velocity_dimension:
-         ds = filter_velocity_bins(ds=ds, minimum_velocity=minimum_velocity, maximum_velocity=maximum_velocity)
+         ds_l1 = filter_velocity_bins(ds=ds_l1, minimum_velocity=minimum_velocity, maximum_velocity=maximum_velocity)

      # -------------------------------------------------------------------------------------------
      # Compute fall velocity
-     fall_velocity = get_raindrop_fall_velocity(
-         diameter=ds["diameter_bin_center"],
+     ds_l1["fall_velocity"] = get_raindrop_fall_velocity(
+         diameter=ds_l1["diameter_bin_center"],
          method=fall_velocity_method,
          ds_env=ds_env,  # mm
      )

-     # Add fall velocity
-     ds_l1["fall_velocity"] = fall_velocity
-
      # -------------------------------------------------------------------------------------------
      # Define filtering mask according to fall velocity
      if has_velocity_dimension:
          mask = define_spectrum_mask(
-             drop_number=ds["raw_drop_number"],
-             fall_velocity=fall_velocity,
+             drop_number=ds_l1["raw_drop_number"],
+             fall_velocity=ds_l1["fall_velocity"],
              above_velocity_fraction=above_velocity_fraction,
              above_velocity_tolerance=above_velocity_tolerance,
              below_velocity_fraction=below_velocity_fraction,
@@ -152,14 +162,14 @@ def generate_l1(
      # -------------------------------------------------------------------------------------------
      # Retrieve drop number and drop_counts arrays
      if has_velocity_dimension:
-         drop_number = ds["raw_drop_number"].where(mask)  # 2D (diameter, velocity)
+         drop_number = ds_l1["raw_drop_number"].where(mask)  # 2D (diameter, velocity)
          drop_counts = drop_number.sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
-         drop_counts_raw = ds["raw_drop_number"].sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
+         drop_counts_raw = ds_l1["raw_drop_number"].sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)

      else:
-         drop_number = ds["raw_drop_number"]  # 1D (diameter)
-         drop_counts = ds["raw_drop_number"]  # 1D (diameter)
-         drop_counts_raw = ds["raw_drop_number"]
+         drop_number = ds_l1["raw_drop_number"]  # 1D (diameter)
+         drop_counts = ds_l1["raw_drop_number"]  # 1D (diameter)
+         drop_counts_raw = ds_l1["raw_drop_number"]

      # Add drop number and drop_counts
      ds_l1["drop_number"] = drop_number
@@ -173,30 +183,21 @@ def generate_l1(
      ds_l1["Dmin"] = min_drop_diameter
      ds_l1["Dmax"] = max_drop_diameter
      ds_l1["N"] = drop_counts.sum(dim=DIAMETER_DIMENSION)
-     ds_l1["Nremoved"] = drop_counts_raw.sum(dim=DIAMETER_DIMENSION) - ds_l1["N"]
+     ds_l1["Nraw"] = drop_counts_raw.sum(dim=DIAMETER_DIMENSION)
+     ds_l1["Nremoved"] = ds_l1["Nraw"] - ds_l1["N"]

      # Add bins statistics
-     ds_l1.update(compute_qc_bins_metrics(ds_l1))
+     ds_l1 = add_bins_metrics(ds_l1)

      # -------------------------------------------------------------------------------------------
      # Add quality flags
      # TODO: snow_flags, insects_flag, ...

-     # -------------------------------------------------------------------------------------------
-     #### Add L0C coordinates that might got lost
-     if "time_qc" in ds:
-         ds_l1 = ds_l1.assign_coords({"time_qc": ds["time_qc"]})
-
      #### ----------------------------------------------------------------------------.
-     #### Add encodings and attributes
-     # Add variables attributes
-     attrs_dict = get_attrs_dict()
-     ds_l1 = set_attrs(ds_l1, attrs_dict=attrs_dict)
-
-     # Add variables encoding
-     encoding_dict = get_encoding_dict()
-     ds_l1 = set_encodings(ds_l1, encoding_dict=encoding_dict)
-
+     #### Finalize dataset
      # Add global attributes
      ds_l1.attrs = attrs
+
+     # Add variables attributes and encodings
+     ds_l1 = finalize_product(ds_l1, product="L1")
      return ds_l1
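For orientation, here is a minimal sketch of how the reworked `generate_l1` is typically driven (the file path is hypothetical; the `generate_l1(ds)` entry point, the `sensor_name`-based bin dropping, and the `finalize_product` step are taken from the diff above):

```python
import xarray as xr

from disdrodb.l1.processing import generate_l1

# Open an L0C station file (hypothetical path). generate_l1 reads
# sensor_name from ds.attrs to decide whether to drop the first two
# PARSIVEL/PARSIVEL2 diameter bins before the user-defined filtering.
ds = xr.open_dataset("L0C.STATION.nc")

# All filtering thresholds have defaults; variable attributes and
# encodings are now applied internally via finalize_product(..., product="L1").
ds_l1 = generate_l1(ds)

# New in this release: Nraw joins N and Nremoved in the output.
print(ds_l1[["N", "Nraw", "Nremoved"]])
```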
disdrodb/l1/resampling.py CHANGED
@@ -15,12 +15,11 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  # -----------------------------------------------------------------------------.
  """Utilities for temporal resampling."""
-
-
+ import numpy as np
  import pandas as pd
  import xarray as xr

- from disdrodb.utils.time import regularize_dataset
+ from disdrodb.utils.time import ensure_sample_interval_in_seconds, regularize_dataset

  DEFAULT_ACCUMULATIONS = ["10s", "30s", "1min", "2min", "5min", "10min", "30min", "1hour"]

@@ -96,6 +95,24 @@ def define_window_size(sample_interval, accumulation_interval):
      return window_size


+ def _resample(ds, variables, accumulation, op):
+     if not variables:
+         return {}
+     ds_subset = ds[variables]
+     if "time" in ds_subset.dims:
+         return getattr(ds_subset.resample({"time": accumulation}), op)(skipna=False)
+     return ds_subset
+
+
+ def _rolling(ds, variables, window_size, op):
+     if not variables:
+         return {}
+     ds_subset = ds[variables]
+     if "time" in ds_subset.dims:
+         return getattr(ds_subset.rolling(time=window_size, center=False), op)(skipna=False)
+     return ds_subset
+
+
  def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
      """
      Resample the dataset to a specified accumulation interval.
@@ -128,20 +145,61 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
      - The function updates the dataset attributes and the sample_interval coordinate.

      """
-     # Retrieve attributes
-     attrs = ds.attrs.copy()
-
-     # TODO: here infill NaN with zero if necessary before regularizing !
+     # --------------------------------------------------------------------------.
+     # Ensure sample interval in seconds
+     sample_interval = int(ensure_sample_interval_in_seconds(sample_interval))
+
+     # --------------------------------------------------------------------------.
+     # Raise error if the accumulation_interval is less than the sample interval
+     if accumulation_interval < sample_interval:
+         raise ValueError("Expecting an accumulation_interval > sample interval.")
+     # Raise error if accumulation_interval is not multiple of sample_interval
+     if not accumulation_interval % sample_interval == 0:
+         raise ValueError("The accumulation_interval is not a multiple of sample interval.")
+
+     # --------------------------------------------------------------------------.
+     #### Preprocess the dataset
+     # Here we set NaN in the raw_drop_number to 0
+     # - We assume that NaN corresponds to 0
+     # - When we regularize, we infill with NaN
+     # - When we aggregate with sum, we don't skip NaN
+     # --> Aggregation with original missing timesteps currently results in NaN !
+
+     # Infill NaN values with zeros for drop_number and raw_drop_number
+     # - This might alter integrated statistics if NaN in spectrum does not actually correspond to 0 !
+     # - TODO: NaN should not be set as 0 !
+     for var in ["drop_number", "raw_drop_number"]:
+         if var in ds:
+             ds[var] = xr.where(np.isnan(ds[var]), 0, ds[var])

      # Ensure regular dataset without missing timesteps
+     # --> This adds NaN values for missing timesteps
      ds = regularize_dataset(ds, freq=f"{sample_interval}s")

+     # --------------------------------------------------------------------------.
+     # Define dataset attributes
+     attrs = ds.attrs.copy()
+     if rolling:
+         attrs["disdrodb_rolled_product"] = "True"
+     else:
+         attrs["disdrodb_rolled_product"] = "False"
+
+     if sample_interval == accumulation_interval:
+         attrs["disdrodb_aggregated_product"] = "False"
+         ds = add_sample_interval(ds, sample_interval=accumulation_interval)
+         ds.attrs = attrs
+         return ds
+
+     # --------------------------------------------------------------------------.
+     # Resample the dataset
+     attrs["disdrodb_aggregated_product"] = "True"
+
      # Initialize resample dataset
      ds_resampled = xr.Dataset()

      # Retrieve variables to average/sum
      var_to_average = ["fall_velocity"]
-     var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nremoved"]
+     var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nraw", "Nremoved"]
      var_to_min = ["Dmin"]
      var_to_max = ["Dmax"]

@@ -154,6 +212,7 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
      # TODO Define custom processing
      # - quality_flag --> take worst
      # - skipna if less than fraction (to not waste lot of data when aggregating over i.e. hours)
+     # - Add tolerance on fraction of missing timesteps for large accumulation_intervals

      # Resample the dataset
      # - Rolling currently does not allow direct rolling forward.
@@ -163,74 +222,26 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
      # - https://github.com/pydata/xarray/issues/8958
      if not rolling:
          # Resample
-         if len(var_to_average) > 0:
-             ds_resampled.update(
-                 ds[var_to_average].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).mean(skipna=False),
-             )
-         if len(var_to_cumulate) > 0:
-             ds_resampled.update(
-                 ds[var_to_cumulate].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).sum(skipna=False),
-             )
-         if len(var_to_min) > 0:
-             ds_resampled.update(
-                 ds[var_to_min].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).min(skipna=False),
-             )
-         if len(var_to_max) > 0:
-             ds_resampled.update(
-                 ds[var_to_max].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).max(skipna=False),
-             )
-
+         accumulation = pd.Timedelta(seconds=accumulation_interval)
+         ds_resampled.update(_resample(ds=ds, variables=var_to_average, accumulation=accumulation, op="mean"))
+         ds_resampled.update(_resample(ds=ds, variables=var_to_cumulate, accumulation=accumulation, op="sum"))
+         ds_resampled.update(_resample(ds=ds, variables=var_to_min, accumulation=accumulation, op="min"))
+         ds_resampled.update(_resample(ds=ds, variables=var_to_max, accumulation=accumulation, op="max"))
      else:
          # Roll and Resample
          window_size = define_window_size(sample_interval=sample_interval, accumulation_interval=accumulation_interval)
-         if len(var_to_average) > 0:
-             ds_resampled.update(ds[var_to_average].rolling({"time": window_size}, center=False).mean(skipna=False))
-         if len(var_to_cumulate) > 0:
-             ds_resampled.update(ds[var_to_cumulate].rolling({"time": window_size}, center=False).sum(skipna=False))
-
-         if len(var_to_min) > 0:
-             ds_resampled.update(ds[var_to_min].rolling({"time": window_size}, center=False).min(skipna=False))
-         if len(var_to_max) > 0:
-             ds_resampled.update(ds[var_to_max].rolling({"time": window_size}, center=False).max(skipna=False))
-         # Ensure time to correspond to the start time of the integration
+         ds_resampled.update(_rolling(ds=ds, variables=var_to_average, window_size=window_size, op="mean"))
+         ds_resampled.update(_rolling(ds=ds, variables=var_to_cumulate, window_size=window_size, op="sum"))
+         ds_resampled.update(_rolling(ds=ds, variables=var_to_min, window_size=window_size, op="min"))
+         ds_resampled.update(_rolling(ds=ds, variables=var_to_max, window_size=window_size, op="max"))
+         # Ensure time to correspond to the start time of the measurement period
          ds_resampled = ds_resampled.isel(time=slice(window_size - 1, None)).assign_coords(
              {"time": ds_resampled["time"].data[: -window_size + 1]},
          )

      # Add attributes
      ds_resampled.attrs = attrs
-     if rolling:
-         ds_resampled.attrs["rolled"] = "True"
-     else:
-         ds_resampled.attrs["rolled"] = "False"

      # Add accumulation_interval as new sample_interval coordinate
      ds_resampled = add_sample_interval(ds_resampled, sample_interval=accumulation_interval)
      return ds_resampled
-
-
- def get_possible_accumulations(sample_interval, accumulations=None):
-     """
-     Get a list of valid accumulation intervals based on the sampling time.
-
-     Parameters
-     ----------
-     - sample_interval (int): The inferred sampling time in seconds.
-     - accumulations (list of int or string): List of desired accumulation intervals.
-       If provide integers, specify accumulation in seconds.
-
-     Returns
-     -------
-     - list of int: Valid accumulation intervals in seconds.
-     """
-     # Select default accumulations
-     if accumulations is None:
-         accumulations = DEFAULT_ACCUMULATIONS
-
-     # Get accumulations in seconds
-     accumulations = [int(pd.Timedelta(acc).total_seconds()) if isinstance(acc, str) else acc for acc in accumulations]
-
-     # Filter candidate accumulations to include only those that are multiples of the sampling time
-     possible_accumulations = [acc for acc in accumulations if acc % sample_interval == 0]
-
-     return possible_accumulations
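The refactor above folds the four copy-pasted resample/rolling branches into the `_resample` and `_rolling` helpers and makes `resample_dataset` validate its inputs up front. A short sketch of the resulting contract (the input dataset `ds` is hypothetical; the function signature and attribute keys are from the diff):

```python
from disdrodb.l1.resampling import resample_dataset

# ds: an L1 dataset sampled every 30 s (hypothetical).
# Aggregating to 10 min is allowed because 600 s is a multiple of 30 s.
ds_10min = resample_dataset(ds, sample_interval=30, accumulation_interval=600, rolling=False)

# Provenance is now recorded in dedicated attributes
# (previously a single "rolled" attribute).
assert ds_10min.attrs["disdrodb_aggregated_product"] == "True"
assert ds_10min.attrs["disdrodb_rolled_product"] == "False"

# Non-multiple intervals now fail fast instead of resampling silently:
# resample_dataset(ds, sample_interval=30, accumulation_interval=45)
# --> ValueError: The accumulation_interval is not a multiple of sample interval.
```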
disdrodb/l1_env/routines.py CHANGED
@@ -15,9 +15,10 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  # -----------------------------------------------------------------------------.
  """Core functions for DISDRODB ENV production."""
-
  import xarray as xr

+ from disdrodb.constants import GEOLOCATION_COORDS
+

  def get_default_environment_dataset():
      """Define defaults values for the ENV dataset."""
@@ -30,9 +31,23 @@ def get_default_environment_dataset():
      return ds_env


+ def _assign_geolocation(ds_src, dst_dst):
+
+     dict_coords = {coord: ds_src[coord] for coord in GEOLOCATION_COORDS if coord in ds_src}
+     dst_dst = dst_dst.assign_coords(dict_coords)
+     return dst_dst
+
+
  def load_env_dataset(ds):
      """Load the ENV dataset."""
-     # TODO - Retrieve relative_humidity and temperature from L1-ENV
+     # TODO: Retrieve relative_humidity and temperature from L1-ENV
      ds_env = get_default_environment_dataset()
-     ds_env = ds_env.assign_coords({"altitude": ds["altitude"], "latitude": ds["latitude"]})
+     # Compute water density
+     # get_water_density(
+     #     temperature=temperature,
+     #     air_pressure=air_pressure,
+     # )
+     # --> (T == 10 --> 999.7, T == 20 --> 998.2
+     ds_env["water_density"] = 1000  # kg / m3  # TODO as function of ENV (temperature, ...) ?
+     ds_env = _assign_geolocation(ds_src=ds, dst_dst=ds_env)
      return ds_env
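A small sketch of what the updated `load_env_dataset` now produces (the input dataset is hypothetical; the constant `water_density` and the `GEOLOCATION_COORDS`-based coordinate copying are from the diff):

```python
from disdrodb.l1_env.routines import load_env_dataset

# ds: any dataset carrying geolocation coordinates (hypothetical).
ds_env = load_env_dataset(ds)

# The ENV dataset now includes a constant water density (1000 kg/m3,
# with a TODO to derive it from temperature) and copies whichever
# coordinates listed in GEOLOCATION_COORDS are present on the source.
print(float(ds_env["water_density"]))  # 1000.0
```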
disdrodb/l2/__init__.py CHANGED
@@ -15,3 +15,10 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  # -----------------------------------------------------------------------------.
  """Module for DISDRODB L2 production."""
+ from disdrodb.l2.processing import generate_l2_radar, generate_l2e, generate_l2m
+
+ __all__ = [
+     "generate_l2_radar",
+     "generate_l2e",
+     "generate_l2m",
+ ]
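With these re-exports, the L2 generators can now be imported directly from the subpackage root:

```python
from disdrodb.l2 import generate_l2_radar, generate_l2e, generate_l2m
```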
disdrodb/l2/empirical_dsd.py CHANGED
@@ -23,8 +23,8 @@ Infinite values should be removed beforehand or otherwise are propagated through
  import numpy as np
  import xarray as xr

- from disdrodb import DIAMETER_DIMENSION, VELOCITY_DIMENSION
  from disdrodb.api.checks import check_sensor_name
+ from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
  from disdrodb.utils.xarray import (
      remove_diameter_coordinates,
      remove_velocity_coordinates,
@@ -66,7 +66,7 @@ def get_drop_average_velocity(drop_number):
      ----------
      drop_number : xarray.DataArray
          Array of drop counts \\( n(D,v) \\) per diameter (and velocity, if available) bins
-         over the time integration period.
+         over the measurement interval.
          The DataArray must have the ``velocity_bin_center`` coordinate.

      Returns
@@ -80,6 +80,7 @@ def get_drop_average_velocity(drop_number):
          dim=VELOCITY_DIMENSION,
          skipna=False,
      )
+     average_velocity.name = "average_velocity"
      return average_velocity


@@ -138,6 +139,9 @@ def _compute_qc_bins_metrics(arr):
      return output


+ BINS_METRICS = ["Nbins", "Nbins_missing", "Nbins_missing_fraction", "Nbins_missing_consecutive"]
+
+
  def compute_qc_bins_metrics(ds):
      """
      Compute quality-control metrics for drop-count bins along the diameter dimension.
@@ -191,11 +195,19 @@ def compute_qc_bins_metrics(ds):
      )

      # Assign meaningful labels to the qc 'metric' dimension
-     variables = ["Nbins", "Nbins_missing", "Nbins_missing_fraction", "Nbins_missing_consecutive"]
-     ds_qc_bins = da_qc_bins.assign_coords(metric=variables).to_dataset(dim="metric")
+     ds_qc_bins = da_qc_bins.assign_coords(metric=BINS_METRICS).to_dataset(dim="metric")
      return ds_qc_bins


+ def add_bins_metrics(ds):
+     """Add bin metrics if missing."""
+     bins_metrics = BINS_METRICS
+     if not np.all(np.isin(bins_metrics, list(ds.data_vars))):
+         # Add bins statistics
+         ds.update(compute_qc_bins_metrics(ds))
+     return ds
+
+
  ####-------------------------------------------------------------------------------------------------------------------.
  #### DSD Spectrum, Concentration, Moments

@@ -252,7 +264,7 @@ def get_drop_number_concentration(drop_number, velocity, diameter_bin_width, sam
          Width of each diameter bin \\( \\Delta D \\) in millimeters (mm).
      drop_number : xarray.DataArray
          Array of drop counts \\( n(D) or n(D,v) \\) per diameter (and velocity if available)
-         bins over the time integration period.
+         bins over the measurement interval.
      sample_interval : float or xarray.DataArray
          Time over which the drops are counted \\( \\Delta t \\) in seconds (s).
      sampling_area : float or xarray.DataArray
@@ -277,7 +289,7 @@ def get_drop_number_concentration(drop_number, velocity, diameter_bin_width, sam
      - \\( n(D,v) \\): Number of drops counted in diameter (and velocity) bins.
      - \\( A_{\text{eff}}(D) \\): Effective sampling area of the sensor for diameter \\( D \\) in square meters (m²).
      - \\( \\Delta D \\): Diameter bin width in millimeters (mm).
-     - \\( \\Delta t \\): Time integration period in seconds (s).
+     - \\( \\Delta t \\): Measurement interval in seconds (s).
      - \\( v(D) \\): Fall velocity of drops in diameter bin \\( D \\) in meters per second (m/s).

      The effective sampling area \\( A_{\text{eff}}(D) \\) depends on the sensor and may vary with drop diameter.
@@ -919,8 +931,7 @@ def get_min_max_diameter(drop_counts):
      return min_drop_diameter, max_drop_diameter


- def get_mode_diameter(drop_number_concentration, diameter):
-     """Get raindrop diameter with highest occurrence."""
+ def _get_mode_diameter(drop_number_concentration, diameter):
      # If all NaN, set to 0 otherwise argmax fail when all NaN data
      idx_all_nan_mask = np.isnan(drop_number_concentration).all(dim=DIAMETER_DIMENSION)
      drop_number_concentration = drop_number_concentration.where(~idx_all_nan_mask, 0)
@@ -939,6 +950,43 @@
      return diameter_mode


+ def get_mode_diameter(
+     drop_number_concentration,
+     diameter,
+ ):
+     """Get raindrop diameter with highest occurrence.
+
+     Parameters
+     ----------
+     drop_number_concentration : xarray.DataArray
+         The drop number concentration N(D) for each diameter bin, typically in units of
+         number per cubic meter per millimeter (m⁻³·mm⁻¹).
+     diameter : xarray.DataArray
+         The equivalent volume diameters D of the drops in each bin, in meters (m).
+
+     Returns
+     -------
+     xarray.DataArray
+         The diameter with the highest drop number concentration.
+     """
+     # Use map_blocks if working with Dask arrays
+     if hasattr(drop_number_concentration.data, "chunks"):
+         # Define the template for output
+         template = remove_diameter_coordinates(drop_number_concentration.isel({DIAMETER_DIMENSION: 0}))
+         diameter_mode = xr.map_blocks(
+             _get_mode_diameter,
+             drop_number_concentration,
+             kwargs={"diameter": diameter.compute()},
+             template=template,
+         )
+     else:
+         diameter_mode = _get_mode_diameter(
+             drop_number_concentration=drop_number_concentration,
+             diameter=diameter,
+         )
+     return diameter_mode
+
+
  ####-------------------------------------------------------------------------------------------------------------------.
  #### Mass Distribution Diameters

@@ -1369,7 +1417,7 @@ def get_normalized_intercept_parameter_from_moments(moment_3, moment_4):
          [m⁻³·mm³] (number per cubic meter times diameter cubed).

      moment_4 : float or array-like
-         The foruth moment of the drop size distribution, \\( M_3 \\), in units of
+         The fourth moment of the drop size distribution, \\( M_3 \\), in units of
          [m⁻³·mm4].

      Returns
@@ -1581,7 +1629,7 @@ def get_kinetic_energy_variables_from_drop_number(
      - \\( D_i \\) is the diameter of bin \\( i \\).
      - \\( v_j \\) is the velocity of bin \\( j \\).
      - \\( A \\) is the sampling area.
-     - \\( \\Delta t \\) is the time integration period in seconds.
+     - \\( \\Delta t \\) is the measurement interval in seconds.
      - \\( R \\) is the rainfall rate in mm/hr.

      """