disdrodb 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -3
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/io.py +14 -17
  7. disdrodb/api/path.py +42 -77
  8. disdrodb/api/search.py +89 -23
  9. disdrodb/cli/disdrodb_create_summary.py +11 -1
  10. disdrodb/cli/disdrodb_create_summary_station.py +10 -0
  11. disdrodb/cli/disdrodb_run_l0.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  15. disdrodb/cli/disdrodb_run_l1.py +1 -1
  16. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  17. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  18. disdrodb/configs.py +30 -83
  19. disdrodb/constants.py +4 -3
  20. disdrodb/data_transfer/download_data.py +4 -2
  21. disdrodb/docs.py +2 -2
  22. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  29. disdrodb/etc/products/L1/global.yaml +7 -1
  30. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  31. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +1 -1
  33. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +1 -1
  35. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +8 -0
  38. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  39. disdrodb/etc/products/L2M/global.yaml +11 -3
  40. disdrodb/l0/check_configs.py +49 -16
  41. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  42. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  43. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  44. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  47. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  48. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  49. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  50. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  51. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  52. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  53. disdrodb/l0/l0_reader.py +2 -2
  54. disdrodb/l0/l0b_processing.py +70 -15
  55. disdrodb/l0/l0c_processing.py +7 -3
  56. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -1
  57. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  58. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  59. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  60. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  61. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  62. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  63. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  64. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  65. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  66. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  67. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  68. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  69. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/LPVEX.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  71. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +8 -17
  72. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  73. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  74. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  75. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  76. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  77. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  78. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  79. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  80. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  81. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  83. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  84. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  85. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  86. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  87. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +232 -0
  88. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  89. disdrodb/l0/readers/PARSIVEL2/{NASA/LPVEX.py → SPAIN/GRANADA.py} +46 -35
  90. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  91. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +10 -28
  92. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  93. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  94. disdrodb/l1/beard_model.py +31 -129
  95. disdrodb/l1/fall_velocity.py +136 -83
  96. disdrodb/l1/filters.py +25 -28
  97. disdrodb/l1/processing.py +16 -17
  98. disdrodb/l1/resampling.py +101 -38
  99. disdrodb/l1_env/routines.py +46 -17
  100. disdrodb/l2/empirical_dsd.py +6 -0
  101. disdrodb/l2/processing.py +6 -5
  102. disdrodb/metadata/geolocation.py +0 -2
  103. disdrodb/metadata/search.py +3 -4
  104. disdrodb/psd/fitting.py +16 -13
  105. disdrodb/routines/l0.py +2 -2
  106. disdrodb/routines/l1.py +173 -60
  107. disdrodb/routines/l2.py +148 -284
  108. disdrodb/routines/options.py +345 -0
  109. disdrodb/routines/wrappers.py +14 -1
  110. disdrodb/scattering/axis_ratio.py +90 -84
  111. disdrodb/scattering/permittivity.py +6 -0
  112. disdrodb/summary/routines.py +735 -670
  113. disdrodb/utils/archiving.py +51 -44
  114. disdrodb/utils/attrs.py +3 -1
  115. disdrodb/utils/dask.py +4 -4
  116. disdrodb/utils/dict.py +33 -0
  117. disdrodb/utils/encoding.py +6 -1
  118. disdrodb/utils/routines.py +9 -8
  119. disdrodb/utils/time.py +11 -3
  120. disdrodb/viz/__init__.py +0 -13
  121. disdrodb/viz/plots.py +231 -1
  122. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/METADATA +2 -1
  123. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/RECORD +135 -103
  124. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  125. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  126. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  127. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/IFLOODS.py +0 -0
  128. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +0 -0
  129. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/PIERS.py +0 -0
  130. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/GCPEX.py +0 -0
  131. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  132. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/WHEEL +0 -0
  133. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/entry_points.txt +0 -0
  134. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/licenses/LICENSE +0 -0
  135. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/top_level.txt +0 -0
disdrodb/l1/processing.py CHANGED
@@ -19,8 +19,8 @@
 import xarray as xr
 
 from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
-from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity
-from disdrodb.l1.filters import define_spectrum_mask, filter_diameter_bins, filter_velocity_bins
+from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity_from_ds
+from disdrodb.l1.filters import define_raindrop_spectrum_mask, filter_diameter_bins, filter_velocity_bins
 from disdrodb.l1.resampling import add_sample_interval
 from disdrodb.l1_env.routines import load_env_dataset
 from disdrodb.l2.empirical_dsd import (  # TODO: maybe move out of L2
@@ -34,7 +34,7 @@ from disdrodb.utils.writer import finalize_product
 def generate_l1(
     ds,
     # Fall velocity option
-    fall_velocity_method="Beard1976",
+    fall_velocity_model="Beard1976",
     # Diameter-Velocity Filtering Options
     minimum_diameter=0,
     maximum_diameter=10,
@@ -54,7 +54,7 @@ def generate_l1(
     ----------
     ds : xarray.Dataset
         DISDRODB L0C dataset.
-    fall_velocity_method : str, optional
+    fall_velocity_model : str, optional
         Method to compute fall velocity.
         The default method is ``"Beard1976"``.
     minimum_diameter : float, optional
@@ -106,7 +106,9 @@
 
     # ---------------------------------------------------------------------------
     # Retrieve ENV dataset or take defaults
-    # --> Used only for Beard fall velocity currently !
+    # - Used only for Beard fall velocity currently !
+    # - It checks and includes default geolocation if missing
+    # - For mobile disdrometer, infill missing geolocation with backward and forward filling
     ds_env = load_env_dataset(ds)
 
     # ---------------------------------------------------------------------------
@@ -119,16 +121,18 @@
     # Add sample interval as coordinate (in seconds)
     ds_l1 = add_sample_interval(ds_l1, sample_interval=sample_interval)
 
-    # Add L0C coordinates that might got lost
-    if "time_qc" in ds_l1:
-        ds_l1 = ds_l1.assign_coords({"time_qc": ds["time_qc"]})
+    # Add optional variables to L1 dataset
+    optional_variables = ["time_qc", "qc_resampling"]
+    for var in optional_variables:
+        if var in ds:
+            ds_l1[var] = ds[var]
 
     # -------------------------------------------------------------------------------------------
     # Filter dataset by diameter and velocity bins
     if sensor_name in ["PARSIVEL", "PARSIVEL2"]:
         # - Remove first two bins because never reports data !
         # - If not removed, can alter e.g. L2M model fitting
-        ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=0.312)  # it includes the 0.2495-0.3745 bin
+        ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=0.2495)  # it includes the 0.2495-0.3745 bin
 
     # - Filter diameter bins
     ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=minimum_diameter, maximum_diameter=maximum_diameter)
@@ -138,16 +142,12 @@
 
     # -------------------------------------------------------------------------------------------
     # Compute fall velocity
-    ds_l1["fall_velocity"] = get_raindrop_fall_velocity(
-        diameter=ds_l1["diameter_bin_center"],
-        method=fall_velocity_method,
-        ds_env=ds_env,  # mm
-    )
+    ds_l1["fall_velocity"] = get_raindrop_fall_velocity_from_ds(ds=ds_l1, ds_env=ds_env, model=fall_velocity_model)
 
     # -------------------------------------------------------------------------------------------
     # Define filtering mask according to fall velocity
     if has_velocity_dimension:
-        mask = define_spectrum_mask(
+        mask = define_raindrop_spectrum_mask(
             drop_number=ds_l1["raw_drop_number"],
             fall_velocity=ds_l1["fall_velocity"],
             above_velocity_fraction=above_velocity_fraction,
@@ -162,10 +162,9 @@
     # -------------------------------------------------------------------------------------------
     # Retrieve drop number and drop_counts arrays
     if has_velocity_dimension:
-        drop_number = ds_l1["raw_drop_number"].where(mask)  # 2D (diameter, velocity)
+        drop_number = ds_l1["raw_drop_number"].where(mask, 0)  # 2D (diameter, velocity)
        drop_counts = drop_number.sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
        drop_counts_raw = ds_l1["raw_drop_number"].sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
-
     else:
         drop_number = ds_l1["raw_drop_number"]  # 1D (diameter)
         drop_counts = ds_l1["raw_drop_number"]  # 1D (diameter)
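Note the API break above: the `generate_l1` keyword `fall_velocity_method` is now `fall_velocity_model`. A minimal caller sketch against the signature shown in this diff (the file path is illustrative; everything else follows the hunks above):

    import xarray as xr

    from disdrodb.l1.processing import generate_l1

    # Illustrative L0C file path; any DISDRODB L0C dataset applies
    ds = xr.open_dataset("L0C.EXAMPLE_STATION.nc")

    # 0.2.0: pass fall_velocity_model=... (the old fall_velocity_method=... keyword no longer exists)
    ds_l1 = generate_l1(ds, fall_velocity_model="Beard1976", minimum_diameter=0, maximum_diameter=10)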
disdrodb/l1/resampling.py CHANGED
@@ -19,9 +19,12 @@ import numpy as np
 import pandas as pd
 import xarray as xr
 
-from disdrodb.utils.time import ensure_sample_interval_in_seconds, regularize_dataset
-
-DEFAULT_ACCUMULATIONS = ["10s", "30s", "1min", "2min", "5min", "10min", "30min", "1hour"]
+from disdrodb.utils.time import (
+    ensure_sample_interval_in_seconds,
+    get_dataset_start_end_time,
+    get_sampling_information,
+    regularize_dataset,
+)
 
 
 def add_sample_interval(ds, sample_interval):
@@ -95,6 +98,27 @@ def define_window_size(sample_interval, accumulation_interval):
     return window_size
 
 
+def _finalize_qc_resampling(ds, sample_interval, accumulation_interval):
+    # Compute qc_resampling
+    # - 0 if not missing timesteps
+    # - 1 if all timesteps missing
+    n_timesteps = accumulation_interval / sample_interval
+    ds["qc_resampling"] = np.round(1 - ds["qc_resampling"] / n_timesteps, 1)
+    ds["qc_resampling"].attrs = {
+        "long_name": "Resampling Quality Control Flag",
+        "standard_name": "quality_flag",
+        "units": "",
+        "valid_min": 0.0,
+        "valid_max": 1.0,
+        "description": (
+            "Fraction of timesteps missing when resampling the data."
+            "0 = No timesteps missing; 1 = All timesteps missing;"
+            "Intermediate values indicate partial data coverage."
+        ),
+    }
+    return ds
+
+
 def _resample(ds, variables, accumulation, op):
     if not variables:
         return {}
@@ -113,23 +137,24 @@ def _rolling(ds, variables, window_size, op):
     return ds_subset
 
 
-def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
+def resample_dataset(ds, sample_interval, temporal_resolution):
     """
     Resample the dataset to a specified accumulation interval.
 
+    The output timesteps correspond to the starts of the periods over which
+    the resampling operation has been performed !
+
     Parameters
     ----------
     ds : xarray.Dataset
         The input dataset to be resampled.
     sample_interval : int
-        The sample interval of the input dataset.
-    accumulation_interval : int
-        The interval in seconds over which to accumulate the data.
-    rolling : bool, optional
-        If True, apply a rolling window before resampling. Default is True.
-        If True, forward rolling is performed.
-        The output timesteps correspond to the starts of the periods over which
-        the resampling operation has been performed !
+        The sample interval (in seconds) of the input dataset.
+    temporal_resolution : str
+        The desired temporal resolution for resampling.
+        It should be a string representing the accumulation interval,
+        e.g., "5MIN" for 5 minutes, "1H" for 1 hour, "30S" for 30 seconds, etc.
+        Prefixed with "ROLL" for rolling resampling, e.g., "ROLL5MIN".
 
     Returns
     -------
@@ -149,6 +174,9 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     # Ensure sample interval in seconds
     sample_interval = int(ensure_sample_interval_in_seconds(sample_interval))
 
+    # Retrieve accumulation_interval and rolling option
+    accumulation_interval, rolling = get_sampling_information(temporal_resolution)
+
     # --------------------------------------------------------------------------.
     # Raise error if the accumulation_interval is less than the sample interval
     if accumulation_interval < sample_interval:
@@ -157,51 +185,78 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     if not accumulation_interval % sample_interval == 0:
         raise ValueError("The accumulation_interval is not a multiple of sample interval.")
 
+    # Retrieve input dataset start_time and end_time
+    start_time, end_time = get_dataset_start_end_time(ds, time_dim="time")
+
+    # Initialize qc_resampling
+    ds["qc_resampling"] = xr.ones_like(ds["time"], dtype="float")
+
+    # Retrieve dataset attributes
+    attrs = ds.attrs.copy()
+
+    # If no resampling, return as it is
+    if sample_interval == accumulation_interval:
+        attrs["disdrodb_aggregated_product"] = "False"
+        attrs["disdrodb_rolled_product"] = "False"
+        attrs["disdrodb_temporal_resolution"] = temporal_resolution
+
+        ds = _finalize_qc_resampling(ds, sample_interval=sample_interval, accumulation_interval=accumulation_interval)
+        ds = add_sample_interval(ds, sample_interval=accumulation_interval)
+        ds.attrs = attrs
+        return ds
+
     # --------------------------------------------------------------------------.
     #### Preprocess the dataset
-    # Here we set NaN in the raw_drop_number to 0
-    # - We assume that NaN corresponds to 0
-    # - When we regularize, we infill with NaN
+    # - Set timesteps with NaN in drop_number to zero (and set qc_resampling to 0)
     # - When we aggregate with sum, we don't skip NaN
-    # --> Aggregation with original missing timesteps currently results in NaN !
+    # --> Resampling over missing timesteps will result in NaN drop_number and qc_resampling = 1
+    # --> Resampling over timesteps with NaN in drop_number will result in finite drop_number but qc_resampling > 0
+    # - qc_resampling will inform on the amount of timesteps missing
 
-    # Infill NaN values with zeros for drop_number and raw_drop_number
-    # - This might alter integrated statistics if NaN in spectrum does not actually correspond to 0 !
-    # - TODO: NaN should not be set as 0 !
-    for var in ["drop_number", "raw_drop_number"]:
+    for var in ["drop_number", "raw_drop_number", "drop_counts", "drop_number_concentration"]:
         if var in ds:
-            ds[var] = xr.where(np.isnan(ds[var]), 0, ds[var])
+            dims = set(ds[var].dims) - {"time"}
+            invalid_timesteps = np.isnan(ds[var]).any(dim=dims)
+            ds[var] = ds[var].where(~invalid_timesteps, 0)
+            ds["qc_resampling"] = ds["qc_resampling"].where(~invalid_timesteps, 0)
+
+            if np.all(invalid_timesteps).item():
+                raise ValueError("No timesteps with valid spectrum.")
 
     # Ensure regular dataset without missing timesteps
     # --> This adds NaN values for missing timesteps
-    ds = regularize_dataset(ds, freq=f"{sample_interval}s")
+    ds = regularize_dataset(ds, freq=f"{sample_interval}s", start_time=start_time, end_time=end_time)
+    ds["qc_resampling"] = ds["qc_resampling"].where(~np.isnan(ds["qc_resampling"]), 0)
 
     # --------------------------------------------------------------------------.
     # Define dataset attributes
-    attrs = ds.attrs.copy()
     if rolling:
         attrs["disdrodb_rolled_product"] = "True"
     else:
         attrs["disdrodb_rolled_product"] = "False"
 
-    if sample_interval == accumulation_interval:
-        attrs["disdrodb_aggregated_product"] = "False"
-        ds = add_sample_interval(ds, sample_interval=accumulation_interval)
-        ds.attrs = attrs
-        return ds
-
-    # --------------------------------------------------------------------------.
-    # Resample the dataset
     attrs["disdrodb_aggregated_product"] = "True"
+    attrs["disdrodb_temporal_resolution"] = temporal_resolution
 
+    # --------------------------------------------------------------------------.
     # Initialize resample dataset
     ds_resampled = xr.Dataset()
 
     # Retrieve variables to average/sum
+    # - ATTENTION: it will not resample non-dimensional time coordinates of the dataset !
     var_to_average = ["fall_velocity"]
-    var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nraw", "Nremoved"]
+    var_to_cumulate = [
+        "raw_drop_number",
+        "drop_number",
+        "drop_counts",
+        "drop_number_concentration",
+        "N",
+        "Nraw",
+        "Nremoved",
+        "qc_resampling",
+    ]
     var_to_min = ["Dmin"]
-    var_to_max = ["Dmax"]
+    var_to_max = ["Dmax", "time_qc"]
 
     # Retrieve available variables
     var_to_average = [var for var in var_to_average if var in ds]
@@ -209,11 +264,6 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     var_to_min = [var for var in var_to_min if var in ds]
     var_to_max = [var for var in var_to_max if var in ds]
 
-    # TODO Define custom processing
-    # - quality_flag --> take worst
-    # - skipna if less than fraction (to not waste lot of data when aggregating over i.e. hours)
-    # - Add tolerance on fraction of missing timesteps for large accumulation_intervals
-
     # Resample the dataset
     # - Rolling currently does not allow direct rolling forward.
     # - We currently use center=False which means search for data backward (right-aligned) !
@@ -239,6 +289,19 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
         {"time": ds_resampled["time"].data[: -window_size + 1]},
     )
 
+    # Finalize qc_resampling
+    ds_resampled = _finalize_qc_resampling(
+        ds_resampled,
+        sample_interval=sample_interval,
+        accumulation_interval=accumulation_interval,
+    )
+    # Set to NaN timesteps where qc_resampling == 1
+    # --> This occurs for missing timesteps in input dataset or all NaN drop_number arrays
+    variables = list(set(ds_resampled.data_vars) - {"qc_resampling"})
+    mask_missing_timesteps = ds_resampled["qc_resampling"] != 1
+    for var in variables:
+        ds_resampled[var] = ds_resampled[var].where(mask_missing_timesteps)
+
     # Add attributes
     ds_resampled.attrs = attrs
 
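Two things are worth calling out in the hunks above: `resample_dataset` now takes a `temporal_resolution` string instead of `accumulation_interval`/`rolling`, and the new `qc_resampling` variable encodes the fraction of missing input timesteps. A worked sketch of both, following the arithmetic in `_finalize_qc_resampling` (the dataset `ds` is assumed to be a 60 s DISDRODB dataset already in memory):

    from disdrodb.l1.resampling import resample_dataset

    # Block resampling to 5 minutes; "ROLL5MIN" would request rolling resampling instead
    ds_5min = resample_dataset(ds, sample_interval=60, temporal_resolution="5MIN")

    # qc_resampling per output timestep: 1 - (valid input timesteps) / (expected input timesteps)
    n_timesteps = 300 / 60               # 5 expected 60 s samples per 5 min window
    qc = round(1 - 3 / n_timesteps, 1)   # 3 of 5 samples valid --> 0.4, i.e. 40% of timesteps missing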
disdrodb/l1_env/routines.py CHANGED
@@ -15,39 +15,68 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Core functions for DISDRODB ENV production."""
+import numpy as np
 import xarray as xr
 
 from disdrodb.constants import GEOLOCATION_COORDS
+from disdrodb.l0.l0b_processing import ensure_valid_geolocation
+from disdrodb.utils.logger import log_warning
+
+DEFAULT_GEOLOCATION = {
+    "latitude": 46.159346,
+    "longitude": 8.774586,
+    "altitude": 0,
+}
 
 
 def get_default_environment_dataset():
     """Define defaults values for the ENV dataset."""
     ds_env = xr.Dataset()
-    ds_env["sea_level_air_pressure"] = 101_325
-    ds_env["gas_constant_dry_air"] = 287.04
-    ds_env["lapse_rate"] = 0.0065
-    ds_env["relative_humidity"] = 0.95  # Value between 0 and 1 !
-    ds_env["temperature"] = 20 + 273.15
+    ds_env["sea_level_air_pressure"] = 101_325  # Pa
+    ds_env["gas_constant_dry_air"] = 287.04  # J kg⁻¹ K⁻¹
+    ds_env["lapse_rate"] = 0.0065  # K m⁻¹
+    ds_env["relative_humidity"] = 0.95  # 0-1 !
+    ds_env["temperature"] = 20 + 273.15  # K
+    ds_env["water_density"] = 1000  # kg m⁻³ (T == 10 --> 999.7, T == 20 --> 998.2)
+    # get_water_density(temperature=temperature, air_pressure=air_pressure
     return ds_env
 
 
-def _assign_geolocation(ds_src, dst_dst):
+def _assign_geolocation(ds_src, dst_dst, logger=None):
+    dict_coords = {}
+    for coord in GEOLOCATION_COORDS:
+        if coord in ds_src:
+            # Check geolocation validity
+            ds_src = ensure_valid_geolocation(ds_src, coord=coord, errors="coerce")
+            # Assign valid geolocation (or default one if invalid)
+            if "time" not in ds_src[coord].dims:
+                dict_coords[coord] = ds_src[coord] if not np.isnan(ds_src[coord]) else DEFAULT_GEOLOCATION[coord]
+            else:  # If coordinates varies over time, infill NaN over time with forward and backward filling
+                dict_coords[coord] = ds_src[coord].ffill(dim="time").bfill(dim="time")
+        else:
+            dict_coords[coord] = DEFAULT_GEOLOCATION[coord]
+            log_warning(
+                logger=logger,
+                msg=f"{coord} not available. Setting {coord}={DEFAULT_GEOLOCATION[coord]}",
+                verbose=False,
+            )
 
-    dict_coords = {coord: ds_src[coord] for coord in GEOLOCATION_COORDS if coord in ds_src}
+    # Assign geolocation
     dst_dst = dst_dst.assign_coords(dict_coords)
     return dst_dst
 
 
-def load_env_dataset(ds):
+def load_env_dataset(ds=None, logger=None):
     """Load the ENV dataset."""
-    # TODO: Retrieve relative_humidity and temperature from L1-ENV
+    # TODO: Retrieve relative_humidity, lapse_rate and temperature from DISDRODB-ENV product
+
+    # Load default environment dataset
     ds_env = get_default_environment_dataset()
-    # Compute water density
-    # get_water_density(
-    #     temperature=temperature,
-    #     air_pressure=air_pressure,
-    # )
-    # --> (T == 10 --> 999.7, T == 20 --> 998.2
-    ds_env["water_density"] = 1000  # kg / m3  # TODO as function of ENV (temperature, ...) ?
-    ds_env = _assign_geolocation(ds_src=ds, dst_dst=ds_env)
+
+    # Assign geolocation if input dataset provided
+    if ds is not None:
+        ds_env = _assign_geolocation(ds_src=ds, dst_dst=ds_env, logger=logger)
+    # Otherwise add default geolocation
+    else:
+        ds_env = ds_env.assign_coords(DEFAULT_GEOLOCATION)
     return ds_env
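`load_env_dataset` can now be called without an input dataset, in which case the defaults above (including `DEFAULT_GEOLOCATION`) are used directly. A quick sketch:

    from disdrodb.l1_env.routines import load_env_dataset

    # No input dataset: returns the default ENV dataset with DEFAULT_GEOLOCATION coordinates
    ds_env = load_env_dataset()
    print(float(ds_env["temperature"]))  # 293.15 (the 20 °C default, in K)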
disdrodb/l2/empirical_dsd.py CHANGED
@@ -236,6 +236,12 @@ def get_effective_sampling_area(sensor_name, diameter):
     if sensor_name == "RD80":
         sampling_area = 0.005  # m2
         return sampling_area
+    if sensor_name == "SWS250":  # TODO: L * (B - diameter / 2) ?
+        # Table 29 of the manual that the sample volume is 400cm3, path length?
+        # Distance between the end of the hood heaters is 291 mm.
+        # Adding a factor of 1.5 for better representation of the Tx-Rx distance: L= 436 mm.
+        sampling_area = 0.0091  # m2
+        return sampling_area
     raise NotImplementedError(f"Effective sampling area for {sensor_name} must yet to be specified in the software.")
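A quick check of the new SWS250 branch, assuming (as both the RD80 and SWS250 branches shown here suggest) that the `diameter` argument is not consulted for these sensors:

    from disdrodb.l2.empirical_dsd import get_effective_sampling_area

    # SWS250 now maps to a fixed effective sampling area
    area = get_effective_sampling_area("SWS250", diameter=None)
    assert area == 0.0091  # m2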
disdrodb/l2/processing.py CHANGED
@@ -27,7 +27,6 @@ from disdrodb.l2.empirical_dsd import (
     add_bins_metrics,
     compute_integral_parameters,
     compute_spectrum_parameters,
-    get_drop_average_velocity,
     get_drop_number_concentration,
     get_effective_sampling_area,
     get_kinetic_energy_variables_from_drop_number,
@@ -273,6 +272,8 @@ def generate_l2e(
         "Dmin",
         "Dmax",
         "fall_velocity",
+        "qc_resampling",
+        "time_qc",
     ]
 
     variables = [var for var in variables if var in ds]
@@ -282,8 +283,8 @@
     # -------------------------------------------------------------------------------------------
     # Compute and add drop average velocity if an optical disdrometer (i.e OTT Parsivel or ThiesLPM)
     # - We recompute it because if the input dataset is aggregated, it must be updated !
-    if has_velocity_dimension:
-        ds["drop_average_velocity"] = get_drop_average_velocity(ds["drop_number"])
+    # if has_velocity_dimension:
+    #     ds["drop_average_velocity"] = get_drop_average_velocity(ds["drop_number"])
 
     # -------------------------------------------------------------------------------------------
     # Define velocity array with dimension 'velocity_method'
@@ -441,7 +442,7 @@ def generate_l2m(
     diameter_spacing=0.05,
     # Processing options
     ds_env=None,
-    fall_velocity_method="Beard1976",
+    fall_velocity_model="Beard1976",
     # Filtering options
     minimum_ndrops=1,
     minimum_nbins=3,
@@ -548,7 +549,7 @@
         drop_number_concentration = psd(diameter)
 
         # Retrieve fall velocity for each new diameter bin
-        velocity = get_raindrop_fall_velocity(diameter=diameter, method=fall_velocity_method, ds_env=ds_env)  # mm
+        velocity = get_raindrop_fall_velocity(diameter=diameter, model=fall_velocity_model, ds_env=ds_env)  # mm
 
         # Compute integral parameters
         ds_params = compute_integral_parameters(
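As in L1, callers of `generate_l2m` must switch to `fall_velocity_model`, and `get_raindrop_fall_velocity` now takes `model=` instead of `method=`. A hedged sketch, assuming the function is still importable from `disdrodb.l1.fall_velocity` as it was in 0.1.4 (the diameter grid is illustrative):

    import numpy as np

    from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity
    from disdrodb.l1_env.routines import load_env_dataset

    # 0.2.0 keyword is model= (was method= in 0.1.4); diameters in mm
    diameter = np.arange(0.1, 8.0, 0.05)
    velocity = get_raindrop_fall_velocity(diameter=diameter, model="Beard1976", ds_env=load_env_dataset())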
disdrodb/metadata/geolocation.py CHANGED
@@ -60,8 +60,6 @@ def infer_altitude(latitude, longitude, dem="aster30m"):
     ----------
     https://www.opentopodata.org/api/
     """
-    import requests
-
     url = f"https://api.opentopodata.org/v1/{dem}?locations={latitude},{longitude}"
     r = requests.get(url)
 
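The function-local `import requests` is dropped (-2/+0 per the file index), implying `requests` is now imported at module scope. Usage is unchanged; a sketch with illustrative coordinates:

    from disdrodb.metadata.geolocation import infer_altitude

    # Queries the Open Topo Data API shown above (network access required)
    altitude = infer_altitude(latitude=46.159346, longitude=8.774586, dem="aster30m")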
disdrodb/metadata/search.py CHANGED
@@ -102,10 +102,9 @@
         Path to the root of the DISDRODB Metadata Archive. Format: ``<...>/DISDRODB``
         If None, the``metadata_archive_dir`` path specified in the DISDRODB active configuratio. The default is None.
     **product_kwargs : dict, optional
-        Additional arguments required for some products.
-        For example, for the "L2E" product, you need to specify ``rolling`` and
-        ``sample_interval``. For the "L2M" product, you need to specify also
-        the ``model_name``.
+        Additional arguments required for DISDRODB products L1, L2E and L2M.
+        For the L1, L2E and L2M products, ``temporal_resolution`` is required.
+        FOr the L2M product, ``model_name`` is required.
 
     Returns
     -------
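The product kwargs therefore change for callers of this metadata search API. A hedged usage sketch, assuming the surrounding function is `get_list_metadata` and that products are selected with a `product=` keyword (the full signature is not part of this hunk; station filters are omitted):

    from disdrodb.metadata.search import get_list_metadata

    # 0.2.0: L1/L2E/L2M require temporal_resolution; L2M additionally requires model_name
    l2e_paths = get_list_metadata(product="L2E", temporal_resolution="ROLL5MIN")
    l2m_paths = get_list_metadata(product="L2M", temporal_resolution="5MIN", model_name="GAMMA_ML")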
disdrodb/psd/fitting.py CHANGED
@@ -23,7 +23,7 @@ from scipy.optimize import minimize
 from scipy.special import gamma, gammaln  # Regularized lower incomplete gamma function
 
 from disdrodb.constants import DIAMETER_DIMENSION
-from disdrodb.l1.fall_velocity import get_dataset_fall_velocity
+from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity_from_ds
 from disdrodb.l2.empirical_dsd import (
     get_median_volume_drop_diameter,
     get_moment,
@@ -607,7 +607,7 @@ def estimate_gamma_parameters(
 
     """
     # Define initial guess for parameters
-    a = mu + 1  # (mu = a-1, a = mu+1)
+    a = mu + 1  # (mu = a-1, a = mu+1) (a > 0 --> mu=-1)
     scale = 1 / Lambda
     initial_params = [a, scale]
 
@@ -1208,13 +1208,13 @@
 ):
     """Estimate GammaPSD model parameters using Grid Search."""
     # Define parameters bounds
-    mu_bounds = (0.01, 20)
-    lambda_bounds = (0.01, 60)
+    mu_bounds = (-1, 40)
+    lambda_bounds = (0, 60)
 
     # Define initial set of parameters
-    mu_step = 0.5
+    mu_step = 0.25
     lambda_step = 0.5
-    mu_values = np.arange(0.01, 20, step=mu_step)
+    mu_values = np.arange(0, 40, step=mu_step)
     lambda_values = np.arange(0, 60, step=lambda_step)
 
     # First round of GS
@@ -1304,15 +1304,17 @@
     """Estimate LognormalPSD model parameters using Grid Search."""
     # Define parameters bounds
     sigma_bounds = (0, np.inf)  # > 0
-    scale_bounds = (0.1, np.inf)  # > 0
+    scale_bounds = (0, np.inf)  # > 0
     # mu_bounds = (- np.inf, np.inf)  # mu = np.log(scale)
 
     # Define initial set of parameters
+    # --> Typically sigma between 0 and 3
+    # --> Typically mu between -2 and 2
     scale_step = 0.2
     sigma_step = 0.2
-    scale_values = np.arange(0.1, 20, step=scale_step)
-    mu_values = np.log(scale_values)  # TODO: define realistic values
-    sigma_values = np.arange(0, 20, step=sigma_step)  # TODO: define realistic values
+    scale_values = np.arange(scale_step, 20, step=scale_step)
+    mu_values = np.log(scale_values)
+    sigma_values = np.arange(0, 3, step=sigma_step)
 
     # First round of GS
     Nt, mu, sigma = _apply_lognormal_gs(
@@ -1333,7 +1335,8 @@
     # Second round of GS
     sigma_values = define_param_range(sigma, sigma_step, bounds=sigma_bounds)
     scale_values = define_param_range(np.exp(mu), scale_step, bounds=scale_bounds)
-    mu_values = np.log(scale_values)
+    with suppress_warnings():
+        mu_values = np.log(scale_values)
     Nt, mu, sigma = _apply_lognormal_gs(
         mu_values=mu_values,
         sigma_values=sigma_values,
@@ -1365,7 +1368,7 @@
 ):
     """Estimate NormalizedGammaPSD model parameters using Grid Search."""
     # Define set of mu values
-    mu_arr = np.arange(0.01, 20, step=0.01)
+    mu_arr = np.arange(-4, 30, step=0.01)
 
     # Perform grid search
     with suppress_warnings():
@@ -2353,7 +2356,7 @@ def get_gs_parameters(ds, psd_model, target="ND", transformation="log", error_or
 
     # Check fall velocity is available if target R
     if "fall_velocity" not in ds:
-        ds["fall_velocity"] = get_dataset_fall_velocity(ds)
+        ds["fall_velocity"] = get_raindrop_fall_velocity_from_ds(ds)
 
     # Retrieve estimation function
     func = OPTIMIZATION_ROUTINES_DICT["GS"][psd_model]
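The widened grid-search ranges above let mu go below zero; the inline note `(a > 0 --> mu=-1)` marks the hard floor, since scipy's gamma shape parameter a = mu + 1 must stay positive. A small sketch of the parameter mapping used for the initial guess:

    # A gamma DSD N(D) ~ D**mu * exp(-Lambda * D) corresponds to scipy's gamma
    # distribution with shape a and scale: pdf(x) ~ x**(a - 1) * exp(-x / scale)
    mu, Lambda = 2.0, 3.0  # illustrative DSD parameters
    a = mu + 1             # shape: mu = a - 1, so mu > -1 requires a > 0
    scale = 1 / Lambda     # scale is the inverse of the slope parameter
    print(a, scale)        # 3.0 0.3333...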
disdrodb/routines/l0.py CHANGED
@@ -50,7 +50,7 @@ from disdrodb.l0.l0b_nc_processing import sanitize_ds
 from disdrodb.l0.l0b_processing import generate_l0b
 from disdrodb.l0.l0c_processing import TOLERANCE_SECONDS, create_l0c_datasets
 from disdrodb.metadata import read_station_metadata
-from disdrodb.utils.archiving import get_files_per_time_block
+from disdrodb.utils.archiving import group_files_by_time_block
 from disdrodb.utils.dask import execute_tasks_safely
 from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel
 
@@ -928,7 +928,7 @@
     # -------------------------------------------------------------------------.
     # Retrieve dictionary with the required files per time block
     # TODO: allow customizing this in config file, but risk of out of memory !
-    list_event_info = get_files_per_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)
+    list_event_info = group_files_by_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)
 
     # -----------------------------------------------------------------.
     # Generate L0C files
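Code importing the archiving helper directly needs the matching one-line rename; a sketch using the same keyword arguments as the call site above (the file list is illustrative):

    from disdrodb.l0.l0c_processing import TOLERANCE_SECONDS
    from disdrodb.utils.archiving import group_files_by_time_block  # was get_files_per_time_block in 0.1.4

    filepaths = ["path/to/file_1.nc", "path/to/file_2.nc"]  # illustrative
    list_event_info = group_files_by_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)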