disdrodb 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +1 -5
- disdrodb/_version.py +2 -2
- disdrodb/accessor/methods.py +14 -3
- disdrodb/api/checks.py +10 -0
- disdrodb/api/create_directories.py +0 -2
- disdrodb/api/io.py +14 -17
- disdrodb/api/path.py +42 -77
- disdrodb/api/search.py +89 -23
- disdrodb/cli/disdrodb_create_summary.py +11 -1
- disdrodb/cli/disdrodb_create_summary_station.py +10 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0a.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l1.py +1 -1
- disdrodb/cli/disdrodb_run_l2e.py +1 -1
- disdrodb/cli/disdrodb_run_l2m.py +1 -1
- disdrodb/configs.py +30 -83
- disdrodb/constants.py +4 -3
- disdrodb/data_transfer/download_data.py +4 -2
- disdrodb/docs.py +2 -2
- disdrodb/etc/products/L1/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
- disdrodb/etc/products/L1/global.yaml +7 -1
- disdrodb/etc/products/L2E/10MIN.yaml +1 -12
- disdrodb/etc/products/L2E/5MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +1 -1
- disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +1 -1
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +11 -3
- disdrodb/l0/check_configs.py +49 -16
- disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
- disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
- disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
- disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
- disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
- disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
- disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
- disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
- disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
- disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
- disdrodb/l0/l0_reader.py +2 -2
- disdrodb/l0/l0b_processing.py +70 -15
- disdrodb/l0/l0c_processing.py +7 -3
- disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -1
- disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
- disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
- disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
- disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
- disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
- disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
- disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
- disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
- disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
- disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/LPVEX.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +8 -17
- disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
- disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
- disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
- disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
- disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
- disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +232 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
- disdrodb/l0/readers/PARSIVEL2/{NASA/LPVEX.py → SPAIN/GRANADA.py} +46 -35
- disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
- disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +10 -28
- disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
- disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
- disdrodb/l1/beard_model.py +31 -129
- disdrodb/l1/fall_velocity.py +136 -83
- disdrodb/l1/filters.py +25 -28
- disdrodb/l1/processing.py +16 -17
- disdrodb/l1/resampling.py +101 -38
- disdrodb/l1_env/routines.py +46 -17
- disdrodb/l2/empirical_dsd.py +6 -0
- disdrodb/l2/processing.py +6 -5
- disdrodb/metadata/geolocation.py +0 -2
- disdrodb/metadata/search.py +3 -4
- disdrodb/psd/fitting.py +16 -13
- disdrodb/routines/l0.py +2 -2
- disdrodb/routines/l1.py +173 -60
- disdrodb/routines/l2.py +148 -284
- disdrodb/routines/options.py +345 -0
- disdrodb/routines/wrappers.py +14 -1
- disdrodb/scattering/axis_ratio.py +90 -84
- disdrodb/scattering/permittivity.py +6 -0
- disdrodb/summary/routines.py +735 -670
- disdrodb/utils/archiving.py +51 -44
- disdrodb/utils/attrs.py +3 -1
- disdrodb/utils/dask.py +4 -4
- disdrodb/utils/dict.py +33 -0
- disdrodb/utils/encoding.py +6 -1
- disdrodb/utils/routines.py +9 -8
- disdrodb/utils/time.py +11 -3
- disdrodb/viz/__init__.py +0 -13
- disdrodb/viz/plots.py +231 -1
- {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/METADATA +2 -1
- {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/RECORD +135 -103
- /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
- /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
- /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
- /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/IFLOODS.py +0 -0
- /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +0 -0
- /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/PIERS.py +0 -0
- /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/GCPEX.py +0 -0
- /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
- {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/entry_points.txt +0 -0
- {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/top_level.txt +0 -0
disdrodb/l1/processing.py
CHANGED
|
@@ -19,8 +19,8 @@
|
|
|
19
19
|
import xarray as xr
|
|
20
20
|
|
|
21
21
|
from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
|
|
22
|
-
from disdrodb.l1.fall_velocity import
|
|
23
|
-
from disdrodb.l1.filters import
|
|
22
|
+
from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity_from_ds
|
|
23
|
+
from disdrodb.l1.filters import define_raindrop_spectrum_mask, filter_diameter_bins, filter_velocity_bins
|
|
24
24
|
from disdrodb.l1.resampling import add_sample_interval
|
|
25
25
|
from disdrodb.l1_env.routines import load_env_dataset
|
|
26
26
|
from disdrodb.l2.empirical_dsd import ( # TODO: maybe move out of L2
|
|
@@ -34,7 +34,7 @@ from disdrodb.utils.writer import finalize_product
|
|
|
34
34
|
def generate_l1(
|
|
35
35
|
ds,
|
|
36
36
|
# Fall velocity option
|
|
37
|
-
|
|
37
|
+
fall_velocity_model="Beard1976",
|
|
38
38
|
# Diameter-Velocity Filtering Options
|
|
39
39
|
minimum_diameter=0,
|
|
40
40
|
maximum_diameter=10,
|
|
@@ -54,7 +54,7 @@ def generate_l1(
|
|
|
54
54
|
----------
|
|
55
55
|
ds : xarray.Dataset
|
|
56
56
|
DISDRODB L0C dataset.
|
|
57
|
-
|
|
57
|
+
fall_velocity_model : str, optional
|
|
58
58
|
Method to compute fall velocity.
|
|
59
59
|
The default method is ``"Beard1976"``.
|
|
60
60
|
minimum_diameter : float, optional
|
|
@@ -106,7 +106,9 @@ def generate_l1(
|
|
|
106
106
|
|
|
107
107
|
# ---------------------------------------------------------------------------
|
|
108
108
|
# Retrieve ENV dataset or take defaults
|
|
109
|
-
#
|
|
109
|
+
# - Used only for Beard fall velocity currently !
|
|
110
|
+
# - It checks and includes default geolocation if missing
|
|
111
|
+
# - For mobile disdrometer, infill missing geolocation with backward and forward filling
|
|
110
112
|
ds_env = load_env_dataset(ds)
|
|
111
113
|
|
|
112
114
|
# ---------------------------------------------------------------------------
|
|
@@ -119,16 +121,18 @@ def generate_l1(
|
|
|
119
121
|
# Add sample interval as coordinate (in seconds)
|
|
120
122
|
ds_l1 = add_sample_interval(ds_l1, sample_interval=sample_interval)
|
|
121
123
|
|
|
122
|
-
# Add
|
|
123
|
-
|
|
124
|
-
|
|
124
|
+
# Add optional variables to L1 dataset
|
|
125
|
+
optional_variables = ["time_qc", "qc_resampling"]
|
|
126
|
+
for var in optional_variables:
|
|
127
|
+
if var in ds:
|
|
128
|
+
ds_l1[var] = ds[var]
|
|
125
129
|
|
|
126
130
|
# -------------------------------------------------------------------------------------------
|
|
127
131
|
# Filter dataset by diameter and velocity bins
|
|
128
132
|
if sensor_name in ["PARSIVEL", "PARSIVEL2"]:
|
|
129
133
|
# - Remove first two bins because never reports data !
|
|
130
134
|
# - If not removed, can alter e.g. L2M model fitting
|
|
131
|
-
ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=0.
|
|
135
|
+
ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=0.2495) # it includes the 0.2495-0.3745 bin
|
|
132
136
|
|
|
133
137
|
# - Filter diameter bins
|
|
134
138
|
ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=minimum_diameter, maximum_diameter=maximum_diameter)
|
|
@@ -138,16 +142,12 @@ def generate_l1(
|
|
|
138
142
|
|
|
139
143
|
# -------------------------------------------------------------------------------------------
|
|
140
144
|
# Compute fall velocity
|
|
141
|
-
ds_l1["fall_velocity"] =
|
|
142
|
-
diameter=ds_l1["diameter_bin_center"],
|
|
143
|
-
method=fall_velocity_method,
|
|
144
|
-
ds_env=ds_env, # mm
|
|
145
|
-
)
|
|
145
|
+
ds_l1["fall_velocity"] = get_raindrop_fall_velocity_from_ds(ds=ds_l1, ds_env=ds_env, model=fall_velocity_model)
|
|
146
146
|
|
|
147
147
|
# -------------------------------------------------------------------------------------------
|
|
148
148
|
# Define filtering mask according to fall velocity
|
|
149
149
|
if has_velocity_dimension:
|
|
150
|
-
mask =
|
|
150
|
+
mask = define_raindrop_spectrum_mask(
|
|
151
151
|
drop_number=ds_l1["raw_drop_number"],
|
|
152
152
|
fall_velocity=ds_l1["fall_velocity"],
|
|
153
153
|
above_velocity_fraction=above_velocity_fraction,
|
|
@@ -162,10 +162,9 @@ def generate_l1(
|
|
|
162
162
|
# -------------------------------------------------------------------------------------------
|
|
163
163
|
# Retrieve drop number and drop_counts arrays
|
|
164
164
|
if has_velocity_dimension:
|
|
165
|
-
drop_number = ds_l1["raw_drop_number"].where(mask) # 2D (diameter, velocity)
|
|
165
|
+
drop_number = ds_l1["raw_drop_number"].where(mask, 0) # 2D (diameter, velocity)
|
|
166
166
|
drop_counts = drop_number.sum(dim=VELOCITY_DIMENSION) # 1D (diameter)
|
|
167
167
|
drop_counts_raw = ds_l1["raw_drop_number"].sum(dim=VELOCITY_DIMENSION) # 1D (diameter)
|
|
168
|
-
|
|
169
168
|
else:
|
|
170
169
|
drop_number = ds_l1["raw_drop_number"] # 1D (diameter)
|
|
171
170
|
drop_counts = ds_l1["raw_drop_number"] # 1D (diameter)
|
disdrodb/l1/resampling.py
CHANGED
|
@@ -19,9 +19,12 @@ import numpy as np
|
|
|
19
19
|
import pandas as pd
|
|
20
20
|
import xarray as xr
|
|
21
21
|
|
|
22
|
-
from disdrodb.utils.time import
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
from disdrodb.utils.time import (
|
|
23
|
+
ensure_sample_interval_in_seconds,
|
|
24
|
+
get_dataset_start_end_time,
|
|
25
|
+
get_sampling_information,
|
|
26
|
+
regularize_dataset,
|
|
27
|
+
)
|
|
25
28
|
|
|
26
29
|
|
|
27
30
|
def add_sample_interval(ds, sample_interval):
|
|
@@ -95,6 +98,27 @@ def define_window_size(sample_interval, accumulation_interval):
|
|
|
95
98
|
return window_size
|
|
96
99
|
|
|
97
100
|
|
|
101
|
+
def _finalize_qc_resampling(ds, sample_interval, accumulation_interval):
|
|
102
|
+
# Compute qc_resampling
|
|
103
|
+
# - 0 if not missing timesteps
|
|
104
|
+
# - 1 if all timesteps missing
|
|
105
|
+
n_timesteps = accumulation_interval / sample_interval
|
|
106
|
+
ds["qc_resampling"] = np.round(1 - ds["qc_resampling"] / n_timesteps, 1)
|
|
107
|
+
ds["qc_resampling"].attrs = {
|
|
108
|
+
"long_name": "Resampling Quality Control Flag",
|
|
109
|
+
"standard_name": "quality_flag",
|
|
110
|
+
"units": "",
|
|
111
|
+
"valid_min": 0.0,
|
|
112
|
+
"valid_max": 1.0,
|
|
113
|
+
"description": (
|
|
114
|
+
"Fraction of timesteps missing when resampling the data."
|
|
115
|
+
"0 = No timesteps missing; 1 = All timesteps missing;"
|
|
116
|
+
"Intermediate values indicate partial data coverage."
|
|
117
|
+
),
|
|
118
|
+
}
|
|
119
|
+
return ds
|
|
120
|
+
|
|
121
|
+
|
|
98
122
|
def _resample(ds, variables, accumulation, op):
|
|
99
123
|
if not variables:
|
|
100
124
|
return {}
|
|
@@ -113,23 +137,24 @@ def _rolling(ds, variables, window_size, op):
|
|
|
113
137
|
return ds_subset
|
|
114
138
|
|
|
115
139
|
|
|
116
|
-
def resample_dataset(ds, sample_interval,
|
|
140
|
+
def resample_dataset(ds, sample_interval, temporal_resolution):
|
|
117
141
|
"""
|
|
118
142
|
Resample the dataset to a specified accumulation interval.
|
|
119
143
|
|
|
144
|
+
The output timesteps correspond to the starts of the periods over which
|
|
145
|
+
the resampling operation has been performed !
|
|
146
|
+
|
|
120
147
|
Parameters
|
|
121
148
|
----------
|
|
122
149
|
ds : xarray.Dataset
|
|
123
150
|
The input dataset to be resampled.
|
|
124
151
|
sample_interval : int
|
|
125
|
-
The sample interval of the input dataset.
|
|
126
|
-
|
|
127
|
-
The
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
The output timesteps correspond to the starts of the periods over which
|
|
132
|
-
the resampling operation has been performed !
|
|
152
|
+
The sample interval (in seconds) of the input dataset.
|
|
153
|
+
temporal_resolution : str
|
|
154
|
+
The desired temporal resolution for resampling.
|
|
155
|
+
It should be a string representing the accumulation interval,
|
|
156
|
+
e.g., "5MIN" for 5 minutes, "1H" for 1 hour, "30S" for 30 seconds, etc.
|
|
157
|
+
Prefixed with "ROLL" for rolling resampling, e.g., "ROLL5MIN".
|
|
133
158
|
|
|
134
159
|
Returns
|
|
135
160
|
-------
|
|
@@ -149,6 +174,9 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
|
|
|
149
174
|
# Ensure sample interval in seconds
|
|
150
175
|
sample_interval = int(ensure_sample_interval_in_seconds(sample_interval))
|
|
151
176
|
|
|
177
|
+
# Retrieve accumulation_interval and rolling option
|
|
178
|
+
accumulation_interval, rolling = get_sampling_information(temporal_resolution)
|
|
179
|
+
|
|
152
180
|
# --------------------------------------------------------------------------.
|
|
153
181
|
# Raise error if the accumulation_interval is less than the sample interval
|
|
154
182
|
if accumulation_interval < sample_interval:
|
|
@@ -157,51 +185,78 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
|
|
|
157
185
|
if not accumulation_interval % sample_interval == 0:
|
|
158
186
|
raise ValueError("The accumulation_interval is not a multiple of sample interval.")
|
|
159
187
|
|
|
188
|
+
# Retrieve input dataset start_time and end_time
|
|
189
|
+
start_time, end_time = get_dataset_start_end_time(ds, time_dim="time")
|
|
190
|
+
|
|
191
|
+
# Initialize qc_resampling
|
|
192
|
+
ds["qc_resampling"] = xr.ones_like(ds["time"], dtype="float")
|
|
193
|
+
|
|
194
|
+
# Retrieve dataset attributes
|
|
195
|
+
attrs = ds.attrs.copy()
|
|
196
|
+
|
|
197
|
+
# If no resampling, return as it is
|
|
198
|
+
if sample_interval == accumulation_interval:
|
|
199
|
+
attrs["disdrodb_aggregated_product"] = "False"
|
|
200
|
+
attrs["disdrodb_rolled_product"] = "False"
|
|
201
|
+
attrs["disdrodb_temporal_resolution"] = temporal_resolution
|
|
202
|
+
|
|
203
|
+
ds = _finalize_qc_resampling(ds, sample_interval=sample_interval, accumulation_interval=accumulation_interval)
|
|
204
|
+
ds = add_sample_interval(ds, sample_interval=accumulation_interval)
|
|
205
|
+
ds.attrs = attrs
|
|
206
|
+
return ds
|
|
207
|
+
|
|
160
208
|
# --------------------------------------------------------------------------.
|
|
161
209
|
#### Preprocess the dataset
|
|
162
|
-
#
|
|
163
|
-
# - We assume that NaN corresponds to 0
|
|
164
|
-
# - When we regularize, we infill with NaN
|
|
210
|
+
# - Set timesteps with NaN in drop_number to zero (and set qc_resampling to 0)
|
|
165
211
|
# - When we aggregate with sum, we don't skip NaN
|
|
166
|
-
#
|
|
212
|
+
# --> Resampling over missing timesteps will result in NaN drop_number and qc_resampling = 1
|
|
213
|
+
# --> Resampling over timesteps with NaN in drop_number will result in finite drop_number but qc_resampling > 0
|
|
214
|
+
# - qc_resampling will inform on the amount of timesteps missing
|
|
167
215
|
|
|
168
|
-
|
|
169
|
-
# - This might alter integrated statistics if NaN in spectrum does not actually correspond to 0 !
|
|
170
|
-
# - TODO: NaN should not be set as 0 !
|
|
171
|
-
for var in ["drop_number", "raw_drop_number"]:
|
|
216
|
+
for var in ["drop_number", "raw_drop_number", "drop_counts", "drop_number_concentration"]:
|
|
172
217
|
if var in ds:
|
|
173
|
-
|
|
218
|
+
dims = set(ds[var].dims) - {"time"}
|
|
219
|
+
invalid_timesteps = np.isnan(ds[var]).any(dim=dims)
|
|
220
|
+
ds[var] = ds[var].where(~invalid_timesteps, 0)
|
|
221
|
+
ds["qc_resampling"] = ds["qc_resampling"].where(~invalid_timesteps, 0)
|
|
222
|
+
|
|
223
|
+
if np.all(invalid_timesteps).item():
|
|
224
|
+
raise ValueError("No timesteps with valid spectrum.")
|
|
174
225
|
|
|
175
226
|
# Ensure regular dataset without missing timesteps
|
|
176
227
|
# --> This adds NaN values for missing timesteps
|
|
177
|
-
ds = regularize_dataset(ds, freq=f"{sample_interval}s")
|
|
228
|
+
ds = regularize_dataset(ds, freq=f"{sample_interval}s", start_time=start_time, end_time=end_time)
|
|
229
|
+
ds["qc_resampling"] = ds["qc_resampling"].where(~np.isnan(ds["qc_resampling"]), 0)
|
|
178
230
|
|
|
179
231
|
# --------------------------------------------------------------------------.
|
|
180
232
|
# Define dataset attributes
|
|
181
|
-
attrs = ds.attrs.copy()
|
|
182
233
|
if rolling:
|
|
183
234
|
attrs["disdrodb_rolled_product"] = "True"
|
|
184
235
|
else:
|
|
185
236
|
attrs["disdrodb_rolled_product"] = "False"
|
|
186
237
|
|
|
187
|
-
if sample_interval == accumulation_interval:
|
|
188
|
-
attrs["disdrodb_aggregated_product"] = "False"
|
|
189
|
-
ds = add_sample_interval(ds, sample_interval=accumulation_interval)
|
|
190
|
-
ds.attrs = attrs
|
|
191
|
-
return ds
|
|
192
|
-
|
|
193
|
-
# --------------------------------------------------------------------------.
|
|
194
|
-
# Resample the dataset
|
|
195
238
|
attrs["disdrodb_aggregated_product"] = "True"
|
|
239
|
+
attrs["disdrodb_temporal_resolution"] = temporal_resolution
|
|
196
240
|
|
|
241
|
+
# --------------------------------------------------------------------------.
|
|
197
242
|
# Initialize resample dataset
|
|
198
243
|
ds_resampled = xr.Dataset()
|
|
199
244
|
|
|
200
245
|
# Retrieve variables to average/sum
|
|
246
|
+
# - ATTENTION: it will not resample non-dimensional time coordinates of the dataset !
|
|
201
247
|
var_to_average = ["fall_velocity"]
|
|
202
|
-
var_to_cumulate = [
|
|
248
|
+
var_to_cumulate = [
|
|
249
|
+
"raw_drop_number",
|
|
250
|
+
"drop_number",
|
|
251
|
+
"drop_counts",
|
|
252
|
+
"drop_number_concentration",
|
|
253
|
+
"N",
|
|
254
|
+
"Nraw",
|
|
255
|
+
"Nremoved",
|
|
256
|
+
"qc_resampling",
|
|
257
|
+
]
|
|
203
258
|
var_to_min = ["Dmin"]
|
|
204
|
-
var_to_max = ["Dmax"]
|
|
259
|
+
var_to_max = ["Dmax", "time_qc"]
|
|
205
260
|
|
|
206
261
|
# Retrieve available variables
|
|
207
262
|
var_to_average = [var for var in var_to_average if var in ds]
|
|
@@ -209,11 +264,6 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
|
|
|
209
264
|
var_to_min = [var for var in var_to_min if var in ds]
|
|
210
265
|
var_to_max = [var for var in var_to_max if var in ds]
|
|
211
266
|
|
|
212
|
-
# TODO Define custom processing
|
|
213
|
-
# - quality_flag --> take worst
|
|
214
|
-
# - skipna if less than fraction (to not waste lot of data when aggregating over i.e. hours)
|
|
215
|
-
# - Add tolerance on fraction of missing timesteps for large accumulation_intervals
|
|
216
|
-
|
|
217
267
|
# Resample the dataset
|
|
218
268
|
# - Rolling currently does not allow direct rolling forward.
|
|
219
269
|
# - We currently use center=False which means search for data backward (right-aligned) !
|
|
@@ -239,6 +289,19 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
|
|
|
239
289
|
{"time": ds_resampled["time"].data[: -window_size + 1]},
|
|
240
290
|
)
|
|
241
291
|
|
|
292
|
+
# Finalize qc_resampling
|
|
293
|
+
ds_resampled = _finalize_qc_resampling(
|
|
294
|
+
ds_resampled,
|
|
295
|
+
sample_interval=sample_interval,
|
|
296
|
+
accumulation_interval=accumulation_interval,
|
|
297
|
+
)
|
|
298
|
+
# Set to NaN timesteps where qc_resampling == 1
|
|
299
|
+
# --> This occurs for missing timesteps in input dataset or all NaN drop_number arrays
|
|
300
|
+
variables = list(set(ds_resampled.data_vars) - {"qc_resampling"})
|
|
301
|
+
mask_missing_timesteps = ds_resampled["qc_resampling"] != 1
|
|
302
|
+
for var in variables:
|
|
303
|
+
ds_resampled[var] = ds_resampled[var].where(mask_missing_timesteps)
|
|
304
|
+
|
|
242
305
|
# Add attributes
|
|
243
306
|
ds_resampled.attrs = attrs
|
|
244
307
|
|
disdrodb/l1_env/routines.py
CHANGED
|
@@ -15,39 +15,68 @@
|
|
|
15
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""Core functions for DISDRODB ENV production."""
|
|
18
|
+
import numpy as np
|
|
18
19
|
import xarray as xr
|
|
19
20
|
|
|
20
21
|
from disdrodb.constants import GEOLOCATION_COORDS
|
|
22
|
+
from disdrodb.l0.l0b_processing import ensure_valid_geolocation
|
|
23
|
+
from disdrodb.utils.logger import log_warning
|
|
24
|
+
|
|
25
|
+
DEFAULT_GEOLOCATION = {
|
|
26
|
+
"latitude": 46.159346,
|
|
27
|
+
"longitude": 8.774586,
|
|
28
|
+
"altitude": 0,
|
|
29
|
+
}
|
|
21
30
|
|
|
22
31
|
|
|
23
32
|
def get_default_environment_dataset():
|
|
24
33
|
"""Define defaults values for the ENV dataset."""
|
|
25
34
|
ds_env = xr.Dataset()
|
|
26
|
-
ds_env["sea_level_air_pressure"] = 101_325
|
|
27
|
-
ds_env["gas_constant_dry_air"] = 287.04
|
|
28
|
-
ds_env["lapse_rate"] = 0.0065
|
|
29
|
-
ds_env["relative_humidity"] = 0.95 #
|
|
30
|
-
ds_env["temperature"] = 20 + 273.15
|
|
35
|
+
ds_env["sea_level_air_pressure"] = 101_325 # Pa
|
|
36
|
+
ds_env["gas_constant_dry_air"] = 287.04 # J kg⁻¹ K⁻¹
|
|
37
|
+
ds_env["lapse_rate"] = 0.0065 # K m⁻¹
|
|
38
|
+
ds_env["relative_humidity"] = 0.95 # 0-1 !
|
|
39
|
+
ds_env["temperature"] = 20 + 273.15 # K
|
|
40
|
+
ds_env["water_density"] = 1000 # kg m⁻³ (T == 10 --> 999.7, T == 20 --> 998.2)
|
|
41
|
+
# get_water_density(temperature=temperature, air_pressure=air_pressure
|
|
31
42
|
return ds_env
|
|
32
43
|
|
|
33
44
|
|
|
34
|
-
def _assign_geolocation(ds_src, dst_dst):
|
|
45
|
+
def _assign_geolocation(ds_src, dst_dst, logger=None):
|
|
46
|
+
dict_coords = {}
|
|
47
|
+
for coord in GEOLOCATION_COORDS:
|
|
48
|
+
if coord in ds_src:
|
|
49
|
+
# Check geolocation validity
|
|
50
|
+
ds_src = ensure_valid_geolocation(ds_src, coord=coord, errors="coerce")
|
|
51
|
+
# Assign valid geolocation (or default one if invalid)
|
|
52
|
+
if "time" not in ds_src[coord].dims:
|
|
53
|
+
dict_coords[coord] = ds_src[coord] if not np.isnan(ds_src[coord]) else DEFAULT_GEOLOCATION[coord]
|
|
54
|
+
else: # If coordinates varies over time, infill NaN over time with forward and backward filling
|
|
55
|
+
dict_coords[coord] = ds_src[coord].ffill(dim="time").bfill(dim="time")
|
|
56
|
+
else:
|
|
57
|
+
dict_coords[coord] = DEFAULT_GEOLOCATION[coord]
|
|
58
|
+
log_warning(
|
|
59
|
+
logger=logger,
|
|
60
|
+
msg=f"{coord} not available. Setting {coord}={DEFAULT_GEOLOCATION[coord]}",
|
|
61
|
+
verbose=False,
|
|
62
|
+
)
|
|
35
63
|
|
|
36
|
-
|
|
64
|
+
# Assign geolocation
|
|
37
65
|
dst_dst = dst_dst.assign_coords(dict_coords)
|
|
38
66
|
return dst_dst
|
|
39
67
|
|
|
40
68
|
|
|
41
|
-
def load_env_dataset(ds):
|
|
69
|
+
def load_env_dataset(ds=None, logger=None):
|
|
42
70
|
"""Load the ENV dataset."""
|
|
43
|
-
# TODO: Retrieve relative_humidity and temperature from
|
|
71
|
+
# TODO: Retrieve relative_humidity, lapse_rate and temperature from DISDRODB-ENV product
|
|
72
|
+
|
|
73
|
+
# Load default environment dataset
|
|
44
74
|
ds_env = get_default_environment_dataset()
|
|
45
|
-
|
|
46
|
-
#
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
#
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
ds_env = _assign_geolocation(ds_src=ds, dst_dst=ds_env)
|
|
75
|
+
|
|
76
|
+
# Assign geolocation if input dataset provided
|
|
77
|
+
if ds is not None:
|
|
78
|
+
ds_env = _assign_geolocation(ds_src=ds, dst_dst=ds_env, logger=logger)
|
|
79
|
+
# Otherwise add default geolocation
|
|
80
|
+
else:
|
|
81
|
+
ds_env = ds_env.assign_coords(DEFAULT_GEOLOCATION)
|
|
53
82
|
return ds_env
|
disdrodb/l2/empirical_dsd.py
CHANGED
|
@@ -236,6 +236,12 @@ def get_effective_sampling_area(sensor_name, diameter):
|
|
|
236
236
|
if sensor_name == "RD80":
|
|
237
237
|
sampling_area = 0.005 # m2
|
|
238
238
|
return sampling_area
|
|
239
|
+
if sensor_name == "SWS250": # TODO: L * (B - diameter / 2) ?
|
|
240
|
+
# Table 29 of the manual that the sample volume is 400cm3, path length?
|
|
241
|
+
# Distance between the end of the hood heaters is 291 mm.
|
|
242
|
+
# Adding a factor of 1.5 for better representation of the Tx-Rx distance: L= 436 mm.
|
|
243
|
+
sampling_area = 0.0091 # m2
|
|
244
|
+
return sampling_area
|
|
239
245
|
raise NotImplementedError(f"Effective sampling area for {sensor_name} must yet to be specified in the software.")
|
|
240
246
|
|
|
241
247
|
|
disdrodb/l2/processing.py
CHANGED
|
@@ -27,7 +27,6 @@ from disdrodb.l2.empirical_dsd import (
|
|
|
27
27
|
add_bins_metrics,
|
|
28
28
|
compute_integral_parameters,
|
|
29
29
|
compute_spectrum_parameters,
|
|
30
|
-
get_drop_average_velocity,
|
|
31
30
|
get_drop_number_concentration,
|
|
32
31
|
get_effective_sampling_area,
|
|
33
32
|
get_kinetic_energy_variables_from_drop_number,
|
|
@@ -273,6 +272,8 @@ def generate_l2e(
|
|
|
273
272
|
"Dmin",
|
|
274
273
|
"Dmax",
|
|
275
274
|
"fall_velocity",
|
|
275
|
+
"qc_resampling",
|
|
276
|
+
"time_qc",
|
|
276
277
|
]
|
|
277
278
|
|
|
278
279
|
variables = [var for var in variables if var in ds]
|
|
@@ -282,8 +283,8 @@ def generate_l2e(
|
|
|
282
283
|
# -------------------------------------------------------------------------------------------
|
|
283
284
|
# Compute and add drop average velocity if an optical disdrometer (i.e OTT Parsivel or ThiesLPM)
|
|
284
285
|
# - We recompute it because if the input dataset is aggregated, it must be updated !
|
|
285
|
-
if has_velocity_dimension:
|
|
286
|
-
|
|
286
|
+
# if has_velocity_dimension:
|
|
287
|
+
# ds["drop_average_velocity"] = get_drop_average_velocity(ds["drop_number"])
|
|
287
288
|
|
|
288
289
|
# -------------------------------------------------------------------------------------------
|
|
289
290
|
# Define velocity array with dimension 'velocity_method'
|
|
@@ -441,7 +442,7 @@ def generate_l2m(
|
|
|
441
442
|
diameter_spacing=0.05,
|
|
442
443
|
# Processing options
|
|
443
444
|
ds_env=None,
|
|
444
|
-
|
|
445
|
+
fall_velocity_model="Beard1976",
|
|
445
446
|
# Filtering options
|
|
446
447
|
minimum_ndrops=1,
|
|
447
448
|
minimum_nbins=3,
|
|
@@ -548,7 +549,7 @@ def generate_l2m(
|
|
|
548
549
|
drop_number_concentration = psd(diameter)
|
|
549
550
|
|
|
550
551
|
# Retrieve fall velocity for each new diameter bin
|
|
551
|
-
velocity = get_raindrop_fall_velocity(diameter=diameter,
|
|
552
|
+
velocity = get_raindrop_fall_velocity(diameter=diameter, model=fall_velocity_model, ds_env=ds_env) # mm
|
|
552
553
|
|
|
553
554
|
# Compute integral parameters
|
|
554
555
|
ds_params = compute_integral_parameters(
|
disdrodb/metadata/geolocation.py
CHANGED
disdrodb/metadata/search.py
CHANGED
|
@@ -102,10 +102,9 @@ def get_list_metadata(
|
|
|
102
102
|
Path to the root of the DISDRODB Metadata Archive. Format: ``<...>/DISDRODB``
|
|
103
103
|
If None, the``metadata_archive_dir`` path specified in the DISDRODB active configuratio. The default is None.
|
|
104
104
|
**product_kwargs : dict, optional
|
|
105
|
-
Additional arguments required for
|
|
106
|
-
For
|
|
107
|
-
|
|
108
|
-
the ``model_name``.
|
|
105
|
+
Additional arguments required for DISDRODB products L1, L2E and L2M.
|
|
106
|
+
For the L1, L2E and L2M products, ``temporal_resolution`` is required.
|
|
107
|
+
FOr the L2M product, ``model_name`` is required.
|
|
109
108
|
|
|
110
109
|
Returns
|
|
111
110
|
-------
|
disdrodb/psd/fitting.py
CHANGED
|
@@ -23,7 +23,7 @@ from scipy.optimize import minimize
|
|
|
23
23
|
from scipy.special import gamma, gammaln # Regularized lower incomplete gamma function
|
|
24
24
|
|
|
25
25
|
from disdrodb.constants import DIAMETER_DIMENSION
|
|
26
|
-
from disdrodb.l1.fall_velocity import
|
|
26
|
+
from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity_from_ds
|
|
27
27
|
from disdrodb.l2.empirical_dsd import (
|
|
28
28
|
get_median_volume_drop_diameter,
|
|
29
29
|
get_moment,
|
|
@@ -607,7 +607,7 @@ def estimate_gamma_parameters(
|
|
|
607
607
|
|
|
608
608
|
"""
|
|
609
609
|
# Define initial guess for parameters
|
|
610
|
-
a = mu + 1 # (mu = a-1, a = mu+1)
|
|
610
|
+
a = mu + 1 # (mu = a-1, a = mu+1) (a > 0 --> mu=-1)
|
|
611
611
|
scale = 1 / Lambda
|
|
612
612
|
initial_params = [a, scale]
|
|
613
613
|
|
|
@@ -1208,13 +1208,13 @@ def apply_gamma_gs(
|
|
|
1208
1208
|
):
|
|
1209
1209
|
"""Estimate GammaPSD model parameters using Grid Search."""
|
|
1210
1210
|
# Define parameters bounds
|
|
1211
|
-
mu_bounds = (
|
|
1212
|
-
lambda_bounds = (0
|
|
1211
|
+
mu_bounds = (-1, 40)
|
|
1212
|
+
lambda_bounds = (0, 60)
|
|
1213
1213
|
|
|
1214
1214
|
# Define initial set of parameters
|
|
1215
|
-
mu_step = 0.
|
|
1215
|
+
mu_step = 0.25
|
|
1216
1216
|
lambda_step = 0.5
|
|
1217
|
-
mu_values = np.arange(0
|
|
1217
|
+
mu_values = np.arange(0, 40, step=mu_step)
|
|
1218
1218
|
lambda_values = np.arange(0, 60, step=lambda_step)
|
|
1219
1219
|
|
|
1220
1220
|
# First round of GS
|
|
@@ -1304,15 +1304,17 @@ def apply_lognormal_gs(
|
|
|
1304
1304
|
"""Estimate LognormalPSD model parameters using Grid Search."""
|
|
1305
1305
|
# Define parameters bounds
|
|
1306
1306
|
sigma_bounds = (0, np.inf) # > 0
|
|
1307
|
-
scale_bounds = (0
|
|
1307
|
+
scale_bounds = (0, np.inf) # > 0
|
|
1308
1308
|
# mu_bounds = (- np.inf, np.inf) # mu = np.log(scale)
|
|
1309
1309
|
|
|
1310
1310
|
# Define initial set of parameters
|
|
1311
|
+
# --> Typically sigma between 0 and 3
|
|
1312
|
+
# --> Typically mu between -2 and 2
|
|
1311
1313
|
scale_step = 0.2
|
|
1312
1314
|
sigma_step = 0.2
|
|
1313
|
-
scale_values = np.arange(
|
|
1314
|
-
mu_values = np.log(scale_values)
|
|
1315
|
-
sigma_values = np.arange(0,
|
|
1315
|
+
scale_values = np.arange(scale_step, 20, step=scale_step)
|
|
1316
|
+
mu_values = np.log(scale_values)
|
|
1317
|
+
sigma_values = np.arange(0, 3, step=sigma_step)
|
|
1316
1318
|
|
|
1317
1319
|
# First round of GS
|
|
1318
1320
|
Nt, mu, sigma = _apply_lognormal_gs(
|
|
@@ -1333,7 +1335,8 @@ def apply_lognormal_gs(
|
|
|
1333
1335
|
# Second round of GS
|
|
1334
1336
|
sigma_values = define_param_range(sigma, sigma_step, bounds=sigma_bounds)
|
|
1335
1337
|
scale_values = define_param_range(np.exp(mu), scale_step, bounds=scale_bounds)
|
|
1336
|
-
|
|
1338
|
+
with suppress_warnings():
|
|
1339
|
+
mu_values = np.log(scale_values)
|
|
1337
1340
|
Nt, mu, sigma = _apply_lognormal_gs(
|
|
1338
1341
|
mu_values=mu_values,
|
|
1339
1342
|
sigma_values=sigma_values,
|
|
@@ -1365,7 +1368,7 @@ def apply_normalized_gamma_gs(
|
|
|
1365
1368
|
):
|
|
1366
1369
|
"""Estimate NormalizedGammaPSD model parameters using Grid Search."""
|
|
1367
1370
|
# Define set of mu values
|
|
1368
|
-
mu_arr = np.arange(
|
|
1371
|
+
mu_arr = np.arange(-4, 30, step=0.01)
|
|
1369
1372
|
|
|
1370
1373
|
# Perform grid search
|
|
1371
1374
|
with suppress_warnings():
|
|
@@ -2353,7 +2356,7 @@ def get_gs_parameters(ds, psd_model, target="ND", transformation="log", error_or
|
|
|
2353
2356
|
|
|
2354
2357
|
# Check fall velocity is available if target R
|
|
2355
2358
|
if "fall_velocity" not in ds:
|
|
2356
|
-
ds["fall_velocity"] =
|
|
2359
|
+
ds["fall_velocity"] = get_raindrop_fall_velocity_from_ds(ds)
|
|
2357
2360
|
|
|
2358
2361
|
# Retrieve estimation function
|
|
2359
2362
|
func = OPTIMIZATION_ROUTINES_DICT["GS"][psd_model]
|
disdrodb/routines/l0.py
CHANGED
|
@@ -50,7 +50,7 @@ from disdrodb.l0.l0b_nc_processing import sanitize_ds
|
|
|
50
50
|
from disdrodb.l0.l0b_processing import generate_l0b
|
|
51
51
|
from disdrodb.l0.l0c_processing import TOLERANCE_SECONDS, create_l0c_datasets
|
|
52
52
|
from disdrodb.metadata import read_station_metadata
|
|
53
|
-
from disdrodb.utils.archiving import
|
|
53
|
+
from disdrodb.utils.archiving import group_files_by_time_block
|
|
54
54
|
from disdrodb.utils.dask import execute_tasks_safely
|
|
55
55
|
from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel
|
|
56
56
|
|
|
@@ -928,7 +928,7 @@ def run_l0c_station(
|
|
|
928
928
|
# -------------------------------------------------------------------------.
|
|
929
929
|
# Retrieve dictionary with the required files per time block
|
|
930
930
|
# TODO: allow customizing this in config file, but risk of out of memory !
|
|
931
|
-
list_event_info =
|
|
931
|
+
list_event_info = group_files_by_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)
|
|
932
932
|
|
|
933
933
|
# -----------------------------------------------------------------.
|
|
934
934
|
# Generate L0C files
|