disdrodb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +64 -34
- disdrodb/_config.py +5 -4
- disdrodb/_version.py +16 -3
- disdrodb/accessor/__init__.py +20 -0
- disdrodb/accessor/methods.py +125 -0
- disdrodb/api/checks.py +139 -9
- disdrodb/api/configs.py +4 -2
- disdrodb/api/info.py +10 -10
- disdrodb/api/io.py +237 -18
- disdrodb/api/path.py +81 -75
- disdrodb/api/search.py +6 -6
- disdrodb/cli/disdrodb_create_summary_station.py +91 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
- disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
- disdrodb/configs.py +149 -4
- disdrodb/constants.py +61 -0
- disdrodb/data_transfer/download_data.py +5 -5
- disdrodb/etc/configs/attributes.yaml +339 -0
- disdrodb/etc/configs/encodings.yaml +473 -0
- disdrodb/etc/products/L1/global.yaml +13 -0
- disdrodb/etc/products/L2E/10MIN.yaml +12 -0
- disdrodb/etc/products/L2E/1MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +22 -0
- disdrodb/etc/products/L2M/10MIN.yaml +12 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +26 -0
- disdrodb/l0/__init__.py +13 -0
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
- disdrodb/l0/l0a_processing.py +30 -30
- disdrodb/l0/l0b_nc_processing.py +108 -2
- disdrodb/l0/l0b_processing.py +4 -4
- disdrodb/l0/l0c_processing.py +5 -13
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
- disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
- disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +2 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
- disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → USA/C3WE.py} +65 -85
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
- disdrodb/l0/routines.py +105 -14
- disdrodb/l1/__init__.py +5 -0
- disdrodb/l1/filters.py +34 -20
- disdrodb/l1/processing.py +45 -44
- disdrodb/l1/resampling.py +77 -66
- disdrodb/l1/routines.py +35 -43
- disdrodb/l1_env/routines.py +18 -3
- disdrodb/l2/__init__.py +7 -0
- disdrodb/l2/empirical_dsd.py +58 -10
- disdrodb/l2/event.py +27 -120
- disdrodb/l2/processing.py +267 -116
- disdrodb/l2/routines.py +618 -254
- disdrodb/metadata/standards.py +3 -1
- disdrodb/psd/fitting.py +463 -144
- disdrodb/psd/models.py +8 -5
- disdrodb/routines.py +3 -3
- disdrodb/scattering/__init__.py +16 -4
- disdrodb/scattering/axis_ratio.py +56 -36
- disdrodb/scattering/permittivity.py +486 -0
- disdrodb/scattering/routines.py +701 -159
- disdrodb/summary/__init__.py +17 -0
- disdrodb/summary/routines.py +4120 -0
- disdrodb/utils/attrs.py +68 -125
- disdrodb/utils/compression.py +30 -1
- disdrodb/utils/dask.py +59 -8
- disdrodb/utils/dataframe.py +61 -7
- disdrodb/utils/directories.py +35 -15
- disdrodb/utils/encoding.py +33 -19
- disdrodb/utils/logger.py +13 -6
- disdrodb/utils/manipulations.py +71 -0
- disdrodb/utils/subsetting.py +214 -0
- disdrodb/utils/time.py +165 -19
- disdrodb/utils/writer.py +20 -7
- disdrodb/utils/xarray.py +2 -4
- disdrodb/viz/__init__.py +13 -0
- disdrodb/viz/plots.py +327 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/RECORD +121 -88
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
- disdrodb/l1/encoding_attrs.py +0 -642
- disdrodb/l2/processing_options.py +0 -213
- /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
disdrodb/l1/resampling.py
CHANGED
@@ -15,12 +15,11 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Utilities for temporal resampling."""
-
-
+import numpy as np
 import pandas as pd
 import xarray as xr
 
-from disdrodb.utils.time import regularize_dataset
+from disdrodb.utils.time import ensure_sample_interval_in_seconds, regularize_dataset
 
 DEFAULT_ACCUMULATIONS = ["10s", "30s", "1min", "2min", "5min", "10min", "30min", "1hour"]
 
@@ -96,6 +95,24 @@ def define_window_size(sample_interval, accumulation_interval):
     return window_size
 
 
+def _resample(ds, variables, accumulation, op):
+    if not variables:
+        return {}
+    ds_subset = ds[variables]
+    if "time" in ds_subset.dims:
+        return getattr(ds_subset.resample({"time": accumulation}), op)(skipna=False)
+    return ds_subset
+
+
+def _rolling(ds, variables, window_size, op):
+    if not variables:
+        return {}
+    ds_subset = ds[variables]
+    if "time" in ds_subset.dims:
+        return getattr(ds_subset.rolling(time=window_size, center=False), op)(skipna=False)
+    return ds_subset
+
+
 def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     """
     Resample the dataset to a specified accumulation interval.
@@ -128,20 +145,61 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     - The function updates the dataset attributes and the sample_interval coordinate.
 
     """
-    #
-
-
-
+    # --------------------------------------------------------------------------.
+    # Ensure sample interval in seconds
+    sample_interval = int(ensure_sample_interval_in_seconds(sample_interval))
+
+    # --------------------------------------------------------------------------.
+    # Raise error if the accumulation_interval is less than the sample interval
+    if accumulation_interval < sample_interval:
+        raise ValueError("Expecting an accumulation_interval > sample interval.")
+    # Raise error if accumulation_interval is not multiple of sample_interval
+    if not accumulation_interval % sample_interval == 0:
+        raise ValueError("The accumulation_interval is not a multiple of sample interval.")
+
+    # --------------------------------------------------------------------------.
+    #### Preprocess the dataset
+    # Here we set NaN in the raw_drop_number to 0
+    # - We assume that NaN corresponds to 0
+    # - When we regularize, we infill with NaN
+    # - When we aggregate with sum, we don't skip NaN
+    # --> Aggregation with original missing timesteps currently results in NaN !
+
+    # Infill NaN values with zeros for drop_number and raw_drop_number
+    # - This might alter integrated statistics if NaN in spectrum does not actually correspond to 0 !
+    # - TODO: NaN should not be set as 0 !
+    for var in ["drop_number", "raw_drop_number"]:
+        if var in ds:
+            ds[var] = xr.where(np.isnan(ds[var]), 0, ds[var])
 
     # Ensure regular dataset without missing timesteps
+    # --> This adds NaN values for missing timesteps
     ds = regularize_dataset(ds, freq=f"{sample_interval}s")
 
+    # --------------------------------------------------------------------------.
+    # Define dataset attributes
+    attrs = ds.attrs.copy()
+    if rolling:
+        attrs["disdrodb_rolled_product"] = "True"
+    else:
+        attrs["disdrodb_rolled_product"] = "False"
+
+    if sample_interval == accumulation_interval:
+        attrs["disdrodb_aggregated_product"] = "False"
+        ds = add_sample_interval(ds, sample_interval=accumulation_interval)
+        ds.attrs = attrs
+        return ds
+
+    # --------------------------------------------------------------------------.
+    # Resample the dataset
+    attrs["disdrodb_aggregated_product"] = "True"
+
     # Initialize resample dataset
     ds_resampled = xr.Dataset()
 
     # Retrieve variables to average/sum
     var_to_average = ["fall_velocity"]
-    var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nremoved"]
+    var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nraw", "Nremoved"]
     var_to_min = ["Dmin"]
     var_to_max = ["Dmax"]
 
@@ -154,6 +212,7 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     # TODO Define custom processing
     # - quality_flag --> take worst
     # - skipna if less than fraction (to not waste lot of data when aggregating over i.e. hours)
+    # - Add tolerance on fraction of missing timesteps for large accumulation_intervals
 
     # Resample the dataset
     # - Rolling currently does not allow direct rolling forward.
@@ -163,74 +222,26 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     # - https://github.com/pydata/xarray/issues/8958
     if not rolling:
         # Resample
-        if len(var_to_average) > 0:
-            ds_resampled.update(
-                ds[var_to_average].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).mean(skipna=False),
-            )
-        if len(var_to_cumulate) > 0:
-            ds_resampled.update(
-                ds[var_to_cumulate].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).sum(skipna=False),
-            )
-        if len(var_to_min) > 0:
-            ds_resampled.update(
-                ds[var_to_min].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).min(skipna=False),
-            )
-        if len(var_to_max) > 0:
-            ds_resampled.update(
-                ds[var_to_max].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).max(skipna=False),
-            )
-
+        accumulation = pd.Timedelta(seconds=accumulation_interval)
+        ds_resampled.update(_resample(ds=ds, variables=var_to_average, accumulation=accumulation, op="mean"))
+        ds_resampled.update(_resample(ds=ds, variables=var_to_cumulate, accumulation=accumulation, op="sum"))
+        ds_resampled.update(_resample(ds=ds, variables=var_to_min, accumulation=accumulation, op="min"))
+        ds_resampled.update(_resample(ds=ds, variables=var_to_max, accumulation=accumulation, op="max"))
     else:
         # Roll and Resample
         window_size = define_window_size(sample_interval=sample_interval, accumulation_interval=accumulation_interval)
-
-        if len(var_to_average) > 0:
-            ds_resampled.update(ds[var_to_average].rolling({"time": window_size}, center=False).mean(skipna=False))
-        if len(var_to_cumulate) > 0:
-            ds_resampled.update(ds[var_to_cumulate].rolling({"time": window_size}, center=False).sum(skipna=False))
-        if len(var_to_min) > 0:
-            ds_resampled.update(ds[var_to_min].rolling({"time": window_size}, center=False).min(skipna=False))
-        if len(var_to_max) > 0:
-            ds_resampled.update(ds[var_to_max].rolling({"time": window_size}, center=False).max(skipna=False))
-        # Ensure time to correspond to the start time of the integration
+        ds_resampled.update(_rolling(ds=ds, variables=var_to_average, window_size=window_size, op="mean"))
+        ds_resampled.update(_rolling(ds=ds, variables=var_to_cumulate, window_size=window_size, op="sum"))
+        ds_resampled.update(_rolling(ds=ds, variables=var_to_min, window_size=window_size, op="min"))
+        ds_resampled.update(_rolling(ds=ds, variables=var_to_max, window_size=window_size, op="max"))
+        # Ensure time to correspond to the start time of the measurement period
         ds_resampled = ds_resampled.isel(time=slice(window_size - 1, None)).assign_coords(
            {"time": ds_resampled["time"].data[: -window_size + 1]},
         )
 
     # Add attributes
     ds_resampled.attrs = attrs
-    if rolling:
-        ds_resampled.attrs["rolled"] = "True"
-    else:
-        ds_resampled.attrs["rolled"] = "False"
 
     # Add accumulation_interval as new sample_interval coordinate
     ds_resampled = add_sample_interval(ds_resampled, sample_interval=accumulation_interval)
     return ds_resampled
-
-
-def get_possible_accumulations(sample_interval, accumulations=None):
-    """
-    Get a list of valid accumulation intervals based on the sampling time.
-
-    Parameters
-    ----------
-    - sample_interval (int): The inferred sampling time in seconds.
-    - accumulations (list of int or string): List of desired accumulation intervals.
-        If provide integers, specify accumulation in seconds.
-
-    Returns
-    -------
-    - list of int: Valid accumulation intervals in seconds.
-    """
-    # Select default accumulations
-    if accumulations is None:
-        accumulations = DEFAULT_ACCUMULATIONS
-
-    # Get accumulations in seconds
-    accumulations = [int(pd.Timedelta(acc).total_seconds()) if isinstance(acc, str) else acc for acc in accumulations]
-
-    # Filter candidate accumulations to include only those that are multiples of the sampling time
-    possible_accumulations = [acc for acc in accumulations if acc % sample_interval == 0]
-
-    return possible_accumulations
disdrodb/l1/routines.py
CHANGED
@@ -21,13 +21,14 @@
 import datetime
 import logging
 import os
+import shutil
 import time
 from typing import Optional
 
 import dask
 import xarray as xr
 
-
+from disdrodb.api.checks import check_station_inputs
 from disdrodb.api.create_directories import (
     create_logs_directory,
     create_product_directory,
@@ -38,7 +39,12 @@ from disdrodb.api.path import (
     define_l1_filename,
 )
 from disdrodb.api.search import get_required_product
-from disdrodb.configs import
+from disdrodb.configs import (
+    get_data_archive_dir,
+    get_folder_partitioning,
+    get_metadata_archive_dir,
+    get_product_options,
+)
 from disdrodb.l1.processing import generate_l1
 from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel
 
@@ -55,42 +61,6 @@ from disdrodb.utils.writer import write_product
 logger = logging.getLogger(__name__)
 
 
-def get_l1_options():
-    """Get L1 options."""
-    # - TODO: from YAML
-    # - TODO: as function of sensor name
-
-    # minimum_diameter
-    # --> PWS100: 0 (0.05)
-    # --> PARSIVEL: 0.2495 (0.312)
-    # --> RD80: 0.313 (0.359)
-    # --> LPM: 0.125 (0.1875) (we currently discard first bin with default settings !)
-
-    # maximum_diameter
-    # LPM: 9 (10) mm
-    # RD80: 5.373 (5.6) mm
-    # OTT: 24.5 (26) mm
-    # PWS100: 27.2 (28.8) mm
-
-    l1_options = {
-        # Fall velocity option
-        "fall_velocity_method": "Beard1976",
-        # Diameter-Velocity Filtering Options
-        "minimum_diameter": 0.2495,  # OTT PARSIVEL first two bin no data !
-        "maximum_diameter": 10,
-        "minimum_velocity": 0,
-        "maximum_velocity": 12,
-        "above_velocity_fraction": 0.5,
-        "above_velocity_tolerance": None,
-        "below_velocity_fraction": 0.5,
-        "below_velocity_tolerance": None,
-        "small_diameter_threshold": 1,  # 2
-        "small_velocity_threshold": 2.5,  # 3
-        "maintain_smallest_drops": True,
-    }
-    return l1_options
-
-
 @delayed_if_parallel
 @single_threaded_if_parallel
 def _generate_l1(
@@ -153,16 +123,16 @@ def _generate_l1(
     # Log start processing
     msg = f"{product} processing of {filename} has started."
     log_info(logger=logger, msg=msg, verbose=verbose)
-
+    success_flag = False
     ##------------------------------------------------------------------------.
     # Retrieve L1 configurations
-    l1_options =
+    l1_options = get_product_options("L1").get("product_options")
 
     ##------------------------------------------------------------------------.
     ### Core computation
     try:
         # Open the raw netCDF
-        with xr.open_dataset(filepath, chunks
+        with xr.open_dataset(filepath, chunks=-1, decode_timedelta=False, cache=False) as ds:
             ds = ds[["raw_drop_number"]].load()
 
             # Produce L1 dataset
@@ -175,7 +145,13 @@ def _generate_l1(
             folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
             filepath = os.path.join(folder_path, filename)
             # Write to disk
-            write_product(ds,
+            write_product(ds, filepath=filepath, force=force)
+
+            ##--------------------------------------------------------------------.
+            #### - Define logger file final directory
+            if folder_partitioning != "":
+                log_dst_dir = define_file_folder_path(ds, data_dir=logs_dir, folder_partitioning=folder_partitioning)
+                os.makedirs(log_dst_dir, exist_ok=True)
 
         ##--------------------------------------------------------------------.
         # Clean environment
@@ -184,6 +160,7 @@ def _generate_l1(
         # Log end processing
         msg = f"{product} processing of {filename} has ended."
         log_info(logger=logger, msg=msg, verbose=verbose)
+        success_flag = True
 
     ##--------------------------------------------------------------------.
     # Otherwise log the error
@@ -195,6 +172,13 @@ def _generate_l1(
     # Close the file logger
     close_logger(logger)
 
+    # Move logger file to correct partitioning directory
+    if success_flag and folder_partitioning != "" and logger_filepath is not None:
+        # Move logger file to correct partitioning directory
+        dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
+        shutil.move(logger_filepath, dst_filepath)
+        logger_filepath = dst_filepath
+
     # Return the logger file path
     return logger_filepath
 
@@ -260,6 +244,14 @@ def run_l1_station(
     # Retrieve DISDRODB Metadata Archive directory
     metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir)
 
+    # Check valid data_source, campaign_name, and station_name
+    check_station_inputs(
+        metadata_archive_dir=metadata_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
+
     # Define logs directory
     logs_dir = create_logs_directory(
         product=product,
@@ -310,7 +302,7 @@ def run_l1_station(
     # If no data available, print error message and return None
     if flag_not_available_data:
         msg = (
-            f"{product} processing of {data_source} {campaign_name} {station_name}"
+            f"{product} processing of {data_source} {campaign_name} {station_name} "
             + f"has not been launched because of missing {required_product} data."
         )
         print(msg)
disdrodb/l1_env/routines.py
CHANGED
@@ -15,9 +15,10 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Core functions for DISDRODB ENV production."""
-
 import xarray as xr
 
+from disdrodb.constants import GEOLOCATION_COORDS
+
 
 def get_default_environment_dataset():
     """Define defaults values for the ENV dataset."""
@@ -30,9 +31,23 @@ def get_default_environment_dataset():
     return ds_env
 
 
+def _assign_geolocation(ds_src, dst_dst):
+
+    dict_coords = {coord: ds_src[coord] for coord in GEOLOCATION_COORDS if coord in ds_src}
+    dst_dst = dst_dst.assign_coords(dict_coords)
+    return dst_dst
+
+
 def load_env_dataset(ds):
     """Load the ENV dataset."""
-    # TODO
+    # TODO: Retrieve relative_humidity and temperature from L1-ENV
     ds_env = get_default_environment_dataset()
-
+    # Compute water density
+    # get_water_density(
+    #     temperature=temperature,
+    #     air_pressure=air_pressure,
+    # )
+    # --> (T == 10 --> 999.7, T == 20 --> 998.2
+    ds_env["water_density"] = 1000  # kg / m3  # TODO as function of ENV (temperature, ...) ?
+    ds_env = _assign_geolocation(ds_src=ds, dst_dst=ds_env)
    return ds_env
disdrodb/l2/__init__.py
CHANGED
@@ -15,3 +15,10 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Module for DISDRODB L2 production."""
+from disdrodb.l2.processing import generate_l2_radar, generate_l2e, generate_l2m
+
+__all__ = [
+    "generate_l2_radar",
+    "generate_l2e",
+    "generate_l2m",
+]
disdrodb/l2/empirical_dsd.py
CHANGED
@@ -23,8 +23,8 @@ Infinite values should be removed beforehand or otherwise are propagated through
 import numpy as np
 import xarray as xr
 
-from disdrodb import DIAMETER_DIMENSION, VELOCITY_DIMENSION
 from disdrodb.api.checks import check_sensor_name
+from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
 from disdrodb.utils.xarray import (
     remove_diameter_coordinates,
     remove_velocity_coordinates,
@@ -66,7 +66,7 @@ def get_drop_average_velocity(drop_number):
     ----------
     drop_number : xarray.DataArray
         Array of drop counts \\( n(D,v) \\) per diameter (and velocity, if available) bins
-        over the
+        over the measurement interval.
         The DataArray must have the ``velocity_bin_center`` coordinate.
 
     Returns
@@ -80,6 +80,7 @@ def get_drop_average_velocity(drop_number):
         dim=VELOCITY_DIMENSION,
         skipna=False,
     )
+    average_velocity.name = "average_velocity"
     return average_velocity
 
 
@@ -138,6 +139,9 @@ def _compute_qc_bins_metrics(arr):
     return output
 
 
+BINS_METRICS = ["Nbins", "Nbins_missing", "Nbins_missing_fraction", "Nbins_missing_consecutive"]
+
+
 def compute_qc_bins_metrics(ds):
     """
     Compute quality-control metrics for drop-count bins along the diameter dimension.
@@ -191,11 +195,19 @@ def compute_qc_bins_metrics(ds):
     )
 
     # Assign meaningful labels to the qc 'metric' dimension
-
-    ds_qc_bins = da_qc_bins.assign_coords(metric=variables).to_dataset(dim="metric")
+    ds_qc_bins = da_qc_bins.assign_coords(metric=BINS_METRICS).to_dataset(dim="metric")
     return ds_qc_bins
 
 
+def add_bins_metrics(ds):
+    """Add bin metrics if missing."""
+    bins_metrics = BINS_METRICS
+    if not np.all(np.isin(bins_metrics, list(ds.data_vars))):
+        # Add bins statistics
+        ds.update(compute_qc_bins_metrics(ds))
+    return ds
+
+
 ####-------------------------------------------------------------------------------------------------------------------.
 #### DSD Spectrum, Concentration, Moments
 
@@ -252,7 +264,7 @@ def get_drop_number_concentration(drop_number, velocity, diameter_bin_width, sam
         Width of each diameter bin \\( \\Delta D \\) in millimeters (mm).
     drop_number : xarray.DataArray
         Array of drop counts \\( n(D) or n(D,v) \\) per diameter (and velocity if available)
-        bins over the
+        bins over the measurement interval.
     sample_interval : float or xarray.DataArray
         Time over which the drops are counted \\( \\Delta t \\) in seconds (s).
     sampling_area : float or xarray.DataArray
@@ -277,7 +289,7 @@ def get_drop_number_concentration(drop_number, velocity, diameter_bin_width, sam
     - \\( n(D,v) \\): Number of drops counted in diameter (and velocity) bins.
     - \\( A_{\text{eff}}(D) \\): Effective sampling area of the sensor for diameter \\( D \\) in square meters (m²).
     - \\( \\Delta D \\): Diameter bin width in millimeters (mm).
-    - \\( \\Delta t \\):
+    - \\( \\Delta t \\): Measurement interval in seconds (s).
     - \\( v(D) \\): Fall velocity of drops in diameter bin \\( D \\) in meters per second (m/s).
 
     The effective sampling area \\( A_{\text{eff}}(D) \\) depends on the sensor and may vary with drop diameter.
@@ -919,8 +931,7 @@ def get_min_max_diameter(drop_counts):
     return min_drop_diameter, max_drop_diameter
 
 
-def
-    """Get raindrop diameter with highest occurrence."""
+def _get_mode_diameter(drop_number_concentration, diameter):
     # If all NaN, set to 0 otherwise argmax fail when all NaN data
     idx_all_nan_mask = np.isnan(drop_number_concentration).all(dim=DIAMETER_DIMENSION)
     drop_number_concentration = drop_number_concentration.where(~idx_all_nan_mask, 0)
@@ -939,6 +950,43 @@ def get_mode_diameter(drop_number_concentration, diameter):
     return diameter_mode
 
 
+def get_mode_diameter(
+    drop_number_concentration,
+    diameter,
+):
+    """Get raindrop diameter with highest occurrence.
+
+    Parameters
+    ----------
+    drop_number_concentration : xarray.DataArray
+        The drop number concentration N(D) for each diameter bin, typically in units of
+        number per cubic meter per millimeter (m⁻³·mm⁻¹).
+    diameter : xarray.DataArray
+        The equivalent volume diameters D of the drops in each bin, in meters (m).
+
+    Returns
+    -------
+    xarray.DataArray
+        The diameter with the highest drop number concentration.
+    """
+    # Use map_blocks if working with Dask arrays
+    if hasattr(drop_number_concentration.data, "chunks"):
+        # Define the template for output
+        template = remove_diameter_coordinates(drop_number_concentration.isel({DIAMETER_DIMENSION: 0}))
+        diameter_mode = xr.map_blocks(
+            _get_mode_diameter,
+            drop_number_concentration,
+            kwargs={"diameter": diameter.compute()},
+            template=template,
+        )
+    else:
+        diameter_mode = _get_mode_diameter(
+            drop_number_concentration=drop_number_concentration,
+            diameter=diameter,
+        )
+    return diameter_mode
+
+
 ####-------------------------------------------------------------------------------------------------------------------.
 #### Mass Distribution Diameters
 
@@ -1369,7 +1417,7 @@ def get_normalized_intercept_parameter_from_moments(moment_3, moment_4):
         [m⁻³·mm³] (number per cubic meter times diameter cubed).
 
     moment_4 : float or array-like
-        The
+        The fourth moment of the drop size distribution, \\( M_4 \\), in units of
         [m⁻³·mm4].
 
     Returns
@@ -1581,7 +1629,7 @@ def get_kinetic_energy_variables_from_drop_number(
     - \\( D_i \\) is the diameter of bin \\( i \\).
     - \\( v_j \\) is the velocity of bin \\( j \\).
     - \\( A \\) is the sampling area.
-    - \\( \\Delta t \\) is the
+    - \\( \\Delta t \\) is the measurement interval in seconds.
     - \\( R \\) is the rainfall rate in mm/hr.
 
     """