disdrodb 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +64 -34
- disdrodb/_config.py +5 -4
- disdrodb/_version.py +16 -3
- disdrodb/accessor/__init__.py +20 -0
- disdrodb/accessor/methods.py +125 -0
- disdrodb/api/checks.py +139 -9
- disdrodb/api/configs.py +4 -2
- disdrodb/api/info.py +10 -10
- disdrodb/api/io.py +237 -18
- disdrodb/api/path.py +81 -75
- disdrodb/api/search.py +6 -6
- disdrodb/cli/disdrodb_create_summary_station.py +91 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
- disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
- disdrodb/configs.py +149 -4
- disdrodb/constants.py +61 -0
- disdrodb/data_transfer/download_data.py +145 -14
- disdrodb/etc/configs/attributes.yaml +339 -0
- disdrodb/etc/configs/encodings.yaml +473 -0
- disdrodb/etc/products/L1/global.yaml +13 -0
- disdrodb/etc/products/L2E/10MIN.yaml +12 -0
- disdrodb/etc/products/L2E/1MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +22 -0
- disdrodb/etc/products/L2M/10MIN.yaml +12 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +26 -0
- disdrodb/l0/__init__.py +13 -0
- disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +44 -3
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +41 -1
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
- disdrodb/l0/l0a_processing.py +30 -30
- disdrodb/l0/l0b_nc_processing.py +108 -2
- disdrodb/l0/l0b_processing.py +4 -4
- disdrodb/l0/l0c_processing.py +5 -13
- disdrodb/l0/manuals/SWS250.pdf +0 -0
- disdrodb/l0/manuals/VPF730.pdf +0 -0
- disdrodb/l0/manuals/VPF750.pdf +0 -0
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
- disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +105 -0
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +128 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
- disdrodb/l0/readers/{PARSIVEL → PARSIVEL2}/KIT/BURKINA_FASO.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → NCAR/FARM_PARSIVEL2.py} +43 -70
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +29 -12
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
- disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
- disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +31 -14
- disdrodb/l0/routines.py +105 -14
- disdrodb/l1/__init__.py +5 -0
- disdrodb/l1/filters.py +34 -20
- disdrodb/l1/processing.py +45 -44
- disdrodb/l1/resampling.py +77 -66
- disdrodb/l1/routines.py +35 -42
- disdrodb/l1_env/routines.py +18 -3
- disdrodb/l2/__init__.py +7 -0
- disdrodb/l2/empirical_dsd.py +58 -10
- disdrodb/l2/event.py +27 -120
- disdrodb/l2/processing.py +267 -116
- disdrodb/l2/routines.py +618 -254
- disdrodb/metadata/standards.py +3 -1
- disdrodb/psd/fitting.py +463 -144
- disdrodb/psd/models.py +8 -5
- disdrodb/routines.py +3 -3
- disdrodb/scattering/__init__.py +16 -4
- disdrodb/scattering/axis_ratio.py +56 -36
- disdrodb/scattering/permittivity.py +486 -0
- disdrodb/scattering/routines.py +701 -159
- disdrodb/summary/__init__.py +17 -0
- disdrodb/summary/routines.py +4120 -0
- disdrodb/utils/attrs.py +68 -125
- disdrodb/utils/compression.py +30 -1
- disdrodb/utils/dask.py +59 -8
- disdrodb/utils/dataframe.py +63 -9
- disdrodb/utils/directories.py +49 -17
- disdrodb/utils/encoding.py +33 -19
- disdrodb/utils/logger.py +13 -6
- disdrodb/utils/manipulations.py +71 -0
- disdrodb/utils/subsetting.py +214 -0
- disdrodb/utils/time.py +165 -19
- disdrodb/utils/writer.py +20 -7
- disdrodb/utils/xarray.py +85 -4
- disdrodb/viz/__init__.py +13 -0
- disdrodb/viz/plots.py +327 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/RECORD +127 -87
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
- disdrodb/l1/encoding_attrs.py +0 -635
- disdrodb/l2/processing_options.py +0 -213
- /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.1.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
disdrodb/l1/processing.py
CHANGED
@@ -16,22 +16,19 @@
 # -----------------------------------------------------------------------------.
 """Core functions for DISDRODB L1 production."""

-
 import xarray as xr

-from disdrodb import DIAMETER_DIMENSION, VELOCITY_DIMENSION
-from disdrodb.l1.encoding_attrs import get_attrs_dict, get_encoding_dict
+from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
 from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity
 from disdrodb.l1.filters import define_spectrum_mask, filter_diameter_bins, filter_velocity_bins
 from disdrodb.l1.resampling import add_sample_interval
 from disdrodb.l1_env.routines import load_env_dataset
 from disdrodb.l2.empirical_dsd import (  # TODO: maybe move out of L2
-
+    add_bins_metrics,
     get_min_max_diameter,
 )
-from disdrodb.utils.attrs import set_attrs
-from disdrodb.utils.encoding import set_encodings
 from disdrodb.utils.time import ensure_sample_interval_in_seconds, infer_sample_interval
+from disdrodb.utils.writer import finalize_product


 def generate_l1(
@@ -51,7 +48,7 @@ def generate_l1(
     small_velocity_threshold=2.5,  # 3
     maintain_smallest_drops=True,
 ):
-    """Generate
+    """Generate DISDRODB L1 Dataset from DISDRODB L0C Dataset.

     Parameters
     ----------
@@ -88,17 +85,17 @@ def generate_l1(
     xarray.Dataset
         DISRODB L1 dataset.
     """
-    # Take as input an L0 !
-
     # Retrieve source attributes
     attrs = ds.attrs.copy()

     # Determine if the velocity dimension is available
     has_velocity_dimension = VELOCITY_DIMENSION in ds.dims

-    #
-
+    # Retrieve sensor_name
+    # - If not present, don't drop Parsivels first two bins
+    sensor_name = attrs.get("sensor_name", "")

+    # ---------------------------------------------------------------------------
     # Retrieve sample interval
     # --> sample_interval is a coordinate of L0C products
     if "sample_interval" in ds:
@@ -107,39 +104,52 @@ def generate_l1(
         # This line is not called in the DISDRODB processing chain !
         sample_interval = infer_sample_interval(ds, verbose=False)

-    # Re-add sample interval as coordinate (in seconds)
-    ds = add_sample_interval(ds, sample_interval=sample_interval)
-
     # ---------------------------------------------------------------------------
     # Retrieve ENV dataset or take defaults
     # --> Used only for Beard fall velocity currently !
     ds_env = load_env_dataset(ds)

+    # ---------------------------------------------------------------------------
+    # Initialize L1 dataset
+    ds_l1 = xr.Dataset()
+
+    # Add raw_drop_number variable to L1 dataset
+    ds_l1["raw_drop_number"] = ds["raw_drop_number"]
+
+    # Add sample interval as coordinate (in seconds)
+    ds_l1 = add_sample_interval(ds_l1, sample_interval=sample_interval)
+
+    # Add L0C coordinates that might got lost
+    if "time_qc" in ds_l1:
+        ds_l1 = ds_l1.assign_coords({"time_qc": ds["time_qc"]})
+
     # -------------------------------------------------------------------------------------------
     # Filter dataset by diameter and velocity bins
+    if sensor_name in ["PARSIVEL", "PARSIVEL2"]:
+        # - Remove first two bins because never reports data !
+        # - If not removed, can alter e.g. L2M model fitting
+        ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=0.312)  # it includes the 0.2495-0.3745 bin
+
     # - Filter diameter bins
-
+    ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=minimum_diameter, maximum_diameter=maximum_diameter)
     # - Filter velocity bins
     if has_velocity_dimension:
-
+        ds_l1 = filter_velocity_bins(ds=ds_l1, minimum_velocity=minimum_velocity, maximum_velocity=maximum_velocity)

     # -------------------------------------------------------------------------------------------
     # Compute fall velocity
-    fall_velocity = get_raindrop_fall_velocity(
-        diameter=
+    ds_l1["fall_velocity"] = get_raindrop_fall_velocity(
+        diameter=ds_l1["diameter_bin_center"],
         method=fall_velocity_method,
         ds_env=ds_env,  # mm
     )

-    # Add fall velocity
-    ds_l1["fall_velocity"] = fall_velocity
-
     # -------------------------------------------------------------------------------------------
     # Define filtering mask according to fall velocity
     if has_velocity_dimension:
         mask = define_spectrum_mask(
-            drop_number=
-            fall_velocity=fall_velocity,
+            drop_number=ds_l1["raw_drop_number"],
+            fall_velocity=ds_l1["fall_velocity"],
             above_velocity_fraction=above_velocity_fraction,
             above_velocity_tolerance=above_velocity_tolerance,
             below_velocity_fraction=below_velocity_fraction,
@@ -152,14 +162,14 @@ def generate_l1(
     # -------------------------------------------------------------------------------------------
     # Retrieve drop number and drop_counts arrays
     if has_velocity_dimension:
-        drop_number =
+        drop_number = ds_l1["raw_drop_number"].where(mask)  # 2D (diameter, velocity)
         drop_counts = drop_number.sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
-        drop_counts_raw =
+        drop_counts_raw = ds_l1["raw_drop_number"].sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)

     else:
-        drop_number =
-        drop_counts =
-        drop_counts_raw =
+        drop_number = ds_l1["raw_drop_number"]  # 1D (diameter)
+        drop_counts = ds_l1["raw_drop_number"]  # 1D (diameter)
+        drop_counts_raw = ds_l1["raw_drop_number"]

     # Add drop number and drop_counts
     ds_l1["drop_number"] = drop_number
@@ -173,30 +183,21 @@ def generate_l1(
     ds_l1["Dmin"] = min_drop_diameter
     ds_l1["Dmax"] = max_drop_diameter
     ds_l1["N"] = drop_counts.sum(dim=DIAMETER_DIMENSION)
-    ds_l1["
+    ds_l1["Nraw"] = drop_counts_raw.sum(dim=DIAMETER_DIMENSION)
+    ds_l1["Nremoved"] = ds_l1["Nraw"] - ds_l1["N"]

     # Add bins statistics
-    ds_l1
+    ds_l1 = add_bins_metrics(ds_l1)

     # -------------------------------------------------------------------------------------------
     # Add quality flags
     # TODO: snow_flags, insects_flag, ...

-    # -------------------------------------------------------------------------------------------
-    #### Add L0C coordinates that might got lost
-    if "time_qc" in ds:
-        ds_l1 = ds_l1.assign_coords({"time_qc": ds["time_qc"]})
-
     #### ----------------------------------------------------------------------------.
-    ####
-    # Add variables attributes
-    attrs_dict = get_attrs_dict()
-    ds_l1 = set_attrs(ds_l1, attrs_dict=attrs_dict)
-
-    # Add variables encoding
-    encoding_dict = get_encoding_dict()
-    ds_l1 = set_encodings(ds_l1, encoding_dict=encoding_dict)
-
+    #### Finalize dataset
     # Add global attributes
     ds_l1.attrs = attrs
+
+    # Add variables attributes and encodings
+    ds_l1 = finalize_product(ds_l1, product="L1")
     return ds_l1
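Taken together, `generate_l1` now builds the L1 dataset from scratch around `raw_drop_number` instead of mutating the L0C input, drops the first two PARSIVEL/PARSIVEL2 diameter bins, tracks how many drops the spectrum mask removed (`Nraw`, `Nremoved`), and delegates attributes and encodings to `finalize_product`. A minimal usage sketch, mirroring how `_generate_l1` in `disdrodb/l1/routines.py` calls it; the file path is a placeholder and all tuning parameters are left at their defaults:

```python
import xarray as xr

from disdrodb.l1.processing import generate_l1

# Placeholder path: generate_l1 expects a DISDRODB L0C dataset with a
# "raw_drop_number" variable and (normally) a "sample_interval" coordinate.
with xr.open_dataset("L0C.nc", chunks=-1, decode_timedelta=False, cache=False) as ds:
    ds = ds[["raw_drop_number"]].load()

ds_l1 = generate_l1(ds)

# New bookkeeping variables introduced by this change:
# N (drops kept by the mask), Nraw (all counted drops), Nremoved = Nraw - N.
print(ds_l1[["N", "Nraw", "Nremoved"]])
```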
disdrodb/l1/resampling.py
CHANGED
@@ -15,12 +15,11 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Utilities for temporal resampling."""
-
-
+import numpy as np
 import pandas as pd
 import xarray as xr

-from disdrodb.utils.time import regularize_dataset
+from disdrodb.utils.time import ensure_sample_interval_in_seconds, regularize_dataset

 DEFAULT_ACCUMULATIONS = ["10s", "30s", "1min", "2min", "5min", "10min", "30min", "1hour"]

@@ -96,6 +95,24 @@ def define_window_size(sample_interval, accumulation_interval)
     return window_size


+def _resample(ds, variables, accumulation, op):
+    if not variables:
+        return {}
+    ds_subset = ds[variables]
+    if "time" in ds_subset.dims:
+        return getattr(ds_subset.resample({"time": accumulation}), op)(skipna=False)
+    return ds_subset
+
+
+def _rolling(ds, variables, window_size, op):
+    if not variables:
+        return {}
+    ds_subset = ds[variables]
+    if "time" in ds_subset.dims:
+        return getattr(ds_subset.rolling(time=window_size, center=False), op)(skipna=False)
+    return ds_subset
+
+
 def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     """
     Resample the dataset to a specified accumulation interval.
@@ -128,20 +145,61 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     - The function updates the dataset attributes and the sample_interval coordinate.

     """
-    #
-
-
-
+    # --------------------------------------------------------------------------.
+    # Ensure sample interval in seconds
+    sample_interval = int(ensure_sample_interval_in_seconds(sample_interval))
+
+    # --------------------------------------------------------------------------.
+    # Raise error if the accumulation_interval is less than the sample interval
+    if accumulation_interval < sample_interval:
+        raise ValueError("Expecting an accumulation_interval > sample interval.")
+    # Raise error if accumulation_interval is not multiple of sample_interval
+    if not accumulation_interval % sample_interval == 0:
+        raise ValueError("The accumulation_interval is not a multiple of sample interval.")
+
+    # --------------------------------------------------------------------------.
+    #### Preprocess the dataset
+    # Here we set NaN in the raw_drop_number to 0
+    # - We assume that NaN corresponds to 0
+    # - When we regularize, we infill with NaN
+    # - When we aggregate with sum, we don't skip NaN
+    # --> Aggregation with original missing timesteps currently results in NaN !
+
+    # Infill NaN values with zeros for drop_number and raw_drop_number
+    # - This might alter integrated statistics if NaN in spectrum does not actually correspond to 0 !
+    # - TODO: NaN should not be set as 0 !
+    for var in ["drop_number", "raw_drop_number"]:
+        if var in ds:
+            ds[var] = xr.where(np.isnan(ds[var]), 0, ds[var])

     # Ensure regular dataset without missing timesteps
+    # --> This adds NaN values for missing timesteps
     ds = regularize_dataset(ds, freq=f"{sample_interval}s")

+    # --------------------------------------------------------------------------.
+    # Define dataset attributes
+    attrs = ds.attrs.copy()
+    if rolling:
+        attrs["disdrodb_rolled_product"] = "True"
+    else:
+        attrs["disdrodb_rolled_product"] = "False"
+
+    if sample_interval == accumulation_interval:
+        attrs["disdrodb_aggregated_product"] = "False"
+        ds = add_sample_interval(ds, sample_interval=accumulation_interval)
+        ds.attrs = attrs
+        return ds
+
+    # --------------------------------------------------------------------------.
+    # Resample the dataset
+    attrs["disdrodb_aggregated_product"] = "True"
+
     # Initialize resample dataset
     ds_resampled = xr.Dataset()

     # Retrieve variables to average/sum
     var_to_average = ["fall_velocity"]
-    var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nremoved"]
+    var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nraw", "Nremoved"]
     var_to_min = ["Dmin"]
     var_to_max = ["Dmax"]

@@ -154,6 +212,7 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     # TODO Define custom processing
     # - quality_flag --> take worst
     # - skipna if less than fraction (to not waste lot of data when aggregating over i.e. hours)
+    # - Add tolerance on fraction of missing timesteps for large accumulation_intervals

     # Resample the dataset
     # - Rolling currently does not allow direct rolling forward.
@@ -163,74 +222,26 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     # - https://github.com/pydata/xarray/issues/8958
     if not rolling:
         # Resample
-
-
-
-
-
-        ds_resampled.update(
-            ds[var_to_cumulate].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).sum(skipna=False),
-        )
-        if len(var_to_min) > 0:
-            ds_resampled.update(
-                ds[var_to_min].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).min(skipna=False),
-            )
-        if len(var_to_max) > 0:
-            ds_resampled.update(
-                ds[var_to_max].resample({"time": pd.Timedelta(seconds=accumulation_interval)}).max(skipna=False),
-            )
-
+        accumulation = pd.Timedelta(seconds=accumulation_interval)
+        ds_resampled.update(_resample(ds=ds, variables=var_to_average, accumulation=accumulation, op="mean"))
+        ds_resampled.update(_resample(ds=ds, variables=var_to_cumulate, accumulation=accumulation, op="sum"))
+        ds_resampled.update(_resample(ds=ds, variables=var_to_min, accumulation=accumulation, op="min"))
+        ds_resampled.update(_resample(ds=ds, variables=var_to_max, accumulation=accumulation, op="max"))
     else:
         # Roll and Resample
         window_size = define_window_size(sample_interval=sample_interval, accumulation_interval=accumulation_interval)
-
-
-
-
-
-        if len(var_to_min) > 0:
-            ds_resampled.update(ds[var_to_min].rolling({"time": window_size}, center=False).min(skipna=False))
-        if len(var_to_max) > 0:
-            ds_resampled.update(ds[var_to_max].rolling({"time": window_size}, center=False).max(skipna=False))
-        # Ensure time to correspond to the start time of the integration
+        ds_resampled.update(_rolling(ds=ds, variables=var_to_average, window_size=window_size, op="mean"))
+        ds_resampled.update(_rolling(ds=ds, variables=var_to_cumulate, window_size=window_size, op="sum"))
+        ds_resampled.update(_rolling(ds=ds, variables=var_to_min, window_size=window_size, op="min"))
+        ds_resampled.update(_rolling(ds=ds, variables=var_to_max, window_size=window_size, op="max"))
+        # Ensure time to correspond to the start time of the measurement period
         ds_resampled = ds_resampled.isel(time=slice(window_size - 1, None)).assign_coords(
             {"time": ds_resampled["time"].data[: -window_size + 1]},
         )

     # Add attributes
     ds_resampled.attrs = attrs
-    if rolling:
-        ds_resampled.attrs["rolled"] = "True"
-    else:
-        ds_resampled.attrs["rolled"] = "False"

     # Add accumulation_interval as new sample_interval coordinate
     ds_resampled = add_sample_interval(ds_resampled, sample_interval=accumulation_interval)
     return ds_resampled
-
-
-def get_possible_accumulations(sample_interval, accumulations=None):
-    """
-    Get a list of valid accumulation intervals based on the sampling time.
-
-    Parameters
-    ----------
-    - sample_interval (int): The inferred sampling time in seconds.
-    - accumulations (list of int or string): List of desired accumulation intervals.
-      If provide integers, specify accumulation in seconds.
-
-    Returns
-    -------
-    - list of int: Valid accumulation intervals in seconds.
-    """
-    # Select default accumulations
-    if accumulations is None:
-        accumulations = DEFAULT_ACCUMULATIONS
-
-    # Get accumulations in seconds
-    accumulations = [int(pd.Timedelta(acc).total_seconds()) if isinstance(acc, str) else acc for acc in accumulations]
-
-    # Filter candidate accumulations to include only those that are multiples of the sampling time
-    possible_accumulations = [acc for acc in accumulations if acc % sample_interval == 0]
-
-    return possible_accumulations
disdrodb/l1/routines.py
CHANGED
@@ -21,13 +21,14 @@
 import datetime
 import logging
 import os
+import shutil
 import time
 from typing import Optional

 import dask
 import xarray as xr

-
+from disdrodb.api.checks import check_station_inputs
 from disdrodb.api.create_directories import (
     create_logs_directory,
     create_product_directory,
@@ -38,7 +39,12 @@ from disdrodb.api.path import (
     define_l1_filename,
 )
 from disdrodb.api.search import get_required_product
-from disdrodb.configs import
+from disdrodb.configs import (
+    get_data_archive_dir,
+    get_folder_partitioning,
+    get_metadata_archive_dir,
+    get_product_options,
+)
 from disdrodb.l1.processing import generate_l1
 from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel

@@ -55,41 +61,6 @@ from disdrodb.utils.writer import write_product
 logger = logging.getLogger(__name__)


-def get_l1_options():
-    """Get L1 options."""
-    # - TODO: from YAML
-    # - TODO: as function of sensor name
-
-    # minimum_diameter
-    # --> PWS100: 0.05
-    # --> PARSIVEL: 0.2495
-    # --> RD80: 0.313
-    # --> LPM: 0.125 (we currently discard first bin with this setting)
-
-    # maximum_diameter
-    # LPM: 8 mm
-    # RD80: 5.6 mm
-    # OTT: 26 mm
-
-    l1_options = {
-        # Fall velocity option
-        "fall_velocity_method": "Beard1976",
-        # Diameter-Velocity Filtering Options
-        "minimum_diameter": 0.2495,  # OTT PARSIVEL first two bin no data !
-        "maximum_diameter": 10,
-        "minimum_velocity": 0,
-        "maximum_velocity": 12,
-        "above_velocity_fraction": 0.5,
-        "above_velocity_tolerance": None,
-        "below_velocity_fraction": 0.5,
-        "below_velocity_tolerance": None,
-        "small_diameter_threshold": 1,  # 2
-        "small_velocity_threshold": 2.5,  # 3
-        "maintain_smallest_drops": True,
-    }
-    return l1_options
-
-
 @delayed_if_parallel
 @single_threaded_if_parallel
 def _generate_l1(
@@ -152,16 +123,16 @@ def _generate_l1(
     # Log start processing
     msg = f"{product} processing of {filename} has started."
     log_info(logger=logger, msg=msg, verbose=verbose)
-
+    success_flag = False
     ##------------------------------------------------------------------------.
     # Retrieve L1 configurations
-    l1_options =
+    l1_options = get_product_options("L1").get("product_options")

     ##------------------------------------------------------------------------.
     ### Core computation
     try:
         # Open the raw netCDF
-        with xr.open_dataset(filepath, chunks
+        with xr.open_dataset(filepath, chunks=-1, decode_timedelta=False, cache=False) as ds:
             ds = ds[["raw_drop_number"]].load()

             # Produce L1 dataset
@@ -174,7 +145,13 @@ def _generate_l1(
             folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
             filepath = os.path.join(folder_path, filename)
             # Write to disk
-            write_product(ds,
+            write_product(ds, filepath=filepath, force=force)
+
+            ##--------------------------------------------------------------------.
+            #### - Define logger file final directory
+            if folder_partitioning != "":
+                log_dst_dir = define_file_folder_path(ds, data_dir=logs_dir, folder_partitioning=folder_partitioning)
+                os.makedirs(log_dst_dir, exist_ok=True)

         ##--------------------------------------------------------------------.
         # Clean environment
@@ -183,6 +160,7 @@ def _generate_l1(
         # Log end processing
         msg = f"{product} processing of {filename} has ended."
         log_info(logger=logger, msg=msg, verbose=verbose)
+        success_flag = True

     ##--------------------------------------------------------------------.
     # Otherwise log the error
@@ -194,6 +172,13 @@ def _generate_l1(
     # Close the file logger
     close_logger(logger)

+    # Move logger file to correct partitioning directory
+    if success_flag and folder_partitioning != "" and logger_filepath is not None:
+        # Move logger file to correct partitioning directory
+        dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
+        shutil.move(logger_filepath, dst_filepath)
+        logger_filepath = dst_filepath
+
     # Return the logger file path
     return logger_filepath

@@ -259,6 +244,14 @@ def run_l1_station(
     # Retrieve DISDRODB Metadata Archive directory
     metadata_archive_dir = get_metadata_archive_dir(metadata_archive_dir)

+    # Check valid data_source, campaign_name, and station_name
+    check_station_inputs(
+        metadata_archive_dir=metadata_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+    )
+
     # Define logs directory
     logs_dir = create_logs_directory(
         product=product,
@@ -309,7 +302,7 @@ def run_l1_station(
     # If no data available, print error message and return None
     if flag_not_available_data:
         msg = (
-            f"{product} processing of {data_source} {campaign_name} {station_name}"
+            f"{product} processing of {data_source} {campaign_name} {station_name} "
            + f"has not been launched because of missing {required_product} data."
         )
         print(msg)
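Two behavioural changes stand out here: the hard-coded `get_l1_options()` dictionary is gone in favour of configuration lookups (note the new `disdrodb/etc/products/L1/global.yaml` in the file list above), and `run_l1_station` now fails fast on invalid station identifiers. A sketch of both calls; the archive path and station identifiers below are placeholders:

```python
from disdrodb.api.checks import check_station_inputs
from disdrodb.configs import get_product_options

# Options formerly hard-coded in get_l1_options() are now resolved from the
# product configuration files shipped under disdrodb/etc/products/.
l1_options = get_product_options("L1").get("product_options")

# Validate data_source / campaign_name / station_name against the metadata
# archive before any processing starts (placeholder values shown).
check_station_inputs(
    metadata_archive_dir="/path/to/DISDRODB-METADATA",
    data_source="DATA_SOURCE",
    campaign_name="CAMPAIGN_NAME",
    station_name="STATION_NAME",
)
```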
disdrodb/l1_env/routines.py
CHANGED
@@ -15,9 +15,10 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Core functions for DISDRODB ENV production."""
-
 import xarray as xr

+from disdrodb.constants import GEOLOCATION_COORDS
+

 def get_default_environment_dataset():
     """Define defaults values for the ENV dataset."""
@@ -30,9 +31,23 @@ def get_default_environment_dataset()
     return ds_env


+def _assign_geolocation(ds_src, dst_dst):
+
+    dict_coords = {coord: ds_src[coord] for coord in GEOLOCATION_COORDS if coord in ds_src}
+    dst_dst = dst_dst.assign_coords(dict_coords)
+    return dst_dst
+
+
 def load_env_dataset(ds):
     """Load the ENV dataset."""
-    # TODO
+    # TODO: Retrieve relative_humidity and temperature from L1-ENV
     ds_env = get_default_environment_dataset()
-
+    # Compute water density
+    # get_water_density(
+    #     temperature=temperature,
+    #     air_pressure=air_pressure,
+    # )
+    # --> (T == 10 --> 999.7, T == 20 --> 998.2
+    ds_env["water_density"] = 1000  # kg / m3  # TODO as function of ENV (temperature, ...) ?
+    ds_env = _assign_geolocation(ds_src=ds, dst_dst=ds_env)
     return ds_env
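`load_env_dataset` thus returns a default ENV dataset stamped with a constant water density (1000 kg/m3, pending derivation from temperature and pressure) and with the geolocation coordinates listed in `disdrodb.constants.GEOLOCATION_COORDS` copied over from the source dataset. A minimal sketch, assuming `ds` is an L0C/L1 dataset carrying geolocation coordinates:

```python
from disdrodb.l1_env.routines import load_env_dataset

ds_env = load_env_dataset(ds)
print(float(ds_env["water_density"]))  # 1000.0 kg/m3, constant for now
print(ds_env.coords)  # includes the geolocation coordinates copied from ds
```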
disdrodb/l2/__init__.py
CHANGED
@@ -15,3 +15,10 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Module for DISDRODB L2 production."""
+from disdrodb.l2.processing import generate_l2_radar, generate_l2e, generate_l2m
+
+__all__ = [
+    "generate_l2_radar",
+    "generate_l2e",
+    "generate_l2m",
+]
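With this change the three L2 generators become part of the package-level API, so both import paths below resolve to the same functions:

```python
# Previously these lived only in disdrodb.l2.processing; both imports now work.
from disdrodb.l2 import generate_l2_radar, generate_l2e, generate_l2m
from disdrodb.l2.processing import generate_l2e as generate_l2e_direct

assert generate_l2e is generate_l2e_direct
```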