disdrodb 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +4 -0
- disdrodb/_version.py +2 -2
- disdrodb/accessor/methods.py +14 -0
- disdrodb/api/checks.py +8 -7
- disdrodb/api/io.py +81 -29
- disdrodb/api/path.py +17 -14
- disdrodb/api/search.py +15 -18
- disdrodb/cli/disdrodb_open_products_options.py +38 -0
- disdrodb/cli/disdrodb_run.py +2 -2
- disdrodb/cli/disdrodb_run_station.py +4 -4
- disdrodb/configs.py +1 -1
- disdrodb/data_transfer/download_data.py +70 -1
- disdrodb/etc/configs/attributes.yaml +62 -8
- disdrodb/etc/configs/encodings.yaml +28 -0
- disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/GAMMA_ML.yaml +1 -1
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +1 -1
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/global.yaml +4 -4
- disdrodb/fall_velocity/graupel.py +8 -8
- disdrodb/fall_velocity/hail.py +2 -2
- disdrodb/fall_velocity/rain.py +33 -5
- disdrodb/issue/checks.py +1 -1
- disdrodb/l0/l0_reader.py +1 -1
- disdrodb/l0/l0a_processing.py +2 -2
- disdrodb/l0/l0b_nc_processing.py +5 -5
- disdrodb/l0/l0b_processing.py +20 -24
- disdrodb/l0/l0c_processing.py +18 -13
- disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/VIETNAM/IGE_PARSIVEL2.py +239 -0
- disdrodb/l0/template_tools.py +13 -13
- disdrodb/l1/classification.py +10 -6
- disdrodb/l2/empirical_dsd.py +25 -15
- disdrodb/l2/processing.py +32 -14
- disdrodb/metadata/download.py +1 -1
- disdrodb/metadata/geolocation.py +4 -4
- disdrodb/metadata/reader.py +3 -3
- disdrodb/metadata/search.py +10 -8
- disdrodb/psd/__init__.py +4 -0
- disdrodb/psd/fitting.py +2660 -592
- disdrodb/psd/gof_metrics.py +389 -0
- disdrodb/psd/grid_search.py +1066 -0
- disdrodb/psd/models.py +1281 -145
- disdrodb/routines/l2.py +6 -6
- disdrodb/routines/options_validation.py +8 -8
- disdrodb/scattering/axis_ratio.py +70 -2
- disdrodb/scattering/permittivity.py +13 -10
- disdrodb/scattering/routines.py +10 -10
- disdrodb/summary/routines.py +23 -20
- disdrodb/utils/archiving.py +29 -22
- disdrodb/utils/attrs.py +6 -4
- disdrodb/utils/dataframe.py +4 -4
- disdrodb/utils/encoding.py +3 -1
- disdrodb/utils/event.py +9 -9
- disdrodb/utils/logger.py +4 -7
- disdrodb/utils/manipulations.py +2 -2
- disdrodb/utils/subsetting.py +1 -1
- disdrodb/utils/time.py +8 -7
- disdrodb/viz/plots.py +25 -17
- {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/METADATA +44 -33
- {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/RECORD +68 -66
- {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/WHEEL +1 -1
- {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/entry_points.txt +1 -0
- disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_Z_MAE.yaml +0 -6
- {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -----------------------------------------------------------------------------.
|
|
3
|
+
# Copyright (c) 2021-2026 DISDRODB developers
|
|
4
|
+
#
|
|
5
|
+
# This program is free software: you can redistribute it and/or modify
|
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
8
|
+
# (at your option) any later version.
|
|
9
|
+
#
|
|
10
|
+
# This program is distributed in the hope that it will be useful,
|
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
# GNU General Public License for more details.
|
|
14
|
+
#
|
|
15
|
+
# You should have received a copy of the GNU General Public License
|
|
16
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
17
|
+
# -----------------------------------------------------------------------------.
|
|
18
|
+
import os
|
|
19
|
+
|
|
20
|
+
import pandas as pd
|
|
21
|
+
|
|
22
|
+
from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
|
|
23
|
+
from disdrodb.l0.l0a_processing import read_raw_text_file
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def reader_parsivel(filepath, logger):
    """Read the integral-variables CR1000 data logger file.

    The file is read line by line into a single text column. The header line
    is the one containing ``TIMESTAMP`` (searched within the first 3 lines);
    data rows start 3 lines after the header line.

    Parameters
    ----------
    filepath : str
        Path of the raw text file.
    logger : logging.Logger or None
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        Dataframe with a datetime ``time`` column and the sensor variables
        as strings. The ``RECORD`` and ``V_Batt_Min`` columns are dropped.

    Raises
    ------
    ValueError
        If ``TIMESTAMP`` is not found in the first 3 lines, or if the number
        of parsed columns differs from the expected one.
    """
    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Read each line as a single field (the line is split manually below)
    reader_kwargs["delimiter"] = "\\n"
    # - Do not let pandas parse a header row (the header is located manually below)
    reader_kwargs["header"] = None
    # - Do not skip any row (the header line must be available to be searched)
    reader_kwargs["skiprows"] = 0
    # - Define encoding
    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df_raw = read_raw_text_file(
        filepath=filepath,
        column_names=["TO_PARSE"],
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Locate the header line and the first data row
    # - Search (at most) the first 3 rows for the one containing "TIMESTAMP"
    # - head(3) also copes with files shorter than 3 rows
    # - Rows that are not strings (e.g. parsed as NaN) are ignored
    header = None
    start_row_idx = None
    for i, line in enumerate(df_raw["TO_PARSE"].head(3)):
        if not isinstance(line, str) or "TIMESTAMP" not in line:
            continue
        # Remove all double quotes
        line = line.replace('"', "")
        # Strip away everything before TIMESTAMP and split the column names
        header = line[line.find("TIMESTAMP") :].split(",")
        # Data rows start 3 lines after the header line
        # NOTE(review): presumably the 2 skipped lines are the units and
        # processing-type lines of the logger file format - confirm
        start_row_idx = i + 3
        break
    if header is None:
        raise ValueError("Could not find 'TIMESTAMP' in the first 3 rows of the file.")
    n_columns = len(header)

    ##------------------------------------------------------------------------.
    #### Split the data rows into columns
    # - At most n_columns - 1 splits, so extra commas stay in the last field
    df = df_raw["TO_PARSE"].iloc[start_row_idx:].str.split(",", expand=True, n=n_columns - 1)

    #### Define column names
    column_names = [
        "time",
        "RECORD",
        "rainfall_rate_32bit",
        "rainfall_accumulated_32bit",
        "weather_code_synop_4680",
        "weather_code_synop_4677",
        "reflectivity_32bit",
        "mor_visibility",
        "laser_amplitude",
        "number_particles",
        "sensor_temperature",
        "sensor_heating_current",
        "sensor_battery_voltage",
        "sample_interval",
        "sensor_status",
        "rain_kinetic_energy",
        "sensor_temperature_receiver",
        "sensor_temperature_trasmitter",
        "V_Batt_Min",
    ]

    ##------------------------------------------------------------------------.
    #### Assign column names
    # - Fail with an explicit message instead of pandas' length-mismatch error
    if df.shape[1] != len(column_names):
        raise ValueError(
            f"Expected {len(column_names)} columns but found {df.shape[1]} in the file.",
        )
    df.columns = column_names

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Define time as datetime column (unparsable timestamps become NaT)
    df["time"] = pd.to_datetime(df["time"].str.strip('"'), format="%Y-%m-%d %H:%M:%S", errors="coerce")

    # Drop columns not agreeing with DISDRODB L0 standards
    columns_to_drop = [
        "RECORD",
        "V_Batt_Min",
    ]
    df = df.drop(columns=columns_to_drop, errors="ignore")
    return df
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def reader_spectrum(filepath, logger):
    """Read the raw-spectrum CR1000 data logger file.

    Each data row is expected to hold a timestamp, a record number and
    1086 comma-separated values: the first 32 are stored in
    ``raw_drop_concentration``, the next 30 (prefixed with two zeros) in
    ``raw_drop_average_velocity`` and the remaining 1024 in
    ``raw_drop_number``. Rows with a different number of values are discarded.

    Parameters
    ----------
    filepath : str
        Path of the raw spectrum text file.
    logger : logging.Logger or None
        Logger forwarded to ``read_raw_text_file``.

    Returns
    -------
    pandas.DataFrame
        Dataframe with a datetime ``time`` column and the three raw array
        columns stored as comma-separated strings.
    """
    # Layout of the values following the timestamp and the record number
    n_concentration_values = 32
    n_velocity_values = 30
    n_expected_values = 1086  # 32 + 30 + 1024
    array_columns = ["raw_drop_concentration", "raw_drop_average_velocity", "raw_drop_number"]

    ##------------------------------------------------------------------------.
    #### Define column names
    column_names = ["TO_PARSE"]

    ##------------------------------------------------------------------------.
    #### Define reader options
    reader_kwargs = {}
    # - Read each line as a single field (the line is split manually below)
    reader_kwargs["delimiter"] = "\\n"
    # - Do not let pandas parse a header row
    reader_kwargs["header"] = None
    # - Skip the first 4 rows
    reader_kwargs["skiprows"] = 4
    # - Define encoding
    reader_kwargs["encoding"] = "latin"  # "ISO-8859-1"
    # - Avoid first column to become df index !!!
    reader_kwargs["index_col"] = False
    # - Define behaviour when encountering bad lines
    reader_kwargs["on_bad_lines"] = "skip"
    # - Define reader engine
    #   - C engine is faster
    #   - Python engine is more feature-complete
    reader_kwargs["engine"] = "python"
    # - Define on-the-fly decompression of on-disk data
    #   - Available: gzip, bz2, zip
    reader_kwargs["compression"] = "infer"
    # - Strings to recognize as NA/NaN and replace with standard NA flags
    #   - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
    #                       '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
    #                       'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
    reader_kwargs["na_values"] = ["na", "", "error"]

    ##------------------------------------------------------------------------.
    #### Read the data
    df = read_raw_text_file(
        filepath=filepath,
        column_names=column_names,
        reader_kwargs=reader_kwargs,
        logger=logger,
    )

    ##------------------------------------------------------------------------.
    #### Adapt the dataframe to adhere to DISDRODB L0 standards
    # Split and assign columns (everything after the 2nd comma stays in TO_PARSE)
    df = df["TO_PARSE"].str.split(",", n=2, expand=True)
    df.columns = ["time", "RECORD", "TO_PARSE"]

    # Define time in datetime format (unparsable timestamps become NaT)
    df["time"] = pd.to_datetime(df["time"].str.strip('"'), format="%Y-%m-%d %H:%M:%S", errors="coerce")

    # Keep only rows with valid number of values
    # - Copy the selection so that the columns added below are not assigned
    #   onto a slice of the original dataframe
    has_valid_length = df["TO_PARSE"].str.count(",") == n_expected_values - 1
    df = df[has_valid_length].copy()

    # Retrieve arrays
    if df.empty:
        # No valid row: add empty array columns without aggregating an empty frame
        for column in array_columns:
            df[column] = ""
    else:
        df_split = df["TO_PARSE"].str.split(",", expand=True)
        idx_velocity_start = n_concentration_values
        idx_velocity_end = n_concentration_values + n_velocity_values
        # NOTE(review): "-10" is replaced as a plain substring; presumably it
        # flags bins without drops - confirm against the logger program
        df["raw_drop_concentration"] = (
            df_split.iloc[:, :idx_velocity_start].agg(",".join, axis=1).str.replace("-10", "0")
        )
        # The file provides 30 velocity values: prepend two zeros
        df["raw_drop_average_velocity"] = "0,0," + df_split.iloc[:, idx_velocity_start:idx_velocity_end].agg(
            ",".join,
            axis=1,
        )
        df["raw_drop_number"] = df_split.iloc[:, idx_velocity_end:].agg(",".join, axis=1)

    # Drop columns not agreeing with DISDRODB L0 standards
    df = df.drop(columns=["TO_PARSE", "RECORD"])
    return df
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@is_documented_by(reader_generic_docstring)
def reader(
    filepath,
    logger=None,
):
    """Reader."""
    # Overview:
    # - Read the integral variables from ``filepath``
    # - If a companion spectrum file exists, fill the raw array columns
    #   for the timesteps present in both files
    # - The spectrum file path is derived by replacing "parsivel" with
    #   "spectre" in the input file path

    # Retrieve spectrum filepath
    # - os.fspath allows path-like objects, whose ``replace`` method is not
    #   a string substitution
    filepath_str = os.fspath(filepath)
    spectrum_filepath = filepath_str.replace("parsivel", "spectre")
    # - If the path does not contain "parsivel", the substitution returns the
    #   input path: do not read the integral variables file as a spectrum file
    has_spectrum_file = spectrum_filepath != filepath_str and os.path.exists(spectrum_filepath)

    # Read integral variables
    df = reader_parsivel(filepath, logger=logger)

    # Drop duplicates timesteps
    df = df.drop_duplicates(subset="time", keep="first")

    # Initialize empty arrays
    # --> 0 values array produced in L0B
    arrays_columns = ["raw_drop_concentration", "raw_drop_average_velocity", "raw_drop_number"]
    for c in arrays_columns:
        if c not in df:
            df[c] = ""

    # Add raw spectrum if available
    if has_spectrum_file:
        # Read raw spectrum for corresponding timesteps
        df_raw_spectrum = reader_spectrum(spectrum_filepath, logger=logger)
        df_raw_spectrum = df_raw_spectrum.drop_duplicates(subset="time", keep="first")
        # Add raw array to df
        # - Align on time; update only overwrites timesteps present in df
        df = df.set_index("time")
        df_raw_spectrum = df_raw_spectrum.set_index("time")
        df.update(df_raw_spectrum)
        # Set back time as column
        df = df.reset_index()

    # Return the dataframe adhering to DISDRODB L0 standards
    return df
|
disdrodb/l0/template_tools.py
CHANGED
|
@@ -162,10 +162,11 @@ def _print_df_summary(df, indices, columns, print_column_names):
|
|
|
162
162
|
df_summary = df_summary.loc[summary_stats]
|
|
163
163
|
# Print summary stats
|
|
164
164
|
for i, column in zip(indices, columns, strict=True):
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
165
|
+
if column in df_summary:
|
|
166
|
+
tmp_df = df_summary[[column]]
|
|
167
|
+
tmp_df.columns = [""]
|
|
168
|
+
_print_column_index(i, column_name=column, print_column_names=print_column_names)
|
|
169
|
+
_print_value(tmp_df)
|
|
169
170
|
|
|
170
171
|
|
|
171
172
|
def print_df_summary_stats(
|
|
@@ -192,19 +193,18 @@ def print_df_summary_stats(
|
|
|
192
193
|
"""
|
|
193
194
|
# Define columns of interest
|
|
194
195
|
_, columns_of_interest = _get_selected_column_names(df, column_indices)
|
|
195
|
-
#
|
|
196
|
-
|
|
197
|
-
indices =
|
|
198
|
-
indices = indices[np.isin(indices, indices_to_remove, invert=True)]
|
|
199
|
-
columns = df.columns[indices]
|
|
196
|
+
# Select only numeric columns (remove columns of dtype object or string)
|
|
197
|
+
columns = df.select_dtypes(include="number").columns
|
|
198
|
+
indices = df.columns.get_indexer(columns)
|
|
200
199
|
if len(columns) == 0:
|
|
201
200
|
raise ValueError("No numeric columns in the dataframe.")
|
|
202
201
|
# Select only columns of interest
|
|
203
|
-
|
|
204
|
-
|
|
202
|
+
mask = columns.isin(columns_of_interest)
|
|
203
|
+
columns = columns[mask]
|
|
204
|
+
indices = indices[mask]
|
|
205
|
+
if len(columns) == 0:
|
|
205
206
|
raise ValueError("No numeric columns at the specified column_indices.")
|
|
206
|
-
|
|
207
|
-
indices = indices[idx_of_interest]
|
|
207
|
+
|
|
208
208
|
# Print summary stats
|
|
209
209
|
_print_df_summary(df=df, indices=indices, columns=columns, print_column_names=print_column_names)
|
|
210
210
|
|
disdrodb/l1/classification.py
CHANGED
|
@@ -144,7 +144,7 @@ def qc_spikes_isolated_precip(hydrometeor_type, sample_interval):
|
|
|
144
144
|
|
|
145
145
|
Parameters
|
|
146
146
|
----------
|
|
147
|
-
hydrometeor_type:
|
|
147
|
+
hydrometeor_type: xarray.DataArray
|
|
148
148
|
Hydrometeor type classification array with a ``time`` coordinate.
|
|
149
149
|
Precipitation presence is defined where ``hydrometeor_type>= 1``.
|
|
150
150
|
sample_interval : float or int
|
|
@@ -153,7 +153,7 @@ def qc_spikes_isolated_precip(hydrometeor_type, sample_interval):
|
|
|
153
153
|
|
|
154
154
|
Returns
|
|
155
155
|
-------
|
|
156
|
-
flag_spikes :
|
|
156
|
+
flag_spikes : xarray.DataArray of int
|
|
157
157
|
Binary QC flag array (same dimensions as input) with:
|
|
158
158
|
* 0 : no spike detected
|
|
159
159
|
* 1 : isolated precipitation spike
|
|
@@ -762,9 +762,13 @@ def classify_raw_spectrum(
|
|
|
762
762
|
|
|
763
763
|
# ------------------------------------------------------------------------.
|
|
764
764
|
#### Define precipitation type variable
|
|
765
|
-
precipitation_type = xr.ones_like(ds["time"], dtype=float) * -
|
|
766
|
-
precipitation_type = xr.where(hydrometeor_type.isin([0]),
|
|
767
|
-
precipitation_type = xr.where(
|
|
765
|
+
precipitation_type = xr.ones_like(ds["time"], dtype=float) * -2
|
|
766
|
+
precipitation_type = xr.where(hydrometeor_type.isin([0]), -1, precipitation_type)
|
|
767
|
+
precipitation_type = xr.where(
|
|
768
|
+
hydrometeor_type.isin([1, 2, 3, 9]),
|
|
769
|
+
0,
|
|
770
|
+
precipitation_type,
|
|
771
|
+
) # 9 hail in rainfall class currently
|
|
768
772
|
precipitation_type = xr.where(hydrometeor_type.isin([5, 6, 7, 8]), 1, precipitation_type)
|
|
769
773
|
precipitation_type = xr.where(hydrometeor_type.isin([4]), 2, precipitation_type)
|
|
770
774
|
precipitation_type.attrs.update(
|
|
@@ -837,7 +841,7 @@ def classify_raw_spectrum(
|
|
|
837
841
|
# ------------------------------------------------------------------------
|
|
838
842
|
#### Define QC splashing, strong_wind, margin_fallers, spikes
|
|
839
843
|
# FUTURE: flag_spikes can be used for non hydrometeor classification,
|
|
840
|
-
# --> But caution because observing the below show true rainfall signature
|
|
844
|
+
# --> But be cautious: inspecting the output of the code below shows some true rainfall signatures
|
|
841
845
|
# --> raw_spectrum.isel(time=(flag_spikes == 0) & (precipitation_type == 0)).disdrodb.plot_spectrum()
|
|
842
846
|
|
|
843
847
|
flag_splashing = xr.where((precipitation_type == 0) & (fraction_splash >= 0.1), 1, 0)
|
disdrodb/l2/empirical_dsd.py
CHANGED
|
@@ -106,7 +106,7 @@ def count_bins_with_drops(ds):
|
|
|
106
106
|
|
|
107
107
|
def _compute_qc_bins_metrics(arr):
|
|
108
108
|
# Find indices of non-zero elements
|
|
109
|
-
arr = arr.copy()
|
|
109
|
+
arr = np.asarray(arr).copy()
|
|
110
110
|
arr[np.isnan(arr)] = 0
|
|
111
111
|
non_zero_indices = np.nonzero(arr)[0]
|
|
112
112
|
if non_zero_indices.size == 0:
|
|
@@ -117,13 +117,16 @@ def _compute_qc_bins_metrics(arr):
|
|
|
117
117
|
segment = arr[start_idx : end_idx + 1]
|
|
118
118
|
|
|
119
119
|
# Compute number of bins with drops
|
|
120
|
-
total_bins =
|
|
120
|
+
total_bins = len(non_zero_indices)
|
|
121
|
+
|
|
122
|
+
# Compute number of bins in the segment
|
|
123
|
+
segment_bins = segment.size
|
|
121
124
|
|
|
122
125
|
# Compute number of missing bins (zeros)
|
|
123
126
|
n_missing_bins = int(np.sum(segment == 0))
|
|
124
127
|
|
|
125
128
|
# Compute fraction of bins with missing drops
|
|
126
|
-
fraction_missing = n_missing_bins /
|
|
129
|
+
fraction_missing = n_missing_bins / segment_bins
|
|
127
130
|
|
|
128
131
|
# Identify longest run of consecutive zeros
|
|
129
132
|
zero_mask = (segment == 0).astype(int)
|
|
@@ -152,14 +155,14 @@ def compute_qc_bins_metrics(ds):
|
|
|
152
155
|
optionally collapses over velocity methods and the velocity dimension, then
|
|
153
156
|
computes four metrics per time step:
|
|
154
157
|
|
|
155
|
-
1. Nbins: total number of diameter bins
|
|
156
|
-
2. Nbins_missing: number of bins with zero or NaN counts
|
|
157
|
-
3. Nbins_missing_fraction: fraction of missing bins (zeros)
|
|
158
|
+
1. Nbins: total number of diameter bins with non-zero count
|
|
159
|
+
2. Nbins_missing: number of bins with zero or NaN counts between the first and last non-zero count
|
|
160
|
+
3. Nbins_missing_fraction: fraction of missing bins (zeros) between the first and last non-zero count
|
|
158
161
|
4. Nbins_missing_consecutive: maximum length of consecutive missing bins
|
|
159
162
|
|
|
160
163
|
Parameters
|
|
161
164
|
----------
|
|
162
|
-
ds :
|
|
165
|
+
ds : xarray.Dataset
|
|
163
166
|
Input dataset containing one of the following variables:
|
|
164
167
|
'drop_counts', 'drop_number_concentration', or 'drop_number'.
|
|
165
168
|
If a 'velocity_method' dimension exists, only the first method is used.
|
|
@@ -167,7 +170,7 @@ def compute_qc_bins_metrics(ds):
|
|
|
167
170
|
|
|
168
171
|
Returns
|
|
169
172
|
-------
|
|
170
|
-
|
|
173
|
+
xarray.Dataset
|
|
171
174
|
Dataset with a new 'metric' dimension of size 4 and coordinates:
|
|
172
175
|
['Nbins', 'Nbins_missing', 'Nbins_missing_fraction', 'Nbins_missing_consecutive'],
|
|
173
176
|
indexed by 'time'.
|
|
@@ -298,7 +301,7 @@ def get_drop_number_concentration(drop_number, velocity, diameter_bin_width, sam
|
|
|
298
301
|
|
|
299
302
|
Returns
|
|
300
303
|
-------
|
|
301
|
-
drop_number_concentration : xarray.DataArray or ndarray
|
|
304
|
+
drop_number_concentration : xarray.DataArray or numpy.ndarray
|
|
302
305
|
Array of drop number concentrations \\( N(D) \\) in m⁻³·mm⁻¹, representing
|
|
303
306
|
the number of drops per unit volume per unit diameter interval.
|
|
304
307
|
|
|
@@ -355,7 +358,7 @@ def get_total_number_concentration(drop_number_concentration, diameter_bin_width
|
|
|
355
358
|
|
|
356
359
|
Returns
|
|
357
360
|
-------
|
|
358
|
-
total_number_concentration : xarray.DataArray or ndarray
|
|
361
|
+
total_number_concentration : xarray.DataArray or numpy.ndarray
|
|
359
362
|
Total number concentration \\( N_t \\) in m⁻³, representing the total number
|
|
360
363
|
of drops per unit volume.
|
|
361
364
|
|
|
@@ -692,8 +695,10 @@ def get_equivalent_reflectivity_factor(drop_number_concentration, diameter, diam
|
|
|
692
695
|
dim=DIAMETER_DIMENSION,
|
|
693
696
|
skipna=False,
|
|
694
697
|
)
|
|
698
|
+
# Set to NaN where z <= 0
|
|
695
699
|
invalid_mask = z > 0
|
|
696
700
|
z = z.where(invalid_mask)
|
|
701
|
+
|
|
697
702
|
# Compute equivalent reflectivity factor in dBZ
|
|
698
703
|
# - np.log10(np.nan) returns -Inf !
|
|
699
704
|
# --> We mask again after the log
|
|
@@ -741,8 +746,11 @@ def get_equivalent_reflectivity_spectrum(drop_number_concentration, diameter):
|
|
|
741
746
|
"""
|
|
742
747
|
# Compute reflectivity in mm⁶·m⁻³
|
|
743
748
|
z = drop_number_concentration * ((diameter * 1000) ** 6)
|
|
749
|
+
|
|
750
|
+
# Set to NaN where z <= 0
|
|
744
751
|
invalid_mask = z > 0
|
|
745
752
|
z = z.where(invalid_mask)
|
|
753
|
+
|
|
746
754
|
# Compute equivalent reflectivity factor in dBZ
|
|
747
755
|
# - np.log10(np.nan) returns -Inf !
|
|
748
756
|
# --> We mask again after the log
|
|
@@ -930,6 +938,8 @@ def get_min_max_diameter(drop_counts):
|
|
|
930
938
|
max_drop_diameter : xarray.DataArray
|
|
931
939
|
Maximum diameter where drop_counts is non-zero, for each time step.
|
|
932
940
|
"""
|
|
941
|
+
# TODO: maybe use lower bound for minimum, and upper bound for maximum
|
|
942
|
+
|
|
933
943
|
# Create a boolean mask where drop_counts is non-zero
|
|
934
944
|
non_zero_mask = drop_counts > 0
|
|
935
945
|
|
|
@@ -1500,7 +1510,7 @@ def get_kinetic_energy_spectrum(
|
|
|
1500
1510
|
|
|
1501
1511
|
Returns
|
|
1502
1512
|
-------
|
|
1503
|
-
|
|
1513
|
+
xarray.DataArray
|
|
1504
1514
|
Kinetic Energy Spectrum [J/m2/mm]
|
|
1505
1515
|
"""
|
|
1506
1516
|
KE_spectrum = (
|
|
@@ -1718,9 +1728,9 @@ def compute_integral_parameters(
|
|
|
1718
1728
|
|
|
1719
1729
|
Parameters
|
|
1720
1730
|
----------
|
|
1721
|
-
drop_number_concentration :
|
|
1731
|
+
drop_number_concentration : xarray.DataArray
|
|
1722
1732
|
Drop number concentration in each diameter bin [#/m3/mm].
|
|
1723
|
-
velocity :
|
|
1733
|
+
velocity : xarray.DataArray
|
|
1724
1734
|
Fall velocity of drops in each diameter bin [m/s].
|
|
1725
1735
|
The presence of a velocity_method dimension enables computing the parameters
|
|
1726
1736
|
with different velocity estimates.
|
|
@@ -1873,9 +1883,9 @@ def compute_spectrum_parameters(
|
|
|
1873
1883
|
|
|
1874
1884
|
Parameters
|
|
1875
1885
|
----------
|
|
1876
|
-
drop_number_concentration :
|
|
1886
|
+
drop_number_concentration : xarray.DataArray
|
|
1877
1887
|
Drop number concentration in each diameter bin [#/m3/mm].
|
|
1878
|
-
velocity :
|
|
1888
|
+
velocity : xarray.DataArray
|
|
1879
1889
|
Fall velocity of drops in each diameter bin [m/s].
|
|
1880
1890
|
The presence of a velocity_method dimension enables computing the parameters
|
|
1881
1891
|
with different velocity estimates.
|
disdrodb/l2/processing.py
CHANGED
|
@@ -36,7 +36,7 @@ from disdrodb.l2.empirical_dsd import (
|
|
|
36
36
|
get_rain_rate_from_drop_number,
|
|
37
37
|
)
|
|
38
38
|
from disdrodb.psd import create_psd, estimate_model_parameters
|
|
39
|
-
from disdrodb.psd.
|
|
39
|
+
from disdrodb.psd.gof_metrics import compute_gof_stats
|
|
40
40
|
from disdrodb.utils.decorators import check_pytmatrix_availability
|
|
41
41
|
from disdrodb.utils.manipulations import (
|
|
42
42
|
define_diameter_array,
|
|
@@ -364,10 +364,12 @@ def generate_l2e(
|
|
|
364
364
|
ds : xarray.Dataset
|
|
365
365
|
DISDRODB L1 dataset.
|
|
366
366
|
Alternatively, a xarray dataset with at least:
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
367
|
+
|
|
368
|
+
- variables: raw_drop_number
|
|
369
|
+
- dimension: DIAMETER_DIMENSION
|
|
370
|
+
- coordinates: diameter_bin_center, diameter_bin_width, sample_interval
|
|
371
|
+
- attributes: sensor_name
|
|
372
|
+
|
|
371
373
|
ds_env : xarray.Dataset, optional
|
|
372
374
|
Environmental dataset used for fall velocity and water density estimates.
|
|
373
375
|
If None, a default environment dataset will be loaded.
|
|
@@ -642,10 +644,12 @@ def generate_l2e(
|
|
|
642
644
|
def _get_default_optimization(psd_model):
|
|
643
645
|
"""PSD model defaults."""
|
|
644
646
|
defaults = {
|
|
645
|
-
"ExponentialPSD": "
|
|
646
|
-
"GammaPSD": "
|
|
647
|
-
"LognormalPSD": "
|
|
647
|
+
"ExponentialPSD": "GS",
|
|
648
|
+
"GammaPSD": "GS",
|
|
649
|
+
"LognormalPSD": "GS",
|
|
648
650
|
"NormalizedGammaPSD": "GS",
|
|
651
|
+
"GeneralizedGammaPSD": "GS",
|
|
652
|
+
"NormalizedGeneralizedGammaPSD": "GS",
|
|
649
653
|
}
|
|
650
654
|
optimization = defaults[psd_model]
|
|
651
655
|
return optimization
|
|
@@ -695,7 +699,7 @@ def generate_l2m(
|
|
|
695
699
|
psd_model,
|
|
696
700
|
# Fitting options
|
|
697
701
|
optimization=None,
|
|
698
|
-
|
|
702
|
+
optimization_settings=None,
|
|
699
703
|
# PSD discretization
|
|
700
704
|
diameter_min=0,
|
|
701
705
|
diameter_max=10,
|
|
@@ -735,7 +739,7 @@ def generate_l2m(
|
|
|
735
739
|
optimization : str, optional
|
|
736
740
|
The fitting optimization procedure. Either "GS" (Grid Search), "ML" (Maximum Likelihood)
|
|
737
741
|
or "MOM" (Method of Moments).
|
|
738
|
-
|
|
742
|
+
optimization_settings : dict, optional
|
|
739
743
|
Dictionary with arguments to customize the fitting procedure.
|
|
740
744
|
minimum_nbins: int
|
|
741
745
|
Minimum number of bins with drops required to fit the PSD model.
|
|
@@ -786,7 +790,7 @@ def generate_l2m(
|
|
|
786
790
|
ds=ds,
|
|
787
791
|
psd_model=psd_model,
|
|
788
792
|
optimization=optimization,
|
|
789
|
-
|
|
793
|
+
optimization_settings=optimization_settings,
|
|
790
794
|
)
|
|
791
795
|
psd_fitting_attrs = ds_psd_params.attrs
|
|
792
796
|
|
|
@@ -841,6 +845,20 @@ def generate_l2m(
|
|
|
841
845
|
"drop_number_concentration",
|
|
842
846
|
"fall_velocity",
|
|
843
847
|
"N",
|
|
848
|
+
# L0C QC
|
|
849
|
+
"qc_time",
|
|
850
|
+
# L1 flags and variables
|
|
851
|
+
"qc_resampling",
|
|
852
|
+
"n_margin_fallers",
|
|
853
|
+
"n_splashing",
|
|
854
|
+
"flag_graupel",
|
|
855
|
+
"flag_hail",
|
|
856
|
+
"flag_spikes",
|
|
857
|
+
"flag_splashing",
|
|
858
|
+
"flag_wind_artefacts",
|
|
859
|
+
# L2E drop statistics
|
|
860
|
+
"Dmin",
|
|
861
|
+
"Dmax",
|
|
844
862
|
*METEOROLOGICAL_VARIABLES,
|
|
845
863
|
]
|
|
846
864
|
variables = [var for var in variables if var in ds]
|
|
@@ -885,12 +903,12 @@ def generate_l2_radar(
|
|
|
885
903
|
----------
|
|
886
904
|
ds : xarray.Dataset
|
|
887
905
|
Dataset containing the drop number concentration variable or the PSD parameters.
|
|
888
|
-
frequency : str, float, or list of str
|
|
906
|
+
frequency : str, float, or list of str or float, optional
|
|
889
907
|
Frequencies in GHz for which to compute the radar parameters.
|
|
890
908
|
Alternatively, also strings can be used to specify common radar frequencies.
|
|
891
909
|
If ``None``, the common radar frequencies will be used.
|
|
892
910
|
See ``disdrodb.scattering.available_radar_bands()``.
|
|
893
|
-
num_points: int or list of
|
|
911
|
+
num_points: int or list of int, optional
|
|
894
912
|
Number of bins into which discretize the PSD.
|
|
895
913
|
diameter_max : float or list of float, optional
|
|
896
914
|
Maximum diameter. The default value is 10 mm.
|
|
@@ -899,7 +917,7 @@ def generate_l2_radar(
|
|
|
899
917
|
axis_ratio_model : str or list of str, optional
|
|
900
918
|
Models to compute the axis ratio. The default model is ``Thurai2007``.
|
|
901
919
|
See available models with ``disdrodb.scattering.available_axis_ratio_models()``.
|
|
902
|
-
permittivity_model : str
|
|
920
|
+
permittivity_model : str or list of str, optional
|
|
903
921
|
Permittivity model to use to compute the refractive index and the
|
|
904
922
|
rayleigh_dielectric_factor. The default is ``Turner2016``.
|
|
905
923
|
See available models with ``disdrodb.scattering.available_permittivity_models()``.
|
disdrodb/metadata/download.py
CHANGED
disdrodb/metadata/geolocation.py
CHANGED
|
@@ -97,10 +97,10 @@ def infer_altitudes(lats, lons, dem="aster30m"):
|
|
|
97
97
|
|
|
98
98
|
Notes
|
|
99
99
|
-----
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
100
|
+
The OpenTopoData API has a limit of 1000 calls per day.
|
|
101
|
+
Each request can include up to 100 locations.
|
|
102
|
+
The API allows a maximum of 1 call per second.
|
|
103
|
+
The API requests are made in blocks of up to 100 coordinates,
|
|
104
104
|
with a 2-second delay between requests.
|
|
105
105
|
"""
|
|
106
106
|
# Check that lats and lons have the same length
|
disdrodb/metadata/reader.py
CHANGED
|
@@ -73,13 +73,13 @@ def read_metadata_archive(
|
|
|
73
73
|
metadata_archive_dir : str or Path-like, optional
|
|
74
74
|
Path to the root of the DISDRODB Metadata Archive. If None, the
|
|
75
75
|
default metadata base directory is used. Default is None.
|
|
76
|
-
data_sources : str or
|
|
76
|
+
data_sources : str or list of str, optional
|
|
77
77
|
One or more data source identifiers to filter stations by. If None,
|
|
78
78
|
no filtering on data source is applied. The default is None.
|
|
79
|
-
campaign_names : str or
|
|
79
|
+
campaign_names : str or list of str, optional
|
|
80
80
|
One or more campaign names to filter stations by. If None, no filtering
|
|
81
81
|
on campaign is applied. The default is None.
|
|
82
|
-
station_names : str or
|
|
82
|
+
station_names : str or list of str, optional
|
|
83
83
|
One or more station names to include. If None, all stations matching
|
|
84
84
|
other filters are considered. The default is None.
|
|
85
85
|
available_data: bool, optional
|
disdrodb/metadata/search.py
CHANGED
|
@@ -59,40 +59,42 @@ def get_list_metadata(
|
|
|
59
59
|
|
|
60
60
|
- if available_data is False, return metadata filepaths of stations present in the DISDRODB Metadata Archive
|
|
61
61
|
- if available_data is True, return metadata filepaths of stations with data available on the
|
|
62
|
-
|
|
62
|
+
online DISDRODB Decentralized Data Archive (i.e., stations with the disdrodb_data_url in the metadata).
|
|
63
63
|
|
|
64
64
|
If ``product`` is specified:
|
|
65
65
|
|
|
66
66
|
- if available_data is False, return metadata filepaths of stations where
|
|
67
|
-
|
|
67
|
+
the product directory exists in the local DISDRODB Data Archive
|
|
68
68
|
- if available_data is True, return metadata filepaths of stations where product data exists
|
|
69
69
|
in the local DISDRODB Data Archive.
|
|
70
|
+
|
|
70
71
|
The default is False.
|
|
71
72
|
|
|
72
|
-
data_sources : str or
|
|
73
|
+
data_sources : str or list of str, optional
|
|
73
74
|
One or more data source identifiers to filter stations by.
|
|
74
75
|
The name(s) must be UPPER CASE.
|
|
75
76
|
If None, no filtering on data source is applied. The default is ``None``.
|
|
76
|
-
campaign_names : str or
|
|
77
|
+
campaign_names : str or list of str, optional
|
|
77
78
|
One or more campaign names to filter stations by.
|
|
78
79
|
The name(s) must be UPPER CASE.
|
|
79
80
|
If None, no filtering on campaign is applied. The default is ``None``.
|
|
80
|
-
station_names : str or
|
|
81
|
+
station_names : str or list of str, optional
|
|
81
82
|
One or more station names to include.
|
|
82
83
|
If None, all stations matching other filters are considered. The default is ``None``.
|
|
83
84
|
raise_error_if_empty : bool, optional
|
|
84
85
|
If True and no stations satisfy the criteria, raise a ``ValueError``.
|
|
85
86
|
If False, return an empty list/tuple. The default is False.
|
|
86
|
-
invalid_fields_policy :
|
|
87
|
+
invalid_fields_policy : str, optional
|
|
87
88
|
How to handle invalid filter values for ``data_sources``, ``campaign_names``,
|
|
88
|
-
or ``station_names`` that are not present in the metadata archive:
|
|
89
|
+
or ``station_names`` that are not present in the metadata archive. Valid options are:
|
|
89
90
|
|
|
90
91
|
- 'raise' : raise a ``ValueError`` (default)
|
|
91
92
|
- 'warn' : emit a warning, then ignore invalid entries
|
|
92
93
|
- 'ignore': silently drop invalid entries
|
|
94
|
+
|
|
93
95
|
data_archive_dir : str or Path-like, optional
|
|
94
96
|
Path to the root of the local DISDRODB Data Archive. Format: ``<...>/DISDRODB``
|
|
95
|
-
Required only if ``product``is specified.
|
|
97
|
+
Required only if ``product`` is specified.
|
|
96
98
|
If None, the ``data_archive_dir`` path specified in the DISDRODB active configuration file is used.
|
|
97
99
|
The default is None.
|
|
98
100
|
metadata_archive_dir : str or Path-like, optional
|