disdrodb 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +1 -1
- disdrodb/_version.py +2 -2
- disdrodb/api/io.py +12 -2
- disdrodb/l0/check_standards.py +15 -10
- disdrodb/l0/configs/LPM/l0a_encodings.yml +4 -4
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +22 -6
- disdrodb/l0/configs/LPM/l0b_encodings.yml +41 -0
- disdrodb/l0/configs/LPM/raw_data_format.yml +40 -0
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +10 -10
- disdrodb/l0/configs/PWS100/bins_diameter.yml +173 -0
- disdrodb/l0/configs/PWS100/bins_velocity.yml +173 -0
- disdrodb/l0/configs/PWS100/l0a_encodings.yml +19 -0
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +76 -0
- disdrodb/l0/configs/PWS100/l0b_encodings.yml +176 -0
- disdrodb/l0/configs/PWS100/raw_data_format.yml +182 -0
- disdrodb/l0/configs/RD80/raw_data_format.yml +2 -6
- disdrodb/l0/l0b_nc_processing.py +1 -1
- disdrodb/l0/l0b_processing.py +12 -10
- disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +23 -13
- disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +3 -3
- disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +5 -3
- disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +36 -20
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +210 -0
- disdrodb/l0/readers/LPM/KIT/CHWALA.py +225 -0
- disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +197 -0
- disdrodb/l0/readers/LPM/SLOVENIA/CRNI_VRH.py +197 -0
- disdrodb/l0/readers/PARSIVEL/KIT/BURKINA_FASO.py +133 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL_FGG.py +121 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +189 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +150 -0
- disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +274 -0
- disdrodb/l0/readers/template_reader_raw_netcdf_data.py +1 -1
- disdrodb/l0/standards.py +7 -4
- disdrodb/l0/template_tools.py +2 -2
- disdrodb/l1/encoding_attrs.py +21 -6
- disdrodb/l1/processing.py +6 -4
- disdrodb/l1/resampling.py +1 -1
- disdrodb/l1/routines.py +2 -1
- disdrodb/l2/empirical_dsd.py +100 -2
- disdrodb/l2/event.py +3 -3
- disdrodb/l2/processing.py +21 -12
- disdrodb/l2/processing_options.py +7 -7
- disdrodb/l2/routines.py +3 -3
- disdrodb/metadata/checks.py +15 -6
- disdrodb/metadata/manipulation.py +2 -2
- disdrodb/metadata/standards.py +83 -79
- disdrodb/metadata/writer.py +2 -2
- disdrodb/routines.py +246 -10
- disdrodb/scattering/routines.py +1 -1
- disdrodb/utils/dataframe.py +342 -0
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/METADATA +34 -61
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/RECORD +63 -47
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/WHEEL +1 -1
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/entry_points.txt +3 -3
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/top_level.txt +0 -0
disdrodb/utils/dataframe.py
@@ -0,0 +1,342 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Dataframe utilities."""
+import numpy as np
+import pandas as pd
+
+
+def log_arange(start, stop, log_step=0.1, base=10):
+    """
+    Return numbers spaced evenly on a log scale (similar to np.arange but in log space).
+
+    Parameters
+    ----------
+    start : float
+        The starting value of the sequence (must be > 0).
+    stop : float
+        The end value of the sequence (must be > 0).
+    log_step : float
+        The step size in log-space (default is 0.1).
+    base : float
+        The logarithmic base (default is 10).
+
+    Returns
+    -------
+    np.ndarray
+        Array of values spaced in log scale.
+    """
+    if start <= 0 or stop <= 0:
+        raise ValueError("Both start and stop must be > 0 for log spacing.")
+
+    log_start = np.log(start) / np.log(base)
+    log_stop = np.log(stop) / np.log(base)
+
+    log_values = np.arange(log_start, log_stop, log_step)
+    return base**log_values
+
+
+def compute_1d_histogram(df, column, variables=None, bins=10, labels=None, prefix_name=True, include_quantiles=False):
+    """Compute conditional univariate statistics.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Input dataframe
+    column : str
+        Column name to be binned.
+    variables : str or list, optional
+        Column names for which conditional statistics will be computed.
+        If None, only counts are computed.
+    bins : int or array-like
+        Number of bins or bin edges.
+    labels : array-like, optional
+        Labels for the column bins. If None, uses bin centers.
+
+    Returns
+    -------
+    pandas.DataFrame
+    """
+    # Copy data
+    df = df.copy()
+
+    # Ensure `variables` is a list of variables
+    # - If no variable specified, create dummy variable
+    if variables is None:
+        variables = ["dummy"]
+        df["dummy"] = np.ones(df[column].shape)
+        variables_specified = False
+    elif isinstance(variables, str):
+        variables = [variables]
+        variables_specified = True
+    elif isinstance(variables, list):
+        variables_specified = True
+    else:
+        raise TypeError("`variables` must be a string, list of strings, or None.")
+    variables = np.unique(variables)
+
+    # Handle column binning
+    if isinstance(bins, int):
+        bins = np.linspace(df[column].min(), df[column].max(), bins + 1)
+
+    # Drop rows where any of the key columns have NaN
+    df = df.dropna(subset=[column, *variables])
+
+    if len(df) == 0:
+        raise ValueError("No valid data points after removing NaN values")
+
+    # Create binned columns with explicit handling of out-of-bounds values
+    df[f"{column}_binned"] = pd.cut(df[column], bins=bins, include_lowest=True)
+
+    # Create complete IntervalIndex for both dimensions
+    intervals = df[f"{column}_binned"].cat.categories
+
+    # Create IntervalIndex with all possible combinations
+    full_index = pd.Index(intervals, name=f"{column}_binned")
+
+    # Define grouping object
+    df_grouped = df.groupby([f"{column}_binned"], observed=False)
+
+    # Compute statistics for specified variables
+    variables_stats = []
+    for i, var in enumerate(variables):
+        # Prepare prefix
+        prefix = f"{var}_" if prefix_name and variables_specified else ""
+
+        # Define statistics to compute
+        if variables_specified:
+            # Compute quantiles
+            quantiles = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]
+            df_stats_quantiles = df_grouped[var].quantile(quantiles).unstack(level=-1)
+            df_stats_quantiles.columns = [f"{prefix}Q{int(q*100)}" for q in df_stats_quantiles.columns]
+            df_stats_quantiles = df_stats_quantiles.rename(
+                columns={
+                    f"{prefix}Q50": f"{prefix}median",
+                },
+            )
+            # Define other stats to compute
+            list_stats = [
+                (f"{prefix}std", "std"),
+                (f"{prefix}min", "min"),
+                (f"{prefix}max", "max"),
+                (f"{prefix}mad", lambda s: np.median(np.abs(s - np.median(s)))),
+            ]
+            if i == 0:
+                list_stats.append(("count", "count"))
+        else:
+            list_stats = [("count", "count")]
+
+        # Compute statistics
+        df_stats = df_grouped[var].agg(list_stats)
+
+        # Compute other variable statistics
+        if variables_specified:
+            df_stats[f"{prefix}range"] = df_stats[f"{prefix}max"] - df_stats[f"{prefix}min"]
+            df_stats[f"{prefix}iqr"] = df_stats_quantiles[f"{prefix}Q75"] - df_stats_quantiles[f"{prefix}Q25"]
+            df_stats[f"{prefix}ipr80"] = df_stats_quantiles[f"{prefix}Q90"] - df_stats_quantiles[f"{prefix}Q10"]
+            df_stats[f"{prefix}ipr90"] = df_stats_quantiles[f"{prefix}Q95"] - df_stats_quantiles[f"{prefix}Q5"]
+            df_stats[f"{prefix}ipr98"] = df_stats_quantiles[f"{prefix}Q99"] - df_stats_quantiles[f"{prefix}Q1"]
+            if include_quantiles:
+                df_stats = pd.concat((df_stats, df_stats_quantiles), axis=1)
+            else:
+                df_stats[f"{prefix}median"] = df_stats_quantiles[f"{prefix}median"]
+        variables_stats.append(df_stats)
+
+    # Combine all statistics into a single DataFrame
+    df_stats = pd.concat(variables_stats, axis=1)
+
+    # Reindex to include all interval combinations
+    df_stats = df_stats.reindex(full_index)
+
+    # Determine bin centers
+    centers = intervals.mid
+
+    # Use provided labels if available
+    coords = labels if labels is not None else centers
+
+    # Reset index and add coordinates/labels
+    df_stats = df_stats.reset_index()
+    df_stats[f"{column}"] = pd.Categorical(df_stats[f"{column}_binned"].map(dict(zip(intervals, coords, strict=False))))
+    df_stats = df_stats.drop(columns=f"{column}_binned")
+
+    return df_stats
+
+
+def compute_2d_histogram(
+    df,
+    x,
+    y,
+    variables=None,
+    x_bins=10,
+    y_bins=10,
+    x_labels=None,
+    y_labels=None,
+    prefix_name=True,
+    include_quantiles=False,
+):
+    """Compute conditional bivariate statistics.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Input dataframe
+    x : str
+        Column name for x-axis binning (will be rounded to integers)
+    y : str
+        Column name for y-axis binning
+    variables : str or list, optional
+        Column names for which statistics will be computed.
+        If None, only counts are computed.
+    x_bins : int or array-like
+        Number of bins or bin edges for x
+    y_bins : int or array-like
+        Number of bins or bin edges for y
+    x_labels : array-like, optional
+        Labels for x bins. If None, uses bin centers
+    y_labels : array-like, optional
+        Labels for y bins. If None, uses bin centers
+
+    Returns
+    -------
+    xarray.Dataset
+        Dataset with dimensions corresponding to binned variables and
+        data variables for each statistic
+    """
+    # # If polars, cast to pandas
+    # if isinstance(df, pl.DataFrame):
+    #     df = df.to_pandas()
+
+    # Copy data
+    df = df.copy()
+
+    # Ensure `variables` is a list of variables
+    # - If no variable specified, create dummy variable
+    if variables is None:
+        variables = ["dummy"]
+        df["dummy"] = np.ones(df[x].shape)
+        variables_specified = False
+    elif isinstance(variables, str):
+        variables = [variables]
+        variables_specified = True
+    elif isinstance(variables, list):
+        variables_specified = True
+    else:
+        raise TypeError("`variables` must be a string, list of strings, or None.")
+    variables = np.unique(variables)
+
+    # Handle x-axis binning
+    if isinstance(x_bins, int):
+        x_bins = np.linspace(df[x].min(), df[x].max(), x_bins + 1)
+    # Handle y-axis binning
+    if isinstance(y_bins, int):
+        y_bins = np.linspace(df[y].min(), df[y].max(), y_bins + 1)
+
+    # Drop rows where any of the key columns have NaN
+    df = df.dropna(subset=[x, y, *variables])
+
+    if len(df) == 0:
+        raise ValueError("No valid data points after removing NaN values")
+
+    # Create binned columns with explicit handling of out-of-bounds values
+    df[f"{x}_binned"] = pd.cut(df[x], bins=x_bins, include_lowest=True)
+    df[f"{y}_binned"] = pd.cut(df[y], bins=y_bins, include_lowest=True)
+
+    # Create complete IntervalIndex for both dimensions
+    x_intervals = df[f"{x}_binned"].cat.categories
+    y_intervals = df[f"{y}_binned"].cat.categories
+
+    # Create MultiIndex with all possible combinations
+    full_index = pd.MultiIndex.from_product([x_intervals, y_intervals], names=[f"{x}_binned", f"{y}_binned"])
+
+    # Define grouping object
+    df_grouped = df.groupby([f"{x}_binned", f"{y}_binned"], observed=False)
+
+    # Compute statistics for specified variables
+    variables_stats = []
+    for i, var in enumerate(variables):
+        # Prepare prefix
+        prefix = f"{var}_" if prefix_name and variables_specified else ""
+
+        # Define statistics to compute
+        if variables_specified:
+            # Compute quantiles
+            quantiles = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]
+            df_stats_quantiles = df_grouped[var].quantile(quantiles).unstack(level=-1)
+            df_stats_quantiles.columns = [f"{prefix}Q{int(q*100)}" for q in df_stats_quantiles.columns]
+            df_stats_quantiles = df_stats_quantiles.rename(
+                columns={
+                    f"{prefix}Q50": f"{prefix}median",
+                },
+            )
+            # Define other stats to compute
+            list_stats = [
+                (f"{prefix}std", "std"),
+                (f"{prefix}min", "min"),
+                (f"{prefix}max", "max"),
+                (f"{prefix}mad", lambda s: np.median(np.abs(s - np.median(s)))),
+            ]
+            if i == 0:
+                list_stats.append(("count", "count"))
+        else:
+            list_stats = [("count", "count")]
+
+        # Compute statistics
+        df_stats = df_grouped[var].agg(list_stats)
+
+        # Compute other variable statistics
+        if variables_specified:
+            df_stats[f"{prefix}range"] = df_stats[f"{prefix}max"] - df_stats[f"{prefix}min"]
+            df_stats[f"{prefix}iqr"] = df_stats_quantiles[f"{prefix}Q75"] - df_stats_quantiles[f"{prefix}Q25"]
+            df_stats[f"{prefix}ipr80"] = df_stats_quantiles[f"{prefix}Q90"] - df_stats_quantiles[f"{prefix}Q10"]
+            df_stats[f"{prefix}ipr90"] = df_stats_quantiles[f"{prefix}Q95"] - df_stats_quantiles[f"{prefix}Q5"]
+            df_stats[f"{prefix}ipr98"] = df_stats_quantiles[f"{prefix}Q99"] - df_stats_quantiles[f"{prefix}Q1"]
+            if include_quantiles:
+                df_stats = pd.concat((df_stats, df_stats_quantiles), axis=1)
+            else:
+                df_stats[f"{prefix}median"] = df_stats_quantiles[f"{prefix}median"]
+        variables_stats.append(df_stats)
+
+    # Combine all statistics into a single DataFrame
+    df_stats = pd.concat(variables_stats, axis=1)
+
+    # Reindex to include all interval combinations
+    df_stats = df_stats.reindex(full_index)
+
+    # Determine coordinates
+    x_centers = x_intervals.mid
+    y_centers = y_intervals.mid
+
+    # Use provided labels if available
+    x_coords = x_labels if x_labels is not None else x_centers
+    y_coords = y_labels if y_labels is not None else y_centers
+
+    # Reset index and set new coordinates
+    df_stats = df_stats.reset_index()
+    df_stats[f"{x}"] = pd.Categorical(df_stats[f"{x}_binned"].map(dict(zip(x_intervals, x_coords, strict=False))))
+    df_stats[f"{y}"] = pd.Categorical(df_stats[f"{y}_binned"].map(dict(zip(y_intervals, y_coords, strict=False))))
+
+    # Set new MultiIndex with coordinates
+    df_stats = df_stats.set_index([f"{x}", f"{y}"])
+    df_stats = df_stats.drop(columns=[f"{x}_binned", f"{y}_binned"])
+
+    # Convert to dataset
+    ds = df_stats.to_xarray()
+
+    # Transpose arrays
+    ds = ds.transpose(y, x)
+    return ds
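The two histogram helpers above are new in 0.1.1 and are not exercised elsewhere in this diff. A minimal sketch of how they compose, on synthetic data (the column names and values below are illustrative only, not taken from the package):

```python
import numpy as np
import pandas as pd

from disdrodb.utils.dataframe import compute_1d_histogram, compute_2d_histogram, log_arange

# Synthetic drop observations (illustrative column names)
rng = np.random.default_rng(0)
df = pd.DataFrame(
    {
        "diameter": rng.uniform(0.2, 8.0, size=1000),  # mm
        "velocity": rng.uniform(0.5, 10.0, size=1000),  # m/s
    }
)

# Log-spaced diameter bin edges, 0.1 decades apart between 0.2 and 8 mm
diameter_bins = log_arange(0.2, 8.0, log_step=0.1)

# Conditional statistics of velocity per diameter bin -> pandas.DataFrame
# with count, velocity_median, velocity_std, velocity_iqr, ... columns
df_stats = compute_1d_histogram(df, column="diameter", variables="velocity", bins=diameter_bins)

# 2D bin counts on a regular 20x20 grid -> xarray.Dataset with dims (velocity, diameter)
ds_counts = compute_2d_histogram(df, x="diameter", y="velocity", x_bins=20, y_bins=20)
print(ds_counts["count"])
```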
{disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: disdrodb
-Version: 0.1.0
+Version: 0.1.1
 Summary: disdrodb provides tools to download, standardize, share and analyze global disdrometer data.
 Author: Gionata Ghiggi
 Project-URL: homepage, https://github.com/ltelab/disdrodb
@@ -47,7 +47,7 @@ Requires-Dist: twine; extra == "dev"
 Requires-Dist: loghub; extra == "dev"
 Dynamic: license-file
 
-# 📦 DISDRODB - A package to standardize, process and analyze global disdrometer data
+# 📦 DISDRODB - A package to standardize, process and analyze global disdrometer data
 
 | | |
 | ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
@@ -60,17 +60,16 @@ Dynamic: license-file
 | Linting | [](https://github.com/psf/black) [](https://github.com/astral-sh/ruff) [](https://github.com/codespell-project/codespell) |
 | Code Coverage | [](https://coveralls.io/github/ltelab/disdrodb?branch=main) [](https://codecov.io/gh/ltelab/disdrodb) |
 | Code Quality | [](https://www.codefactor.io/repository/github/ltelab/disdrodb) [](https://codebeat.co/projects/github-com-ltelab-disdrodb-main) [](https://app.codacy.com/gh/ltelab/disdrodb/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [](https://codescene.io/projects/36773) |
-| License | [](https://github.com/ltelab/disdrodb/blob/main/LICENSE)
-| Community | [](https://join.slack.com/t/disdrodbworkspace/shared_invite/zt-25l4mvgo7-cfBdXalzlWGd4Pt7H~FqoA) [](https://github.com/ltelab/disdrodb/discussions)
-| Citation | [](https://zenodo.org/doi/10.5281/zenodo.7680581)
+| License | [](https://github.com/ltelab/disdrodb/blob/main/LICENSE) |
+| Community | [](https://join.slack.com/t/disdrodbworkspace/shared_invite/zt-25l4mvgo7-cfBdXalzlWGd4Pt7H~FqoA) [](https://github.com/ltelab/disdrodb/discussions) |
+| Citation | [](https://zenodo.org/doi/10.5281/zenodo.7680581) |
 
 [**Slack**](https://join.slack.com/t/disdrodbworkspace/shared_invite/zt-25l4mvgo7-cfBdXalzlWGd4Pt7H~FqoA) | [**Documentation**](https://disdrodb.readthedocs.io/en/latest/)
 
 DISDRODB is part of an international joint effort to index, collect and homogenize drop size distribution (DSD) data from around the world.
 
 The DISDRODB project also aims to establish a global standard for sharing disdrometer observations.
-Built on FAIR data principles and Climate & Forecast (CF) conventions, DISDRODB standards
-
+Built on FAIR data principles and Climate & Forecast (CF) conventions, DISDRODB standards facilitate the processing, analysis and visualization of disdrometer data.
 
 ## ℹ️ Software Overview
 
@@ -87,13 +86,13 @@ The software enables you to:
 - Compute empirical and model-based drop size distribution parameters and derive geophysical and polarimetric radar variables of interest (DISDRODB L2 product)
 
 Currently, the DISDRODB Working Group is finalizing the development of the L1 and L2 scientific products.
-If you have ideas, algorithms, data or expertise to share, or you want to contribute to the future DISDRODB products, do not hesitate to get in touch
+If you have ideas, algorithms, data, or expertise to share, or you want to contribute to the future DISDRODB products, do not hesitate to get in touch!!!
 
-Join the [**DISDRODB Slack Workspace**](https://join.slack.com/t/disdrodbworkspace/shared_invite/zt-25l4mvgo7-cfBdXalzlWGd4Pt7H~FqoA) to meet the DISDRODB Community
+Join the [**DISDRODB Slack Workspace**](https://join.slack.com/t/disdrodbworkspace/shared_invite/zt-25l4mvgo7-cfBdXalzlWGd4Pt7H~FqoA) to meet the DISDRODB Community!
 
 ## 🚀 Quick Start
 
-
+Create your own DISDRODB Local Data Archive.
 
 ### 📚 Download the DISDRODB Metadata Archive
 
@@ -102,76 +101,62 @@ The DISDRODB Metadata Archive is a collection of metadata files that describe th
 To download the DISDRODB Metadata Archive, navigate to the desired directory and run:
 
 ```bash
-
 git clone https://github.com/ltelab/DISDRODB-METADATA.git
-
 ```
 
-
-
-Alternatively, you can download a static version of the DISDRODB Metadata Archive using:
+Or download a static snapshot without using git:
 
 ```bash
-disdrodb_download_metadata_archive /
+disdrodb_download_metadata_archive /path/to/DISDRODB-METADATA
 ```
 
 ### 📚 Define the DISDRODB Configuration File
 
-The disdrodb software
-is stored on your local machine, as well as where you want to download the raw stations data
-as well as where to save the DISDRODB products you will generate.
-
-Within the `disdrodb` package, the argument `metadata_archive_dir` refers to the base directory of your local DISDRODB Metadata Archive, while `data_archive_dir` refers to the base directory of your local DISDRODB Data Archive.
+The disdrodb software requires to know two directories:
 
-
-
-all DISDRODB products will be saved.
+- `metadata_archive_dir`: the base of your local DISDRODB Metadata Archive
+- `data_archive_dir`: the base of your local DISDRODB Data Archive
 
-
-Please note that on Windows these paths must end with `\DISDRODB`, while on macOS/Linux they must end with `/DISDRODB`.
+On Windows, paths must end with `\DISDRODB`; on macOS/Linux, they must end with `/DISDRODB`.
 
 ```python
 import disdrodb
 
 metadata_archive_dir = "/<path_to>/DISDRODB-METADATA/DISDRODB"
-data_archive_dir = "/<
+data_archive_dir = "/<path_to>/DISDRODB"
 disdrodb.define_configs(
     metadata_archive_dir=metadata_archive_dir, data_archive_dir=data_archive_dir
 )
 ```
 
-
-that will be used as default configuration file when running the disdrodb software.
+This creates a `.config_disdrodb.yml` file in your home directory (e.g., `~/.config_disdrodb.yml`).
 
-
-should get the `metadata_archive_dir` and `data_archive_dir` paths you just defined in the DISDRODB Configuration File:
+To verify the configuration, open a new Python session and run:
 
 ```python
 import disdrodb
 
-print("
-print("
+print("Metadata Archive Directory:", disdrodb.get_metadata_archive_dir())
+print("Data Archive Directory:", disdrodb.get_data_archive_dir())
 ```
 
-
-by typing the following command in the terminal:
+Or in the shell:
 
 ```bash
-
-
-disdrodb_metadata_archive_directory
-
+disdrodb_metadata_archive_directory
+disdrodb_data_archive_directory
 ```
 
 ### 📥 Download the DISDRODB Raw Data Archive
 
-To download all data stored into the DISDRODB Decentralized Data Archive,
+To download all data stored into the DISDRODB Decentralized Data Archive,
+you just have to run the following command:
 
 ```bash
 disdrodb_download_archive
 ```
 
-
+To download from a specific source (e.g., EPFL):
 
 ```bash
 disdrodb_download_archive --data-sources EPFL
@@ -185,7 +170,7 @@ To open the local DISDRODB Data Archive directory, type:
 disdrodb_open_data_archive
 ```
 
-### 💫 Transform
+### 💫 Transform Raw Data to Standardized netCDFs
 
 If you want to convert all stations raw data into standardized netCDF4 files, run the following command in the terminal:
 
@@ -195,40 +180,28 @@ disdrodb_run_l0
 
 Type `disdrodb_run_l0 --help` to see further options.
 
-### 💫 Generate
+### 💫 Generate DISDRODB L1 and L2 products
 
-To generate
+To generate DISDRODB L1 and L2 products, run the following commands in the terminal:
 
 ```bash
 disdrodb_run_l1
-```
-```bash
 disdrodb_run_l2e
-```
-```bash
 disdrodb_run_l2m
 ```
 
-### 💫
+### 💫 Analyze Analysis-Ready Products
 
-The
-a DISDRODB product into a `xarray.Dataset` (or `pandas.DataFrame` for the DISDRODB L0A product).
+The software’s `open_dataset` function **lazily** opens all station files of a given product:
 
 ```python
 import disdrodb
 
-# Define station arguments
-data_source = "EPFL"
-campaign_name = "HYMEX_LTE_SOP3"
-station_name = "10"
-
-# Open all station files of a specific product
 ds = disdrodb.open_dataset(
     product="L0C",
-
-
-
-    station_name=station_name,
+    data_source="EPFL",
+    campaign_name="HYMEX_LTE_SOP3",
+    station_name="10",
 )
 ds
 ```