disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +68 -34
- disdrodb/_config.py +5 -4
- disdrodb/_version.py +16 -3
- disdrodb/accessor/__init__.py +20 -0
- disdrodb/accessor/methods.py +125 -0
- disdrodb/api/checks.py +177 -24
- disdrodb/api/configs.py +3 -3
- disdrodb/api/info.py +13 -13
- disdrodb/api/io.py +281 -22
- disdrodb/api/path.py +184 -195
- disdrodb/api/search.py +18 -9
- disdrodb/cli/disdrodb_create_summary.py +103 -0
- disdrodb/cli/disdrodb_create_summary_station.py +91 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
- disdrodb/cli/disdrodb_run_l1_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
- disdrodb/configs.py +149 -4
- disdrodb/constants.py +61 -0
- disdrodb/data_transfer/download_data.py +127 -11
- disdrodb/etc/configs/attributes.yaml +339 -0
- disdrodb/etc/configs/encodings.yaml +473 -0
- disdrodb/etc/products/L1/global.yaml +13 -0
- disdrodb/etc/products/L2E/10MIN.yaml +12 -0
- disdrodb/etc/products/L2E/1MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +22 -0
- disdrodb/etc/products/L2M/10MIN.yaml +12 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +26 -0
- disdrodb/issue/writer.py +2 -0
- disdrodb/l0/__init__.py +13 -0
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
- disdrodb/l0/l0a_processing.py +37 -32
- disdrodb/l0/l0b_nc_processing.py +118 -8
- disdrodb/l0/l0b_processing.py +30 -65
- disdrodb/l0/l0c_processing.py +369 -259
- disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
- disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
- disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
- disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
- disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
- disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
- disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
- disdrodb/l1/__init__.py +5 -0
- disdrodb/l1/fall_velocity.py +46 -0
- disdrodb/l1/filters.py +34 -20
- disdrodb/l1/processing.py +46 -45
- disdrodb/l1/resampling.py +77 -66
- disdrodb/l1_env/routines.py +18 -3
- disdrodb/l2/__init__.py +7 -0
- disdrodb/l2/empirical_dsd.py +58 -10
- disdrodb/l2/processing.py +268 -117
- disdrodb/metadata/checks.py +132 -125
- disdrodb/metadata/standards.py +3 -1
- disdrodb/psd/fitting.py +631 -345
- disdrodb/psd/models.py +9 -6
- disdrodb/routines/__init__.py +54 -0
- disdrodb/{l0/routines.py → routines/l0.py} +316 -355
- disdrodb/{l1/routines.py → routines/l1.py} +76 -116
- disdrodb/routines/l2.py +1019 -0
- disdrodb/{routines.py → routines/wrappers.py} +98 -10
- disdrodb/scattering/__init__.py +16 -4
- disdrodb/scattering/axis_ratio.py +61 -37
- disdrodb/scattering/permittivity.py +504 -0
- disdrodb/scattering/routines.py +746 -184
- disdrodb/summary/__init__.py +17 -0
- disdrodb/summary/routines.py +4196 -0
- disdrodb/utils/archiving.py +434 -0
- disdrodb/utils/attrs.py +68 -125
- disdrodb/utils/cli.py +5 -5
- disdrodb/utils/compression.py +30 -1
- disdrodb/utils/dask.py +121 -9
- disdrodb/utils/dataframe.py +61 -7
- disdrodb/utils/decorators.py +31 -0
- disdrodb/utils/directories.py +35 -15
- disdrodb/utils/encoding.py +37 -19
- disdrodb/{l2 → utils}/event.py +15 -173
- disdrodb/utils/logger.py +14 -7
- disdrodb/utils/manipulations.py +81 -0
- disdrodb/utils/routines.py +166 -0
- disdrodb/utils/subsetting.py +214 -0
- disdrodb/utils/time.py +35 -177
- disdrodb/utils/writer.py +20 -7
- disdrodb/utils/xarray.py +5 -4
- disdrodb/viz/__init__.py +13 -0
- disdrodb/viz/plots.py +398 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
- disdrodb/l1/encoding_attrs.py +0 -642
- disdrodb/l2/processing_options.py +0 -213
- disdrodb/l2/routines.py +0 -868
- /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
disdrodb/viz/plots.py
CHANGED
|
@@ -15,3 +15,401 @@
|
|
|
15
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""DISDRODB Plotting Tools."""
|
|
18
|
+
import matplotlib.pyplot as plt
|
|
19
|
+
import numpy as np
|
|
20
|
+
import psutil
|
|
21
|
+
import xarray as xr
|
|
22
|
+
from matplotlib.colors import LogNorm, Normalize
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def plot_nd(ds, var="drop_number_concentration", cmap=None, norm=None):
    """Plot drop number concentration N(D) timeseries."""
    # Validate that the requested variable exists in the dataset
    if var not in ds:
        raise ValueError(f"{var} is not a xarray Dataset variable!")
    # Check only time and diameter dimensions are specified
    # TODO: DIAMETER_DIMENSION, "time"

    # Load just the variable of interest into memory
    ds_var = ds[[var]].compute()

    # Fill timeseries gaps so the mesh cells are regularly spaced
    # (project-provided xarray accessor)
    ds_var = ds_var.disdrodb.regularize()

    # Mask zero concentrations so they render as gaps instead of color
    ds_var = ds_var.where(ds_var[var] > 0)

    # Default colormap
    if cmap is None:
        cmap = plt.get_cmap("Spectral_r").copy()

    # Default to a logarithmic color scale anchored at the data minimum
    vmin = ds_var[var].min().item()
    if norm is None:
        norm = LogNorm(vmin, None)

    # Draw the time-diameter mesh and label the axes
    p = ds_var[var].plot.pcolormesh(x="time", norm=norm, cmap=cmap)
    p.axes.set_title("Drop number concentration (N(D))")
    p.axes.set_ylabel("Drop diameter (mm)")
    return p
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def normalize_array(arr, method="max"):
    """Normalize a NumPy array with the selected strategy.

    Parameters
    ----------
    arr : np.ndarray
        Input array.
    method : str
        Normalization method. Options:
        - 'max'   : divide by the maximum value.
        - 'minmax': rescale to the [0, 1] interval.
        - 'zscore': standardize to zero mean and unit standard deviation.
        - 'log'   : apply a log10 transform (shifted when min <= 0).
        - 'none'  : return the input unchanged.

    Returns
    -------
    np.ndarray
        Normalized array.
    """
    arr = np.asarray(arr, dtype=float)

    if method == "none":
        return arr

    if method == "log":
        # Shift so every value is strictly positive before taking log10
        return np.log10(arr - np.nanmin(arr) + 1e-12)

    if method == "max":
        peak = np.nanmax(arr)
        # Degenerate all-zero case: leave the array untouched
        return arr if peak == 0 else arr / peak

    if method == "minmax":
        lo = np.nanmin(arr)
        hi = np.nanmax(arr)
        # Constant arrays map to all zeros (avoid division by zero)
        return np.zeros_like(arr) if hi == lo else (arr - lo) / (hi - lo)

    if method == "zscore":
        mu = np.nanmean(arr)
        sigma = np.nanstd(arr)
        # Constant arrays map to all zeros (avoid division by zero)
        return np.zeros_like(arr) if sigma == 0 else (arr - mu) / sigma

    raise ValueError(f"Unknown normalization method: {method}")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _np_to_rgba_alpha(arr, cmap="viridis", cmap_norm=None, scaling="linear"):
    """Convert a 2D numpy array to an RGBA array with value-driven opacity.

    Parameters
    ----------
    arr : numpy.ndarray
        2D array of counts or frequencies.
    cmap : str or Colormap, optional
        Matplotlib colormap used for the RGB channels.
    cmap_norm : matplotlib.colors.Norm
        Norm used to scale the data before assigning colormap colors.
        The default is Normalize(vmin, vmax).
    scaling : str, optional
        Scaling used for the alpha channel:
        - "linear"   : min-max normalization
        - "log"      : logarithmic normalization (positive values only)
        - "sqrt"     : square-root (power-law with exponent=0.5)
        - "exp"      : exponential scaling
        - "quantile" : percentile-based scaling
        - "none"     : full opacity (alpha=1)

    Returns
    -------
    rgba : 3D numpy array (ny, nx, 4)
        RGBA array.
    """
    arr = np.asarray(arr, dtype=float)
    # NaN pixels will be made fully transparent at the end
    mask_na = np.isnan(arr)
    # Unpacking also enforces a 2D input
    ny, nx = arr.shape

    # Default colormap norm spans the finite data range
    if cmap_norm is None:
        cmap_norm = Normalize(vmin=np.nanmin(arr), vmax=np.nanmax(arr))

    # Compute the alpha channel according to the requested scaling
    if scaling == "none":
        alpha = np.ones_like(arr, dtype=float)
    elif scaling == "linear":
        alpha = Normalize(vmin=np.nanmin(arr), vmax=np.nanmax(arr))(arr)
    elif scaling == "log":
        positives = np.where(arr > 0, arr, np.nan)  # mask non-positive
        alpha = LogNorm(vmin=np.nanmin(positives), vmax=np.nanmax(positives))(arr)
        alpha = np.nan_to_num(alpha, nan=0.0)
    elif scaling == "sqrt":
        alpha = np.sqrt(np.clip(arr, 0, None) / np.nanmax(arr))
    elif scaling == "exp":
        scaled = np.clip(arr / np.nanmax(arr), 0, 1)
        alpha = np.expm1(scaled) / np.expm1(1)
    elif scaling == "quantile":
        # rankdata without scipy: double argsort yields 0..n-1 ranks
        ranks = np.argsort(np.argsort(arr.ravel()))
        alpha = (ranks / (arr.size - 1)).reshape(arr.shape)
    else:
        raise ValueError(f"Unknown scaling type: {scaling}")

    # Colorize the values, then overwrite the alpha channel
    rgba = plt.get_cmap(cmap).copy()(cmap_norm(arr))
    alpha[mask_na] = 0  # where input was NaN
    rgba[..., -1] = np.clip(alpha, 0, 1)
    return rgba
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def to_rgba(obj, cmap="viridis", norm=None, scaling="none"):
    """Map a xarray DataArray (or numpy array) to RGBA with optional alpha-scaling."""
    template = None
    if isinstance(obj, xr.DataArray):
        # Keep an xarray template with a trailing size-4 'rgba' dimension
        template = obj.copy().expand_dims({"rgba": 4}).transpose(..., "rgba")
        # Work on the raw values from here on
        obj = obj.to_numpy()

    # Colorize and apply the alpha scaling
    rgba = _np_to_rgba_alpha(obj, cmap=cmap, cmap_norm=norm, scaling=scaling)

    # Numpy in, numpy out
    if template is None:
        return rgba
    # Otherwise wrap back into the DataArray template
    template.data = rgba
    return template
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def max_blend_images(ds_rgb, dim):
    """Max blend a RGBA DataArray across a samples dimensions."""
    # Put the blending dimension first: stack has shape (N, H, W, 4)
    ds_rgb = ds_rgb.transpose(dim, ...)
    stack = ds_rgb.data
    # Per-pixel index of the sample with the largest alpha channel
    winner = np.argmax(stack[..., 3], axis=0)  # (H, W)
    # Broadcast the winner index over the 4 RGBA channels and gather
    gather_idx = np.repeat(winner[np.newaxis, ..., np.newaxis], 4, axis=-1)
    blended = np.take_along_axis(stack, gather_idx, axis=0)[0]  # (H, W, 4)
    # Reuse a single-sample slice as coordinate/metadata template
    out = ds_rgb.isel({dim: 0}).copy()
    out.data = blended
    return out
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _create_denseline_grid(indices, ny, nx, nsamples):
|
|
216
|
+
# Assign 1 when line pass in a bin
|
|
217
|
+
valid = (indices >= 0) & (indices < ny)
|
|
218
|
+
s_idx, x_idx = np.nonzero(valid)
|
|
219
|
+
y_idx = indices[valid]
|
|
220
|
+
|
|
221
|
+
# ----------------------------------------------
|
|
222
|
+
### Vectorized code with high memory footprint because of 3D array
|
|
223
|
+
|
|
224
|
+
# # Create 3D array with hits
|
|
225
|
+
# grid_3d = np.zeros((nsamples, ny, nx), dtype=np.int64)
|
|
226
|
+
# grid_3d[s_idx, y_idx, x_idx] = 1
|
|
227
|
+
|
|
228
|
+
# # Normalize by columns
|
|
229
|
+
# col_sums = grid_3d.sum(axis=1, keepdims=True)
|
|
230
|
+
# col_sums[col_sums == 0] = 1 # Avoid division by zero
|
|
231
|
+
# grid_3d = grid_3d / col_sums
|
|
232
|
+
|
|
233
|
+
# # Sum over samples
|
|
234
|
+
# grid = grid_3d.sum(axis=0)
|
|
235
|
+
|
|
236
|
+
# # Free memory
|
|
237
|
+
# del grid_3d
|
|
238
|
+
|
|
239
|
+
# ----------------------------------------------
|
|
240
|
+
## Vectorized alternative with much lower memory footprint
|
|
241
|
+
|
|
242
|
+
# Count hits per (sample, y, x)
|
|
243
|
+
grid = np.zeros((ny, nx), dtype=np.float64)
|
|
244
|
+
|
|
245
|
+
# Compute per-sample-per-column counts
|
|
246
|
+
col_counts = np.zeros((nsamples, nx), dtype=np.int64)
|
|
247
|
+
np.add.at(col_counts, (s_idx, x_idx), 1)
|
|
248
|
+
|
|
249
|
+
# Define weights to normalize contributions, avoiding division by zero
|
|
250
|
+
# - Weight = 1 / (# hits per column, per sample)
|
|
251
|
+
col_counts[col_counts == 0] = 1
|
|
252
|
+
weights = 1.0 / col_counts[s_idx, x_idx]
|
|
253
|
+
|
|
254
|
+
# Accumulate weighted contributions
|
|
255
|
+
np.add.at(grid, (y_idx, x_idx), weights)
|
|
256
|
+
|
|
257
|
+
# Return 2D grid
|
|
258
|
+
return grid
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _compute_block_size(ny, nx, dtype=np.float64, safety_margin=2e9):
    """Compute maximum block size given available memory."""
    # Budget = currently free RAM minus a fixed safety margin
    budget = psutil.virtual_memory().available - safety_margin
    # Fixed cost of the final (ny, nx) output grid
    grid_cost = ny * nx * np.dtype(dtype).itemsize
    # Worst-case per-sample cost (includes col_counts + indices + weights)
    per_sample_cost = nx * 40
    # Never return fewer than one sample per block
    return max(1, int((budget - grid_cost) // per_sample_cost))
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def compute_dense_lines(
    da: xr.DataArray,
    coord: str,
    x_bins: list,
    y_bins: list,
    normalization="max",
):
    """
    Compute a 2D density-of-lines histogram from an xarray.DataArray.

    Parameters
    ----------
    da : xarray.DataArray
        Input data array. One of its dimensions (named by ``coord``) is taken
        as the horizontal coordinate. All other dimensions are collapsed into
        "series," so that each combination of the remaining dimension values
        produces one 1D line along ``coord``.
    coord : str
        The name of the coordinate/dimension of the DataArray to bin over.
        ``da.coords[coord]`` must be a 1D numeric array (monotonic is recommended).
    x_bins : array_like of shape (nx+1,)
        Bin edges to bin the coordinate/dimension.
        Must be monotonically increasing.
        The number of x-bins will be ``nx = len(x_bins) - 1``.
    y_bins : array_like of shape (ny+1,)
        Bin edges for the DataArray values.
        Must be monotonically increasing.
        The number of y-bins will be ``ny = len(y_bins) - 1``.
    normalization : str, optional
        Normalization applied to the final histogram (see ``normalize_array``).
        If 'none', returns the raw histogram.
        By default, the function normalizes the histogram by its global maximum ('max').
        Log-normalization ('log') is also available.

    Returns
    -------
    xr.DataArray
        2D histogram of shape ``(ny, nx)``. Dimensions are ``('y', 'x')``, where:

        - ``x``: the bin-center coordinate of ``x_bins`` (length ``nx``)
        - ``y``: the bin-center coordinate of ``y_bins`` (length ``ny``)

        Each element ``out.values[y_i, x_j]`` is the count (or normalized count) of how
        many "series-values" from ``da`` fell into the rectangular bin
        ``x_bins[j] <= x_value < x_bins[j+1]`` and
        ``y_bins[i] <= data_value < y_bins[i+1]``.

    References
    ----------
    Moritz, D., Fisher, D. (2018).
    Visualizing a Million Time Series with the Density Line Chart
    https://doi.org/10.48550/arXiv.1808.06019
    """
    # Check DataArray name (used to label the y dimension of the output)
    if da.name is None or da.name == "":
        raise ValueError("The DataArray must have a name.")

    # Validate x_bins and y_bins
    x_bins = np.asarray(x_bins)
    y_bins = np.asarray(y_bins)
    if x_bins.ndim != 1 or x_bins.size < 2:
        raise ValueError("`x_bins` must be a 1D array with at least two edges.")
    if y_bins.ndim != 1 or y_bins.size < 2:
        raise ValueError("`y_bins` must be a 1D array with at least two edges.")
    if not np.all(np.diff(x_bins) > 0):
        raise ValueError("`x_bins` must be strictly increasing.")
    if not np.all(np.diff(y_bins) > 0):
        raise ValueError("`y_bins` must be strictly increasing.")

    # Verify that `coord` exists as either a dimension or a coordinate
    if coord not in (list(da.coords) + list(da.dims)):
        raise ValueError(f"'{coord}' is not a dimension or coordinate of the DataArray.")
    if coord not in da.dims:
        if da[coord].ndim != 1:
            raise ValueError(f"Coordinate '{coord}' must be 1D. Instead has dimensions {da[coord].dims}")
        x_dim = da[coord].dims[0]
    else:
        x_dim = coord

    # Define the x bin centers (output coordinate along `coord`)
    x_values = (x_bins[0:-1] + x_bins[1:]) / 2

    # Extract the array as (samples, x)
    other_dims = [d for d in da.dims if d != x_dim]
    if len(other_dims) == 1:
        arr = da.transpose(*other_dims, x_dim).to_numpy()
    else:
        arr = da.stack({"sample": other_dims}).transpose("sample", x_dim).to_numpy()

    # Define y bin centers
    y_center = (y_bins[0:-1] + y_bins[1:]) / 2

    # Prepare the 2D count grid of shape (ny, nx)
    # - ny corresponds to the value of the timeseries at nx points
    nx = len(x_bins) - 1
    ny = len(y_bins) - 1
    nsamples = arr.shape[0]

    # For each (series, x-index), find which y-bin it falls into:
    # - side="right" implements the documented left-inclusive convention
    #   y_bins[i] <= value < y_bins[i+1] (with side="left", a value exactly
    #   on a bin edge — including y_bins[0] — would fall into the bin below).
    # - Values below y_bins[0] yield idx = -1; NaN values (and values
    #   >= y_bins[-1]) yield idx >= ny. Both are discarded downstream by
    #   the validity mask in _create_denseline_grid.
    indices = np.searchsorted(y_bins, arr, side="right") - 1  # (samples, nx)

    # Compute the unnormalized DenseLines grid by blocks to avoid running
    # out of memory.
    # - Block size is derived from the currently available RAM
    block = _compute_block_size(ny=ny, nx=nx, dtype=np.float64, safety_margin=4e9)
    list_grid = []
    for i in range(0, nsamples, block):
        block_end_idx = min(i + block, nsamples)
        block_indices = indices[i:block_end_idx, :]
        block_nsamples = block_end_idx - i
        block_grid = _create_denseline_grid(indices=block_indices, ny=ny, nx=nx, nsamples=block_nsamples)
        list_grid.append(block_grid)

    # Finalize the sum over samples
    grid = np.stack(list_grid, axis=0).sum(axis=0)

    # Normalize grid
    grid = normalize_array(grid, method=normalization)

    # Create output DataArray with bin-center coordinates
    name = da.name
    out = xr.DataArray(grid, dims=[name, coord], coords={coord: (coord, x_values), name: (name, y_center)})

    # Mask values which are 0 with NaN
    out = out.where(out > 0)

    # Return 2D histogram
    return out
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: disdrodb
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: disdrodb provides tools to download, standardize, share and analyze global disdrometer data.
|
|
5
5
|
Author: Gionata Ghiggi
|
|
6
6
|
Project-URL: homepage, https://github.com/ltelab/disdrodb
|
|
@@ -33,6 +33,7 @@ Requires-Dist: numpy
|
|
|
33
33
|
Requires-Dist: scipy
|
|
34
34
|
Requires-Dist: dask[distributed]
|
|
35
35
|
Requires-Dist: xarray
|
|
36
|
+
Requires-Dist: matplotlib
|
|
36
37
|
Provides-Extra: dev
|
|
37
38
|
Requires-Dist: jupyter; extra == "dev"
|
|
38
39
|
Requires-Dist: pre-commit; extra == "dev"
|
|
@@ -190,9 +191,9 @@ disdrodb_run_l2e
|
|
|
190
191
|
disdrodb_run_l2m
|
|
191
192
|
```
|
|
192
193
|
|
|
193
|
-
### 💫 Analyze Analysis
|
|
194
|
+
### 💫 Analyze Analysis-Ready Products
|
|
194
195
|
|
|
195
|
-
The software
|
|
196
|
+
The software's `open_dataset` function **lazily** opens all station files of a given product:
|
|
196
197
|
|
|
197
198
|
```python
|
|
198
199
|
import disdrodb
|