disdrodb 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -0
  4. disdrodb/api/checks.py +8 -7
  5. disdrodb/api/io.py +81 -29
  6. disdrodb/api/path.py +17 -14
  7. disdrodb/api/search.py +15 -18
  8. disdrodb/cli/disdrodb_open_products_options.py +38 -0
  9. disdrodb/cli/disdrodb_run.py +2 -2
  10. disdrodb/cli/disdrodb_run_station.py +4 -4
  11. disdrodb/configs.py +1 -1
  12. disdrodb/data_transfer/download_data.py +70 -1
  13. disdrodb/etc/configs/attributes.yaml +62 -8
  14. disdrodb/etc/configs/encodings.yaml +28 -0
  15. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_SSE.yaml +8 -0
  16. disdrodb/etc/products/L2M/MODELS/GAMMA_ML.yaml +1 -1
  17. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_SSE.yaml +8 -0
  18. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_SSE.yaml +8 -0
  19. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +1 -1
  20. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_SSE.yaml +8 -0
  21. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_SSE.yaml +8 -0
  22. disdrodb/etc/products/L2M/global.yaml +4 -4
  23. disdrodb/fall_velocity/graupel.py +8 -8
  24. disdrodb/fall_velocity/hail.py +2 -2
  25. disdrodb/fall_velocity/rain.py +33 -5
  26. disdrodb/issue/checks.py +1 -1
  27. disdrodb/l0/l0_reader.py +1 -1
  28. disdrodb/l0/l0a_processing.py +2 -2
  29. disdrodb/l0/l0b_nc_processing.py +5 -5
  30. disdrodb/l0/l0b_processing.py +20 -24
  31. disdrodb/l0/l0c_processing.py +18 -13
  32. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +4 -0
  33. disdrodb/l0/readers/PARSIVEL2/VIETNAM/IGE_PARSIVEL2.py +239 -0
  34. disdrodb/l0/template_tools.py +13 -13
  35. disdrodb/l1/classification.py +10 -6
  36. disdrodb/l2/empirical_dsd.py +25 -15
  37. disdrodb/l2/processing.py +32 -14
  38. disdrodb/metadata/download.py +1 -1
  39. disdrodb/metadata/geolocation.py +4 -4
  40. disdrodb/metadata/reader.py +3 -3
  41. disdrodb/metadata/search.py +10 -8
  42. disdrodb/psd/__init__.py +4 -0
  43. disdrodb/psd/fitting.py +2660 -592
  44. disdrodb/psd/gof_metrics.py +389 -0
  45. disdrodb/psd/grid_search.py +1066 -0
  46. disdrodb/psd/models.py +1281 -145
  47. disdrodb/routines/l2.py +6 -6
  48. disdrodb/routines/options_validation.py +8 -8
  49. disdrodb/scattering/axis_ratio.py +70 -2
  50. disdrodb/scattering/permittivity.py +13 -10
  51. disdrodb/scattering/routines.py +10 -10
  52. disdrodb/summary/routines.py +23 -20
  53. disdrodb/utils/archiving.py +29 -22
  54. disdrodb/utils/attrs.py +6 -4
  55. disdrodb/utils/dataframe.py +4 -4
  56. disdrodb/utils/encoding.py +3 -1
  57. disdrodb/utils/event.py +9 -9
  58. disdrodb/utils/logger.py +4 -7
  59. disdrodb/utils/manipulations.py +2 -2
  60. disdrodb/utils/subsetting.py +1 -1
  61. disdrodb/utils/time.py +8 -7
  62. disdrodb/viz/plots.py +25 -17
  63. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/METADATA +44 -33
  64. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/RECORD +68 -66
  65. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/entry_points.txt +1 -0
  66. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +0 -6
  67. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +0 -6
  68. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +0 -6
  69. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_MAE.yaml +0 -6
  70. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_MAE.yaml +0 -6
  71. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +0 -6
  72. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_Z_MAE.yaml +0 -6
  73. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/WHEEL +0 -0
  74. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/licenses/LICENSE +0 -0
  75. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/top_level.txt +0 -0
disdrodb/psd/gof_metrics.py (new file)
@@ -0,0 +1,389 @@
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2026 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ # -----------------------------------------------------------------------------.
+ """Define Goodness-Of-Fit metrics for xarray objects."""
+
+ import numpy as np
+ import xarray as xr
+
+ from disdrodb.constants import DIAMETER_DIMENSION
+ from disdrodb.utils.warnings import suppress_warnings
+
+
+ def compute_kl_divergence(pk, qk, dim, eps=1e-12):
+     """Compute Kullback-Leibler (KL) divergence.
+
+     Compares two probability distributions.
+     When KL < 0.1, the two distributions are similar.
+     When KL < 0.01, the two distributions are nearly indistinguishable.
+
+     Note that when qk is 0 but pk > 0 in some bin, the KL divergence becomes
+     arbitrarily large; the ``eps`` regularization keeps it finite.
+
+     Parameters
+     ----------
+     pk : xarray.DataArray
+         Observed / true / empirical probability distribution.
+     qk : xarray.DataArray
+         Predicted / model / approximating probability distribution.
+     dim : str
+         Name of the bin dimension.
+     eps : float, optional
+         Small value for numerical stability. Default is 1e-12.
+
+     Returns
+     -------
+     xarray.DataArray
+         Kullback-Leibler (KL) divergence.
+
+     """
+     # Get total probability masses (used to flag empty distributions)
+     pk_mass = pk.sum(dim=dim)
+     qk_mass = qk.sum(dim=dim)
+
+     # Regularize probabilities to avoid division by zero (or log of 0)
+     qk_regularized = xr.where(qk == 0, eps, qk)
+     pk_regularized = xr.where(pk == 0, eps, pk)
+
+     # Compute log probability ratio
+     log_prob_ratio = np.log(pk_regularized / qk_regularized)
+
+     # Compute divergence (terms with pk=0 contribute zero)
+     kl = (pk * log_prob_ratio).sum(dim=dim, skipna=False)
+
+     # Clip tiny negative values due to numerical noise
+     kl = xr.where(kl >= 0.0, kl, 0.0)
+
+     # Handle edge cases when P=0 or Q=0
+     kl = xr.where((pk_mass == 0) | (qk_mass == 0), np.nan, kl)
+     return kl
+
+
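A minimal usage sketch of compute_kl_divergence (editorial illustration, not part of the packaged file); the bin dimension name below is arbitrary, since it is passed explicitly:

    import xarray as xr
    from disdrodb.psd.gof_metrics import compute_kl_divergence

    dim = "diameter_bin_center"
    pk = xr.DataArray([0.2, 0.5, 0.3], dims=dim)     # observed per-bin probabilities
    qk = xr.DataArray([0.25, 0.45, 0.30], dims=dim)  # modelled per-bin probabilities

    kl = compute_kl_divergence(pk=pk, qk=qk, dim=dim)
    print(float(kl))  # ~0.008, i.e. KL < 0.01: nearly indistinguishable distributions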
+ def compute_jensen_shannon_distance(pk, qk, dim, eps=1e-12):
+     """Compute Jensen-Shannon distance.
+
+     Symmetric and finite version of the KL divergence.
+     The Jensen-Shannon distance is the square root of the Jensen-Shannon divergence.
+     Values are bounded between 0 and np.sqrt(ln(2)) = 0.83256.
+
+     Parameters
+     ----------
+     pk : xarray.DataArray
+         Observed / true probability distribution.
+     qk : xarray.DataArray
+         Predicted / model probability distribution.
+     dim : str
+         Name of the bin dimension.
+     eps : float, optional
+         Small value for numerical stability. Default is 1e-12.
+
+     Returns
+     -------
+     xarray.DataArray
+         Jensen-Shannon distance.
+     """
+     # Mixture distribution
+     mk = 0.5 * (pk + qk)
+
+     # KL(P || M)
+     kl_pm = compute_kl_divergence(pk=pk, qk=mk, dim=dim, eps=eps)
+
+     # KL(Q || M)
+     kl_qm = compute_kl_divergence(pk=qk, qk=mk, dim=dim, eps=eps)
+
+     # Jensen-Shannon divergence [0, ln(2)]
+     js_div = 0.5 * (kl_pm + kl_qm)
+     js_div = np.maximum(js_div, 0.0)  # clip tiny negative values to zero (numerical safety)
+
+     # Jensen-Shannon distance
+     js_distance = np.sqrt(js_div)
+     return js_distance
+
+
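A hedged sketch (editorial illustration) of the bound stated in the docstring: two toy distributions with disjoint support reach the maximum Jensen-Shannon distance sqrt(ln 2):

    import numpy as np
    import xarray as xr
    from disdrodb.psd.gof_metrics import compute_jensen_shannon_distance

    dim = "diameter_bin_center"
    pk = xr.DataArray([1.0, 0.0, 0.0], dims=dim)  # all mass in the first bin
    qk = xr.DataArray([0.0, 0.0, 1.0], dims=dim)  # all mass in the last bin

    jsd = compute_jensen_shannon_distance(pk=pk, qk=qk, dim=dim)
    print(float(jsd), np.sqrt(np.log(2)))  # both ~0.8326: disjoint supports hit the upper bound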
+ def compute_wasserstein_distance(
+     pk,
+     qk,
+     D,
+     dD,
+     dim,
+     integration="bin",
+ ):
+     """Compute the Wasserstein-1 distance (Earth Mover's Distance) between two distributions.
+
+     Parameters
+     ----------
+     pk : xarray.DataArray
+         Observed / true probability distribution.
+     qk : xarray.DataArray
+         Predicted / model probability distribution.
+     D : xarray.DataArray
+         Bin centers.
+     dD : xarray.DataArray
+         Bin widths.
+     dim : str
+         Name of the bin dimension.
+     integration : str, optional
+         Integration scheme used to compute the Wasserstein integral.
+         Supported options are ``"bin"`` and ``"left_riemann"``.
+
+         ``"bin"`` computes the histogram-based Wasserstein distance. Distributions are interpreted as
+         piecewise-constant densities over bins of width ``dD``. The distance is
+         computed by integrating the difference between cumulative distribution
+         functions over each bin. This is the default.
+
+         ``"left_riemann"`` computes the discrete-support Wasserstein distance. Probability mass is assumed to be
+         concentrated at bin centers ``D``, and the integral is approximated using
+         the spacing between support points, consistent with :func:`scipy.stats.wasserstein_distance`.
+
+     Returns
+     -------
+     xarray.DataArray
+         Wasserstein-1 distance.
+     """
+     # Get row masses before computing CDFs
+     pk_mass = pk.sum(dim=dim)
+     qk_mass = qk.sum(dim=dim)
+
+     # CDFs
+     cdf_p = pk.cumsum(dim)
+     cdf_q = qk.cumsum(dim)
+
+     # Absolute CDF difference
+     diff = abs(cdf_p - cdf_q)
+
+     if integration == "bin":
+         # Histogram-based Wasserstein (density interpretation)
+         wd = (diff * dD).sum(dim=dim)
+
+     elif integration == "left_riemann":
+         # Discrete-support Wasserstein (SciPy-style)
+         # Evaluate |CDF difference| at left support points D_i
+         diff_left = diff.isel({dim: slice(None, -1)})
+
+         # Compute spacing between support points and
+         # explicitly assign left coordinates to avoid misalignment
+         dx = D.diff(dim)
+         dx = dx.assign_coords({dim: D.isel({dim: slice(None, -1)})})
+         wd = (diff_left * dx).sum(dim=dim)
+     else:
+         raise ValueError("integration must be 'bin' or 'left_riemann'")
+
+     # Handle edge cases (Wasserstein distance is undefined when P=0 or Q=0)
+     wd = xr.where((pk_mass == 0) | (qk_mass == 0), np.nan, wd)
+     return wd
+
+
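An editorial sketch of the default ``bin`` integration (values below are illustrative, not from the package): shifting all probability mass by one bin gives a distance equal to the bin width.

    import xarray as xr
    from disdrodb.psd.gof_metrics import compute_wasserstein_distance

    dim = "diameter_bin_center"
    D = xr.DataArray([0.25, 0.75, 1.25, 1.75], dims=dim)  # bin centers [mm]
    dD = xr.DataArray([0.5, 0.5, 0.5, 0.5], dims=dim)     # bin widths [mm]
    pk = xr.DataArray([1.0, 0.0, 0.0, 0.0], dims=dim)
    qk = xr.DataArray([0.0, 1.0, 0.0, 0.0], dims=dim)

    wd = compute_wasserstein_distance(pk=pk, qk=qk, D=D, dD=dD, dim=dim, integration="bin")
    print(float(wd))  # 0.5: the CDFs differ by 1 over a single 0.5 mm bin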
+ def compute_kolmogorov_smirnov_distance(pk, qk, dim):
+     """Compute Kolmogorov-Smirnov (KS) distance.
+
+     The Kolmogorov-Smirnov (KS) distance is bounded between 0 and 1,
+     where 0 indicates that the two distributions are identical.
+     The associated KS test p-value ranges from 0 to 1,
+     with a value of 1 indicating no evidence against the null hypothesis that the distributions are identical.
+     When the p-value is smaller than the significance level (e.g. < 0.05), the model is rejected.
+
+     If the model parameters are estimated from the same data to which the model is compared,
+     the standard KS p-values are invalid.
+     The solution is to use a parametric bootstrap:
+     1. Fit the model to your data.
+     2. Simulate many datasets from that fitted model.
+     3. Refit the model to each simulated dataset.
+     4. Compute the KS statistic each time.
+     5. Compare your observed KS statistic to the bootstrap distribution.
+
+     Parameters
+     ----------
+     pk : xarray.DataArray
+         Observed / true probability distribution.
+     qk : xarray.DataArray
+         Predicted / model probability distribution.
+     dim : str
+         Name of the bin dimension.
+
+     Returns
+     -------
+     ks_statistic : xarray.DataArray
+         Kolmogorov-Smirnov statistic (maximum CDF difference).
+         If 0, the two distributions are identical.
+     ks_p_value : xarray.DataArray
+         Kolmogorov-Smirnov test p-value (asymptotic approximation).
+         A p-value of 0 means "strong evidence against equality".
+         A p-value of 1 means "no evidence against equality".
+         Identical distributions show a p-value of 1.
+         Similar distributions show a p-value close to 1.
+     """
+     # Get row masses before computing CDFs
+     pk_mass = pk.sum(dim=dim)
+     qk_mass = qk.sum(dim=dim)
+
+     # CDFs
+     cdf_p = pk.cumsum(dim)
+     cdf_q = qk.cumsum(dim)
+
+     # KS statistic
+     ks = np.abs(cdf_p - cdf_q).max(dim=dim)
+
+     # Effective sample sizes (Rényi-2 effective N)
+     n_eff_p = 1.0 / (pk**2).sum(dim=dim)
+     n_eff_q = 1.0 / (qk**2).sum(dim=dim)
+
+     # Combined effective sample size
+     n_eff = (n_eff_p * n_eff_q) / (n_eff_p + n_eff_q)
+
+     # Asymptotic KS p-value approximation
+     p_value = 2.0 * np.exp(-2.0 * (ks * np.sqrt(n_eff)) ** 2)
+     p_value = p_value.clip(0.0, 1.0)
+
+     # Handle edge cases (KS distance is undefined when P=0 or Q=0)
+     ks = xr.where((pk_mass == 0) | (qk_mass == 0), np.nan, ks)
+     p_value = xr.where((pk_mass == 0) | (qk_mass == 0), np.nan, p_value)
+
+     return ks, p_value
+
+
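A short illustrative call (editorial sketch, toy values): with such similar distributions the KS statistic is small and the asymptotic p-value saturates at 1.

    import xarray as xr
    from disdrodb.psd.gof_metrics import compute_kolmogorov_smirnov_distance

    dim = "diameter_bin_center"
    pk = xr.DataArray([0.2, 0.5, 0.3], dims=dim)
    qk = xr.DataArray([0.25, 0.45, 0.30], dims=dim)

    ks, p_value = compute_kolmogorov_smirnov_distance(pk=pk, qk=qk, dim=dim)
    print(float(ks), float(p_value))  # ~0.05 and 1.0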
+ def compute_gof_stats(obs, pred, dim=DIAMETER_DIMENSION):
+     """
+     Compute various goodness-of-fit (GoF) statistics between observed and predicted values.
+
+     Computes a comprehensive set of GoF metrics including the Pearson correlation,
+     error statistics, and distribution distance metrics (KL, JS, Wasserstein, KS).
+
+     Parameters
+     ----------
+     obs : xarray.DataArray
+         Observations DataArray with at least dimension ``dim``.
+         Should contain 'diameter_bin_center' and 'diameter_bin_width' coordinates.
+     pred : xarray.DataArray
+         Predictions DataArray with at least dimension ``dim``.
+     dim : str, optional
+         DataArray dimension over which to compute GoF statistics.
+         Default is DIAMETER_DIMENSION.
+
+     Returns
+     -------
+     ds : xarray.Dataset
+         Dataset containing the following computed GoF statistics:
+
+         - R2: Coefficient of determination (squared Pearson correlation)
+         - MAE: Mean Absolute Error
+         - MaxAE: Maximum Absolute Error
+         - RelMaxAE: Relative Maximum Absolute Error
+         - PeakDiff: Difference at distribution peak (N(D) max)
+         - RelPeakDiff: Relative difference at peak
+         - DmodeDiff: Difference in mode diameters
+         - NtDiff: Difference in total number concentration
+         - KLDiv: Kullback-Leibler divergence
+         - JSD: Jensen-Shannon distance
+         - WD: Wasserstein-1 distance
+         - KS: Kolmogorov-Smirnov statistic
+     """
+     # TODO: add censoring option (by setting values to np.nan?)
+     from disdrodb.l2.empirical_dsd import get_mode_diameter
+
+     # Retrieve diameter and diameter bin width
+     diameter = obs["diameter_bin_center"]
+     diameter_bin_width = obs["diameter_bin_width"]
+
+     # Compute errors
+     error = obs - pred
+
+     # Compute max obs and pred
+     obs_max = obs.max(dim=dim, skipna=False)
+     pred_max = pred.max(dim=dim, skipna=False)
+
+     # Compute NaN mask
+     mask_nan = np.logical_or(np.isnan(obs_max), np.isnan(pred_max))
+
+     # Compute GoF statistics
+     with suppress_warnings():
+         # Compute Pearson correlation
+         # - NaN if obs or pred is constant (the standard deviation of a constant is zero)
+         pearson_r = xr.corr(obs, pred, dim=dim)
+
+         # Compute Mean Absolute Error (MAE)
+         mae = np.abs(error).mean(dim=dim, skipna=False)
+
+         # Compute maximum absolute error
+         max_error = np.abs(error).max(dim=dim, skipna=False)
+         relative_max_error = xr.where(max_error == 0, 0, xr.where(obs_max == 0, np.nan, max_error / obs_max))
+
+         # Compute deviation of N(D) at distribution mode
+         mode_deviation = obs_max - pred_max
+         mode_relative_deviation = xr.where(
+             mode_deviation == 0,
+             0,
+             xr.where(obs_max == 0, np.nan, mode_deviation / obs_max),
+         )
+
+         # Compute diameter difference of the distribution mode
+         diameter_mode_pred = get_mode_diameter(pred, diameter)
+         diameter_mode_obs = get_mode_diameter(obs, diameter)
+         diameter_mode_deviation = diameter_mode_obs - diameter_mode_pred
+
+         # Compute difference in total number concentration
+         total_number_concentration_obs = (obs * diameter_bin_width).sum(dim=dim, skipna=False)
+         total_number_concentration_pred = (pred * diameter_bin_width).sum(dim=dim, skipna=False)
+         total_number_concentration_difference = total_number_concentration_pred - total_number_concentration_obs
+
+         # Compute pdf per bin
+         pk_pdf = obs / total_number_concentration_obs
+         qk_pdf = pred / total_number_concentration_pred
+
+         # Compute probabilities per bin
+         pk = pk_pdf * diameter_bin_width
+         pk = pk / pk.sum(dim=dim, skipna=False)  # this might not be necessary
+         qk = qk_pdf * diameter_bin_width
+         qk = qk / qk.sum(dim=dim, skipna=False)  # this might not be necessary
+
+         # Keep the probability mass at 0 if the total concentration is 0
+         pk = xr.where(total_number_concentration_obs == 0, 0, pk)
+         qk = xr.where(total_number_concentration_pred == 0, 0, qk)
+
+         # Compute Kullback-Leibler divergence
+         kl_divergence = compute_kl_divergence(pk=pk, qk=qk, dim=dim)
+
+         # Compute Jensen-Shannon distance
+         js_distance = compute_jensen_shannon_distance(pk=pk, qk=qk, dim=dim)
+
+         # Compute Wasserstein-1 distance
+         wd = compute_wasserstein_distance(pk=pk, qk=qk, D=diameter, dD=diameter_bin_width, dim=dim)
+
+         # Compute Kolmogorov-Smirnov distance
+         ks_stat, ks_pvalue = compute_kolmogorov_smirnov_distance(pk=pk, qk=qk, dim=dim)
+
+     # Create an xarray.Dataset to hold the computed statistics
+     ds = xr.Dataset(
+         {
+             "R2": pearson_r**2,  # Squared Pearson correlation coefficient
+             "MAE": mae,  # Mean Absolute Error
+             "MaxAE": max_error,  # Maximum Absolute Error
+             "RelMaxAE": relative_max_error,  # Relative Maximum Absolute Error
+             "PeakDiff": mode_deviation,  # Difference at distribution peak
+             "RelPeakDiff": mode_relative_deviation,  # Relative difference at peak
+             "DmodeDiff": diameter_mode_deviation,  # Difference in mode diameters
+             "NtDiff": total_number_concentration_difference,
+             "KLDiv": kl_divergence,  # Kullback-Leibler divergence
+             "JSD": js_distance,  # Jensen-Shannon distance
+             "WD": wd,  # Wasserstein-1 distance
+             "KS": ks_stat,  # Kolmogorov-Smirnov statistic
+             # "KS_pvalue": ks_pvalue,  # Kolmogorov-Smirnov test p-value
+         },
+     )
+     # Round
+     ds = ds.round(2)
+     # Mask where input obs or pred is NaN
+     ds = ds.where(~mask_nan)
+     return ds
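Finally, a hedged end-to-end sketch of compute_gof_stats on synthetic exponential drop size distributions (editorial illustration: the coordinate names follow the docstring above, and the spectra and parameter values are made up, not from the package):

    import numpy as np
    import xarray as xr
    from disdrodb.constants import DIAMETER_DIMENSION
    from disdrodb.psd.gof_metrics import compute_gof_stats

    # Synthetic diameter bins (centers and widths in mm)
    centers = np.arange(0.25, 5.0, 0.5)
    widths = np.full_like(centers, 0.5)
    coords = {
        "diameter_bin_center": (DIAMETER_DIMENSION, centers),
        "diameter_bin_width": (DIAMETER_DIMENSION, widths),
    }

    # Observed N(D) and a slightly perturbed "fitted" N(D) [m-3 mm-1]
    obs = xr.DataArray(8000.0 * np.exp(-2.0 * centers), dims=DIAMETER_DIMENSION, coords=coords)
    pred = xr.DataArray(7500.0 * np.exp(-1.9 * centers), dims=DIAMETER_DIMENSION, coords=coords)

    ds_gof = compute_gof_stats(obs=obs, pred=pred, dim=DIAMETER_DIMENSION)
    print(ds_gof[["R2", "MAE", "KLDiv", "JSD", "WD", "KS"]])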