disdrodb 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +4 -0
- disdrodb/_version.py +2 -2
- disdrodb/accessor/methods.py +14 -0
- disdrodb/api/checks.py +8 -7
- disdrodb/api/io.py +81 -29
- disdrodb/api/path.py +17 -14
- disdrodb/api/search.py +15 -18
- disdrodb/cli/disdrodb_open_products_options.py +38 -0
- disdrodb/cli/disdrodb_run.py +2 -2
- disdrodb/cli/disdrodb_run_station.py +4 -4
- disdrodb/configs.py +1 -1
- disdrodb/data_transfer/download_data.py +70 -1
- disdrodb/etc/configs/attributes.yaml +62 -8
- disdrodb/etc/configs/encodings.yaml +28 -0
- disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/GAMMA_ML.yaml +1 -1
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +1 -1
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_SSE.yaml +8 -0
- disdrodb/etc/products/L2M/global.yaml +4 -4
- disdrodb/fall_velocity/graupel.py +8 -8
- disdrodb/fall_velocity/hail.py +2 -2
- disdrodb/fall_velocity/rain.py +33 -5
- disdrodb/issue/checks.py +1 -1
- disdrodb/l0/l0_reader.py +1 -1
- disdrodb/l0/l0a_processing.py +2 -2
- disdrodb/l0/l0b_nc_processing.py +5 -5
- disdrodb/l0/l0b_processing.py +20 -24
- disdrodb/l0/l0c_processing.py +18 -13
- disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/VIETNAM/IGE_PARSIVEL2.py +239 -0
- disdrodb/l0/template_tools.py +13 -13
- disdrodb/l1/classification.py +10 -6
- disdrodb/l2/empirical_dsd.py +25 -15
- disdrodb/l2/processing.py +32 -14
- disdrodb/metadata/download.py +1 -1
- disdrodb/metadata/geolocation.py +4 -4
- disdrodb/metadata/reader.py +3 -3
- disdrodb/metadata/search.py +10 -8
- disdrodb/psd/__init__.py +4 -0
- disdrodb/psd/fitting.py +2660 -592
- disdrodb/psd/gof_metrics.py +389 -0
- disdrodb/psd/grid_search.py +1066 -0
- disdrodb/psd/models.py +1281 -145
- disdrodb/routines/l2.py +6 -6
- disdrodb/routines/options_validation.py +8 -8
- disdrodb/scattering/axis_ratio.py +70 -2
- disdrodb/scattering/permittivity.py +13 -10
- disdrodb/scattering/routines.py +10 -10
- disdrodb/summary/routines.py +23 -20
- disdrodb/utils/archiving.py +29 -22
- disdrodb/utils/attrs.py +6 -4
- disdrodb/utils/dataframe.py +4 -4
- disdrodb/utils/encoding.py +3 -1
- disdrodb/utils/event.py +9 -9
- disdrodb/utils/logger.py +4 -7
- disdrodb/utils/manipulations.py +2 -2
- disdrodb/utils/subsetting.py +1 -1
- disdrodb/utils/time.py +8 -7
- disdrodb/viz/plots.py +25 -17
- {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/METADATA +44 -33
- {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/RECORD +68 -66
- {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/WHEEL +1 -1
- {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/entry_points.txt +1 -0
- disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +0 -6
- disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_Z_MAE.yaml +0 -6
- {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.4.0.dist-info → disdrodb-0.5.1.dist-info}/top_level.txt +0 -0
disdrodb/psd/fitting.py
CHANGED
|
@@ -16,6 +16,8 @@
|
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""Routines for PSD fitting."""
|
|
18
18
|
|
|
19
|
+
import copy
|
|
20
|
+
|
|
19
21
|
import numpy as np
|
|
20
22
|
import scipy.stats as ss
|
|
21
23
|
import xarray as xr
|
|
@@ -31,7 +33,19 @@ from disdrodb.l2.empirical_dsd import (
|
|
|
31
33
|
get_normalized_intercept_parameter_from_moments,
|
|
32
34
|
get_total_number_concentration,
|
|
33
35
|
)
|
|
34
|
-
from disdrodb.psd.
|
|
36
|
+
from disdrodb.psd.grid_search import (
|
|
37
|
+
check_objectives,
|
|
38
|
+
check_transformation,
|
|
39
|
+
compute_weighted_loss,
|
|
40
|
+
)
|
|
41
|
+
from disdrodb.psd.models import (
|
|
42
|
+
ExponentialPSD,
|
|
43
|
+
GammaPSD,
|
|
44
|
+
GeneralizedGammaPSD,
|
|
45
|
+
LognormalPSD,
|
|
46
|
+
NormalizedGammaPSD,
|
|
47
|
+
NormalizedGeneralizedGammaPSD,
|
|
48
|
+
)
|
|
35
49
|
from disdrodb.utils.manipulations import get_diameter_bin_edges
|
|
36
50
|
from disdrodb.utils.warnings import suppress_warnings
|
|
37
51
|
|
|
@@ -57,117 +71,6 @@ from disdrodb.utils.warnings import suppress_warnings
|
|
|
57
71
|
# - LogNormal, Exponential, Gamma: Nt
|
|
58
72
|
# --> get_total_number_concentration(drop_number_concentration, diameter_bin_width)
|
|
59
73
|
|
|
60
|
-
|
|
61
|
-
####--------------------------------------------------------------------------------------.
|
|
62
|
-
#### Goodness of fit (GOF)
|
|
63
|
-
def compute_gof_stats(obs, pred, dim=DIAMETER_DIMENSION):
|
|
64
|
-
"""
|
|
65
|
-
Compute various goodness-of-fit (GoF) statistics between obs and predicted values.
|
|
66
|
-
|
|
67
|
-
Parameters
|
|
68
|
-
----------
|
|
69
|
-
obs: xarray.DataArray
|
|
70
|
-
Observations DataArray with at least dimension ``dim``.
|
|
71
|
-
pred: xarray.DataArray
|
|
72
|
-
Predictions DataArray with at least dimension ``dim``.
|
|
73
|
-
dim: str
|
|
74
|
-
DataArray dimension over which to compute GOF statistics.
|
|
75
|
-
The default is DIAMETER_DIMENSION.
|
|
76
|
-
|
|
77
|
-
Returns
|
|
78
|
-
-------
|
|
79
|
-
ds: xarray.Dataset
|
|
80
|
-
Dataset containing the computed GoF statistics.
|
|
81
|
-
"""
|
|
82
|
-
from disdrodb.l2.empirical_dsd import get_mode_diameter
|
|
83
|
-
|
|
84
|
-
# Retrieve diameter and diameter bin width
|
|
85
|
-
diameter = obs["diameter_bin_center"]
|
|
86
|
-
diameter_bin_width = obs["diameter_bin_width"]
|
|
87
|
-
|
|
88
|
-
# Compute errors
|
|
89
|
-
error = obs - pred
|
|
90
|
-
|
|
91
|
-
# Compute max obs and pred
|
|
92
|
-
obs_max = obs.max(dim=dim, skipna=False)
|
|
93
|
-
pred_max = pred.max(dim=dim, skipna=False)
|
|
94
|
-
|
|
95
|
-
# Compute NaN mask
|
|
96
|
-
mask_nan = np.logical_or(np.isnan(obs_max), np.isnan(pred_max))
|
|
97
|
-
|
|
98
|
-
# Compute GOF statistics
|
|
99
|
-
with suppress_warnings():
|
|
100
|
-
# Compute Pearson Correlation
|
|
101
|
-
pearson_r = xr.corr(obs, pred, dim=dim)
|
|
102
|
-
|
|
103
|
-
# Compute Mean Absolute Error (MAE)
|
|
104
|
-
mae = np.abs(error).mean(dim=dim, skipna=False)
|
|
105
|
-
|
|
106
|
-
# Compute maximum absolute error
|
|
107
|
-
max_error = np.abs(error).max(dim=dim, skipna=False)
|
|
108
|
-
relative_max_error = xr.where(max_error == 0, 0, xr.where(obs_max == 0, np.nan, max_error / obs_max))
|
|
109
|
-
|
|
110
|
-
# Compute deviation of N(D) at distribution mode
|
|
111
|
-
mode_deviation = obs_max - pred_max
|
|
112
|
-
mode_relative_deviation = xr.where(
|
|
113
|
-
mode_deviation == 0,
|
|
114
|
-
0,
|
|
115
|
-
xr.where(obs_max == 0, np.nan, mode_deviation / obs_max),
|
|
116
|
-
)
|
|
117
|
-
|
|
118
|
-
# Compute diameter difference of the distribution mode
|
|
119
|
-
diameter_mode_pred = get_mode_diameter(pred, diameter)
|
|
120
|
-
diameter_mode_obs = get_mode_diameter(obs, diameter)
|
|
121
|
-
diameter_mode_deviation = diameter_mode_obs - diameter_mode_pred
|
|
122
|
-
|
|
123
|
-
# Compute difference in total number concentration
|
|
124
|
-
total_number_concentration_obs = (obs * diameter_bin_width).sum(dim=dim, skipna=False)
|
|
125
|
-
total_number_concentration_pred = (pred * diameter_bin_width).sum(dim=dim, skipna=False)
|
|
126
|
-
total_number_concentration_difference = total_number_concentration_pred - total_number_concentration_obs
|
|
127
|
-
|
|
128
|
-
# Compute Kullback-Leibler divergence
|
|
129
|
-
# - Compute pdf per bin
|
|
130
|
-
pk_pdf = obs / total_number_concentration_obs
|
|
131
|
-
qk_pdf = pred / total_number_concentration_pred
|
|
132
|
-
|
|
133
|
-
# - Compute probabilities per bin
|
|
134
|
-
pk = pk_pdf * diameter_bin_width
|
|
135
|
-
pk = pk / pk.sum(dim=dim, skipna=False) # this might not be necessary
|
|
136
|
-
qk = qk_pdf * diameter_bin_width
|
|
137
|
-
qk = qk / qk.sum(dim=dim, skipna=False) # this might not be necessary
|
|
138
|
-
|
|
139
|
-
# - Compute log probability ratio
|
|
140
|
-
epsilon = 1e-10
|
|
141
|
-
pk = xr.where(pk == 0, epsilon, pk)
|
|
142
|
-
qk = xr.where(qk == 0, epsilon, qk)
|
|
143
|
-
log_prob_ratio = np.log(pk / qk)
|
|
144
|
-
log_prob_ratio = log_prob_ratio.where(np.isfinite(log_prob_ratio))
|
|
145
|
-
|
|
146
|
-
# - Compute divergence
|
|
147
|
-
kl_divergence = (pk * log_prob_ratio).sum(dim=dim, skipna=False)
|
|
148
|
-
kl_divergence = xr.where((error == 0).all(dim=dim), 0, kl_divergence)
|
|
149
|
-
|
|
150
|
-
# Create an xarray.Dataset to hold the computed statistics
|
|
151
|
-
ds = xr.Dataset(
|
|
152
|
-
{
|
|
153
|
-
"R2": pearson_r**2, # Squared Pearson correlation coefficient
|
|
154
|
-
"MAE": mae, # Mean Absolute Error
|
|
155
|
-
"MaxAE": max_error, # Maximum Absolute Error
|
|
156
|
-
"RelMaxAE": relative_max_error, # Relative Maximum Absolute Error
|
|
157
|
-
"PeakDiff": mode_deviation, # Difference at distribution peak
|
|
158
|
-
"RelPeakDiff": mode_relative_deviation, # Relative difference at peak
|
|
159
|
-
"DmodeDiff": diameter_mode_deviation, # Difference in mode diameters
|
|
160
|
-
"NtDiff": total_number_concentration_difference,
|
|
161
|
-
"KLDiv": kl_divergence, # Kullback-Leibler divergence
|
|
162
|
-
},
|
|
163
|
-
)
|
|
164
|
-
# Round
|
|
165
|
-
ds = ds.round(2)
|
|
166
|
-
# Mask where input obs or pred is NaN
|
|
167
|
-
ds = ds.where(~mask_nan)
|
|
168
|
-
return ds
|
|
169
|
-
|
|
170
|
-
|
|
171
74
|
####--------------------------------------------------------------------------------------.
|
|
172
75
|
#### Maximum Likelihood (ML)
|
|
173
76
|
|
|
@@ -186,8 +89,9 @@ def get_expected_probabilities(params, cdf_func, pdf_func, bin_edges, probabilit
|
|
|
186
89
|
Probability density function (PDF) that takes a value and parameters as inputs.
|
|
187
90
|
bin_edges : array-like
|
|
188
91
|
Edges of the bins for which to compute the probabilities.
|
|
189
|
-
probability_method :
|
|
190
|
-
Method to compute the probabilities.
|
|
92
|
+
probability_method : str
|
|
93
|
+
Method to compute the probabilities. Valid values are 'cdf' and 'pdf'.
|
|
94
|
+
If 'cdf', use the CDF to compute probabilities.
|
|
191
95
|
If 'pdf', integrate the PDF over each bin range.
|
|
192
96
|
normalized : bool, optional
|
|
193
97
|
If True, normalize the probabilities to sum to 1. Default is False.
|
|
@@ -365,7 +269,7 @@ def estimate_lognormal_parameters(
|
|
|
365
269
|
|
|
366
270
|
References
|
|
367
271
|
----------
|
|
368
|
-
|
|
272
|
+
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.lognorm.html#scipy.stats.lognorm
|
|
369
273
|
"""
|
|
370
274
|
# Define initial guess for the parameters
|
|
371
275
|
scale = np.exp(mu) # mu = np.log(scale)
|
|
@@ -477,12 +381,13 @@ def estimate_exponential_parameters(
|
|
|
477
381
|
Notes
|
|
478
382
|
-----
|
|
479
383
|
The exponential distribution is defined as:
|
|
480
|
-
|
|
384
|
+
|
|
385
|
+
N(D) = N0 * exp(-Lambda * D) = Nt * Lambda * exp(-Lambda * D)
|
|
481
386
|
where Lambda = 1 / scale and N0 = Nt * Lambda.
|
|
482
387
|
|
|
483
388
|
References
|
|
484
389
|
----------
|
|
485
|
-
|
|
390
|
+
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.expon.html
|
|
486
391
|
"""
|
|
487
392
|
# Define initial guess for parameters
|
|
488
393
|
scale = 1 / Lambda
|
|
@@ -558,8 +463,7 @@ def estimate_gamma_parameters(
|
|
|
558
463
|
output_dictionary=True,
|
|
559
464
|
optimizer="Nelder-Mead",
|
|
560
465
|
):
|
|
561
|
-
"""
|
|
562
|
-
Estimate the parameters of a gamma distribution given histogram data.
|
|
466
|
+
r"""Estimate the parameters of a gamma distribution given histogram data.
|
|
563
467
|
|
|
564
468
|
Parameters
|
|
565
469
|
----------
|
|
@@ -598,13 +502,29 @@ def estimate_gamma_parameters(
|
|
|
598
502
|
Notes
|
|
599
503
|
-----
|
|
600
504
|
The gamma distribution is defined as:
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
505
|
+
|
|
506
|
+
.. math::
|
|
507
|
+
|
|
508
|
+
N(D) = N_0 \, D^{\mu} \, \exp(-\Lambda D)
|
|
509
|
+
|
|
510
|
+
where:
|
|
511
|
+
|
|
512
|
+
- :math:`D` is the particle diameter,
|
|
513
|
+
- :math:`\Lambda = 1 / \text{scale}` is the slope parameter,
|
|
514
|
+
- :math:`\mu = a - 1` is the shape parameter, with :math:`a` the gamma distribution shape parameter.
|
|
515
|
+
|
|
516
|
+
The intercept parameter :math:`N_0` is defined as:
|
|
517
|
+
|
|
518
|
+
.. math::
|
|
519
|
+
|
|
520
|
+
N_0 = N_t \, \frac{\Lambda^{\mu + 1}}{\Gamma(\mu + 1)}
|
|
521
|
+
|
|
522
|
+
where :math:`N_t` is the total number concentration and
|
|
523
|
+
:math:`\Gamma(\cdot)` denotes the gamma function.
|
|
604
524
|
|
|
605
525
|
References
|
|
606
526
|
----------
|
|
607
|
-
|
|
527
|
+
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gamma.html
|
|
608
528
|
|
|
609
529
|
"""
|
|
610
530
|
# Define initial guess for parameters
|
|
@@ -764,20 +684,20 @@ def get_gamma_parameters(
|
|
|
764
684
|
truncated_likelihood=True,
|
|
765
685
|
optimizer="Nelder-Mead",
|
|
766
686
|
):
|
|
767
|
-
"""
|
|
768
|
-
Estimate gamma distribution parameters for drop size distribution (DSD) data.
|
|
687
|
+
"""Estimate gamma distribution parameters for drop size distribution (DSD) data.
|
|
769
688
|
|
|
770
689
|
Parameters
|
|
771
690
|
----------
|
|
772
691
|
ds : xarray.Dataset
|
|
773
692
|
Input dataset containing drop size distribution data. It must include the following variables:
|
|
693
|
+
|
|
774
694
|
- ``drop_number_concentration``: The number concentration of drops.
|
|
775
695
|
- ``diameter_bin_width``: The width of each diameter bin.
|
|
776
696
|
- ``diameter_bin_lower``: The lower bounds of the diameter bins.
|
|
777
697
|
- ``diameter_bin_upper``: The upper bounds of the diameter bins.
|
|
778
698
|
- ``diameter_bin_center``: The center values of the diameter bins.
|
|
779
|
-
- The moments M0...M6 variables required to compute the initial parameters
|
|
780
|
-
|
|
699
|
+
- The moment variables (M0...M6) required to compute the initial parameters with the specified ``init_method``.
|
|
700
|
+
|
|
781
701
|
init_method: str or list
|
|
782
702
|
The method(s) of moments used to initialize the gamma parameters.
|
|
783
703
|
If None (or 'None'), the scale parameter is set to 1 and mu to 0 (a=1).
|
|
@@ -795,9 +715,11 @@ def get_gamma_parameters(
|
|
|
795
715
|
-------
|
|
796
716
|
xarray.Dataset
|
|
797
717
|
Dataset containing the estimated gamma distribution parameters:
|
|
718
|
+
|
|
798
719
|
- ``N0``: Intercept parameter.
|
|
799
720
|
- ``mu``: Shape parameter.
|
|
800
721
|
- ``Lambda``: Slope parameter (inverse of the gamma distribution scale).
|
|
722
|
+
|
|
801
723
|
The dataset will also have an attribute ``disdrodb_psd_model`` set to ``GammaPSD``.
|
|
802
724
|
|
|
803
725
|
Notes
|
|
@@ -872,12 +794,14 @@ def get_lognormal_parameters(
|
|
|
872
794
|
Parameters
|
|
873
795
|
----------
|
|
874
796
|
ds : xarray.Dataset
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
797
|
+
Input dataset containing drop size distribution data. It must include the following variables:
|
|
798
|
+
|
|
799
|
+
- ``drop_number_concentration``: The number concentration of drops.
|
|
800
|
+
- ``diameter_bin_width``: The width of each diameter bin.
|
|
801
|
+
- ``diameter_bin_lower``: The lower bounds of the diameter bins.
|
|
802
|
+
- ``diameter_bin_upper``: The upper bounds of the diameter bins.
|
|
803
|
+
- ``diameter_bin_center``: The center values of the diameter bins.
|
|
804
|
+
|
|
881
805
|
probability_method : str, optional
|
|
882
806
|
Method to compute probabilities. The default value is ``cdf``.
|
|
883
807
|
likelihood : str, optional
|
|
@@ -891,9 +815,11 @@ def get_lognormal_parameters(
|
|
|
891
815
|
-------
|
|
892
816
|
xarray.Dataset
|
|
893
817
|
Dataset containing the estimated lognormal distribution parameters:
|
|
818
|
+
|
|
894
819
|
- ``Nt``: Total number concentration.
|
|
895
820
|
- ``mu``: Mean of the lognormal distribution.
|
|
896
821
|
- ``sigma``: Standard deviation of the lognormal distribution.
|
|
822
|
+
|
|
897
823
|
The resulting dataset will have an attribute ``disdrodb_psd_model`` set to ``LognormalPSD``.
|
|
898
824
|
|
|
899
825
|
Notes
|
|
@@ -1038,407 +964,1150 @@ def get_exponential_parameters(
|
|
|
1038
964
|
#### Grid Search (GS)
|
|
1039
965
|
|
|
1040
966
|
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
967
|
+
DEFAULT_OBJECTIVES = [
|
|
968
|
+
{
|
|
969
|
+
"target": "N(D)",
|
|
970
|
+
"transformation": "identity",
|
|
971
|
+
"loss": "SSE",
|
|
972
|
+
"censoring": "none",
|
|
973
|
+
"loss_weight": 0.8,
|
|
974
|
+
},
|
|
975
|
+
{
|
|
976
|
+
"target": "Z",
|
|
977
|
+
"transformation": "identity",
|
|
978
|
+
"loss": "AE",
|
|
979
|
+
"censoring": "none",
|
|
980
|
+
"loss_weight": 0.2,
|
|
981
|
+
},
|
|
982
|
+
]
|
|
1052
983
|
|
|
1053
|
-
def _compute_z(ND, D, dD):
|
|
1054
|
-
axis = 1 if ND.ndim == 2 else None
|
|
1055
|
-
z = np.sum(((D) ** 6 * ND * dD), axis=axis) # mm⁶·m⁻³
|
|
1056
|
-
Z = 10 * np.log10(z)
|
|
1057
|
-
return Z
|
|
1058
984
|
|
|
985
|
+
def apply_exponential_gs(
|
|
986
|
+
Nt,
|
|
987
|
+
ND_obs,
|
|
988
|
+
V,
|
|
989
|
+
# Coords
|
|
990
|
+
D,
|
|
991
|
+
dD,
|
|
992
|
+
# PSD parameters
|
|
993
|
+
Lambda,
|
|
994
|
+
# Optimization options
|
|
995
|
+
objectives,
|
|
996
|
+
# Output options
|
|
997
|
+
return_loss=False,
|
|
998
|
+
):
|
|
999
|
+
"""Estimate ExponentialPSD model parameters using Grid Search.
|
|
1059
1000
|
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
obs = _compute_z(ND_obs, D, dD)
|
|
1064
|
-
pred = _compute_z(ND_preds, D, dD)
|
|
1065
|
-
elif target == "R":
|
|
1066
|
-
obs = _compute_rain_rate(ND_obs, D, dD, V)
|
|
1067
|
-
pred = _compute_rain_rate(ND_preds, D, dD, V)
|
|
1068
|
-
else: # "LWC"
|
|
1069
|
-
obs = _compute_lwc(ND_obs, D, dD)
|
|
1070
|
-
pred = _compute_lwc(ND_preds, D, dD)
|
|
1001
|
+
This function performs a grid search optimization to find the best parameters
|
|
1002
|
+
(N0, Lambda) for the ExponentialPSD model by minimizing a weighted
|
|
1003
|
+
cost function across one or more objectives.
|
|
1071
1004
|
|
|
1072
|
-
|
|
1073
|
-
|
|
1005
|
+
Parameters
|
|
1006
|
+
----------
|
|
1007
|
+
Nt : float
|
|
1008
|
+
Total number concentration.
|
|
1009
|
+
ND_obs : numpy.ndarray
|
|
1010
|
+
Observed PSD data [#/mm/m3].
|
|
1011
|
+
V : numpy.ndarray
|
|
1012
|
+
Fall velocity [m/s].
|
|
1013
|
+
D : numpy.ndarray
|
|
1014
|
+
Diameter bins [mm].
|
|
1015
|
+
dD : numpy.ndarray
|
|
1016
|
+
Diameter bin widths [mm].
|
|
1017
|
+
Lambda : int, float or numpy.ndarray
|
|
1018
|
+
Lambda parameter values to search.
|
|
1019
|
+
objectives: list of dict
|
|
1020
|
+
target : str, optional
|
|
1021
|
+
Target quantity to optimize. Valid options:
|
|
1022
|
+
|
|
1023
|
+
- ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
1024
|
+
- ``"R"`` : Rain rate [mm h⁻¹]
|
|
1025
|
+
- ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
|
|
1026
|
+
- ``"LWC"`` : Liquid water content [g m⁻³]
|
|
1027
|
+
- ``"M<p>"`` : Moment of order p
|
|
1028
|
+
|
|
1029
|
+
transformation : str, optional
|
|
1030
|
+
Transformation applied to the target quantity before computing the loss.
|
|
1031
|
+
Valid options:
|
|
1032
|
+
|
|
1033
|
+
- ``"identity"`` : No transformation
|
|
1034
|
+
- ``"log"`` : Logarithmic transformation
|
|
1035
|
+
- ``"sqrt"`` : Square root transformation
|
|
1036
|
+
|
|
1037
|
+
censoring : str
|
|
1038
|
+
Specifies whether the observed particle size distribution (PSD) is
|
|
1039
|
+
treated as censored at the edges of the diameter range due to
|
|
1040
|
+
instrumental sensitivity limits:
|
|
1041
|
+
|
|
1042
|
+
- ``"none"`` : No censoring is applied. All diameter bins are used.
|
|
1043
|
+
- ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
|
|
1044
|
+
the spectrum where the observed number concentration is zero are
|
|
1045
|
+
removed prior to cost-function evaluation.
|
|
1046
|
+
- ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
|
|
1047
|
+
the spectrum where the observed number concentration is zero are
|
|
1048
|
+
removed prior to cost-function evaluation.
|
|
1049
|
+
- ``"both"`` : Both left- and right-censored PSD. Only the contiguous
|
|
1050
|
+
range of diameter bins with non-zero observed concentrations is
|
|
1051
|
+
retained.
|
|
1052
|
+
|
|
1053
|
+
loss : str, optional
|
|
1054
|
+
Loss function.
|
|
1055
|
+
If target is ``"N(D)"``, valid options are:
|
|
1056
|
+
|
|
1057
|
+
- ``SSE``: Sum of Squared Errors
|
|
1058
|
+
- ``SAE``: Sum of Absolute Errors
|
|
1059
|
+
- ``MAE``: Mean Absolute Error
|
|
1060
|
+
- ``MSE``: Mean Squared Error
|
|
1061
|
+
- ``RMSE``: Root Mean Squared Error
|
|
1062
|
+
- ``relMAE``: Relative Mean Absolute Error
|
|
1063
|
+
- ``KLDiv``: Kullback-Leibler Divergence
|
|
1064
|
+
- ``WD``: Wasserstein Distance
|
|
1065
|
+
- ``JSD``: Jensen-Shannon Distance
|
|
1066
|
+
- ``KS``: Kolmogorov-Smirnov Statistic
|
|
1067
|
+
|
|
1068
|
+
If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
|
|
1069
|
+
- ``AE``: Absolute Error
|
|
1070
|
+
- ``SE``: Squared Error
|
|
1071
|
+
|
|
1072
|
+
loss_weight : float, optional
|
|
1073
|
+
Weight of this objective when multiple objectives are used.
|
|
1074
|
+
Must be specified if more than one objective is specified.
|
|
1075
|
+
return_loss : bool, optional
|
|
1076
|
+
If True, return both the loss surface and parameters.
|
|
1077
|
+
Default is False.
|
|
1074
1078
|
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1079
|
+
Returns
|
|
1080
|
+
-------
|
|
1081
|
+
parameters : numpy.ndarray
|
|
1082
|
+
Best parameters as [N0, Lambda].
|
|
1083
|
+
An array of NaN values is returned if no valid solution is found.
|
|
1084
|
+
total_loss : numpy.ndarray, optional
|
|
1085
|
+
1D array of total loss values.
|
|
1086
|
+
Only returned if return_loss=True.
|
|
1078
1087
|
|
|
1079
|
-
|
|
1088
|
+
Notes
|
|
1089
|
+
-----
|
|
1090
|
+
When multiple objectives are provided, losses are normalized and weighted.
|
|
1091
|
+
The best parameters correspond to the minimum total weighted loss.
|
|
1092
|
+
"""
|
|
1093
|
+
# Ensure input is numpy array
|
|
1094
|
+
Nt = np.asarray(Nt)
|
|
1095
|
+
ND_obs = np.asarray(ND_obs)
|
|
1096
|
+
V = np.asarray(V)
|
|
1080
1097
|
|
|
1098
|
+
# Convert lambda to array if needed
|
|
1099
|
+
if not isinstance(Lambda, np.ndarray):
|
|
1100
|
+
Lambda = np.atleast_1d(Lambda)
|
|
1081
1101
|
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
return errors
|
|
1088
|
-
if transformation == "log":
|
|
1089
|
-
errors = np.mean(np.abs(np.log(ND_obs[None, :] + 1) - np.log(ND_preds + 1)) ** error_order, axis=1)
|
|
1090
|
-
return errors
|
|
1091
|
-
if transformation == "sqrt":
|
|
1092
|
-
errors = np.mean(np.abs(np.sqrt(ND_obs[None, :]) - np.sqrt(ND_preds)) ** error_order, axis=1)
|
|
1093
|
-
return errors
|
|
1094
|
-
# if target in ["Z", "R", "LWC"]:
|
|
1095
|
-
return _compute_target_variable_error(target, ND_obs, ND_preds, D, dD, V)
|
|
1102
|
+
# Perform grid search
|
|
1103
|
+
with suppress_warnings():
|
|
1104
|
+
# Compute N(D)
|
|
1105
|
+
N0_arr = Nt * Lambda
|
|
1106
|
+
ND_preds = ExponentialPSD.formula(D=D[None, :], N0=N0_arr[:, None], Lambda=Lambda[:, None])
|
|
1096
1107
|
|
|
1108
|
+
# Compute loss
|
|
1109
|
+
total_loss = compute_weighted_loss(
|
|
1110
|
+
ND_obs=ND_obs,
|
|
1111
|
+
ND_preds=ND_preds,
|
|
1112
|
+
D=D,
|
|
1113
|
+
dD=dD,
|
|
1114
|
+
V=V,
|
|
1115
|
+
objectives=objectives,
|
|
1116
|
+
)
|
|
1097
1117
|
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1118
|
+
# Define best parameters
|
|
1119
|
+
if not np.all(np.isnan(total_loss)):
|
|
1120
|
+
best_index = np.nanargmin(total_loss)
|
|
1121
|
+
N0 = N0_arr[best_index].item()
|
|
1122
|
+
Lambda_best = Lambda[best_index].item()
|
|
1123
|
+
parameters = np.array([N0, Lambda_best])
|
|
1124
|
+
else:
|
|
1125
|
+
parameters = np.array([np.nan, np.nan])
|
|
1101
1126
|
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
Center of the range (e.g., current best estimate).
|
|
1106
|
-
step : float
|
|
1107
|
-
Coarse step size used in the first search.
|
|
1108
|
-
bounds : tuple of (float, float)
|
|
1109
|
-
Lower and upper bounds (can include -np.inf, np.inf).
|
|
1110
|
-
factor : float, optional
|
|
1111
|
-
How wide the refined range extends from the center (in multiples of step).
|
|
1112
|
-
Default = 2.
|
|
1113
|
-
refinement : int, optional
|
|
1114
|
-
Factor to refine the step size (smaller step = finer grid).
|
|
1115
|
-
Default = 20.
|
|
1127
|
+
# If asked, return cost function
|
|
1128
|
+
if return_loss:
|
|
1129
|
+
return parameters, total_loss
|
|
1116
1130
|
|
|
1117
|
-
|
|
1118
|
-
-------
|
|
1119
|
-
np.ndarray
|
|
1120
|
-
Array of values constrained to bounds.
|
|
1121
|
-
"""
|
|
1122
|
-
lower = max(center - factor * step, bounds[0])
|
|
1123
|
-
upper = min(center + factor * step, bounds[1])
|
|
1124
|
-
new_step = step / refinement
|
|
1125
|
-
return np.arange(lower, upper, new_step)
|
|
1131
|
+
return parameters
|
|
1126
1132
|
|
|
1127
1133
|
|
|
1128
|
-
def
|
|
1134
|
+
def apply_gamma_gs(
|
|
1129
1135
|
Nt,
|
|
1130
1136
|
ND_obs,
|
|
1131
1137
|
V,
|
|
1132
1138
|
# Coords
|
|
1133
1139
|
D,
|
|
1134
1140
|
dD,
|
|
1135
|
-
#
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1141
|
+
# PSD parameters
|
|
1142
|
+
mu,
|
|
1143
|
+
Lambda,
|
|
1144
|
+
# Optimization options
|
|
1145
|
+
objectives,
|
|
1146
|
+
# Output options
|
|
1147
|
+
return_loss=False,
|
|
1139
1148
|
):
|
|
1140
|
-
"""
|
|
1141
|
-
|
|
1142
|
-
|
|
1149
|
+
"""Estimate GammaPSD model parameters using Grid Search.
|
|
1150
|
+
|
|
1151
|
+
This function performs a grid search optimization to find the best parameters
|
|
1152
|
+
(mu, Lambda) for the GammaPSD model by minimizing a weighted
|
|
1153
|
+
cost function across one or more objectives.
|
|
1154
|
+
|
|
1155
|
+
Parameters
|
|
1156
|
+
----------
|
|
1157
|
+
Nt : float
|
|
1158
|
+
Total number concentration.
|
|
1159
|
+
ND_obs : numpy.ndarray
|
|
1160
|
+
Observed PSD data [#/mm/m3].
|
|
1161
|
+
V : numpy.ndarray
|
|
1162
|
+
Fall velocity [m/s].
|
|
1163
|
+
D : numpy.ndarray
|
|
1164
|
+
Diameter bins [mm].
|
|
1165
|
+
dD : numpy.ndarray
|
|
1166
|
+
Diameter bin widths [mm].
|
|
1167
|
+
mu : int, float or numpy.ndarray
|
|
1168
|
+
mu parameter values to search.
|
|
1169
|
+
Lambda : int, float or numpy.ndarray
|
|
1170
|
+
Lambda parameter values to search.
|
|
1171
|
+
objectives: list of dict
|
|
1172
|
+
target : str, optional
|
|
1173
|
+
Target quantity to optimize. Valid options:
|
|
1174
|
+
|
|
1175
|
+
- ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
1176
|
+
- ``"R"`` : Rain rate [mm h⁻¹]
|
|
1177
|
+
- ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
|
|
1178
|
+
- ``"LWC"`` : Liquid water content [g m⁻³]
|
|
1179
|
+
- ``"M<p>"`` : Moment of order p
|
|
1180
|
+
|
|
1181
|
+
transformation : str, optional
|
|
1182
|
+
Transformation applied to the target quantity before computing the loss.
|
|
1183
|
+
Valid options:
|
|
1184
|
+
|
|
1185
|
+
- ``"identity"`` : No transformation
|
|
1186
|
+
- ``"log"`` : Logarithmic transformation
|
|
1187
|
+
- ``"sqrt"`` : Square root transformation
|
|
1188
|
+
|
|
1189
|
+
censoring : str
|
|
1190
|
+
Specifies whether the observed particle size distribution (PSD) is
|
|
1191
|
+
treated as censored at the edges of the diameter range due to
|
|
1192
|
+
instrumental sensitivity limits:
|
|
1193
|
+
|
|
1194
|
+
- ``"none"`` : No censoring is applied. All diameter bins are used.
|
|
1195
|
+
- ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
|
|
1196
|
+
the spectrum where the observed number concentration is zero are
|
|
1197
|
+
removed prior to cost-function evaluation.
|
|
1198
|
+
- ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
|
|
1199
|
+
the spectrum where the observed number concentration is zero are
|
|
1200
|
+
removed prior to cost-function evaluation.
|
|
1201
|
+
- ``"both"`` : Both left- and right-censored PSD. Only the contiguous
|
|
1202
|
+
range of diameter bins with non-zero observed concentrations is
|
|
1203
|
+
retained.
|
|
1204
|
+
|
|
1205
|
+
loss : str, optional
|
|
1206
|
+
Loss function.
|
|
1207
|
+
If target is ``"N(D)"``, valid options are:
|
|
1208
|
+
|
|
1209
|
+
- ``SSE``: Sum of Squared Errors
|
|
1210
|
+
- ``SAE``: Sum of Absolute Errors
|
|
1211
|
+
- ``MAE``: Mean Absolute Error
|
|
1212
|
+
- ``MSE``: Mean Squared Error
|
|
1213
|
+
- ``RMSE``: Root Mean Squared Error
|
|
1214
|
+
- ``relMAE``: Relative Mean Absolute Error
|
|
1215
|
+
- ``KLDiv``: Kullback-Leibler Divergence
|
|
1216
|
+
- ``WD``: Wasserstein Distance
|
|
1217
|
+
- ``JSD``: Jensen-Shannon Distance
|
|
1218
|
+
- ``KS``: Kolmogorov-Smirnov Statistic
|
|
1219
|
+
|
|
1220
|
+
If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
|
|
1221
|
+
|
|
1222
|
+
- ``AE``: Absolute Error
|
|
1223
|
+
- ``SE``: Squared Error
|
|
1224
|
+
|
|
1225
|
+
loss_weight : float, optional
|
|
1226
|
+
Weight of this objective when multiple objectives are used.
|
|
1227
|
+
Must be specified if more than one objective is specified.
|
|
1228
|
+
return_loss : bool, optional
|
|
1229
|
+
If True, return both the loss surface and parameters.
|
|
1230
|
+
Default is False.
|
|
1231
|
+
|
|
1232
|
+
Returns
|
|
1233
|
+
-------
|
|
1234
|
+
parameters : numpy.ndarray
|
|
1235
|
+
Best parameters as [N0, Lambda, mu].
|
|
1236
|
+
An array of NaN values is returned if no valid solution is found.
|
|
1237
|
+
total_loss : numpy.ndarray, optional
|
|
1238
|
+
2D array of total loss values reshaped to (len(Lambda), len(mu)).
|
|
1239
|
+
Only returned if return_loss=True.
|
|
1240
|
+
|
|
1241
|
+
Notes
|
|
1242
|
+
-----
|
|
1243
|
+
When multiple objectives are provided, losses are normalized and weighted.
|
|
1244
|
+
The best parameters correspond to the minimum total weighted loss.
|
|
1245
|
+
"""
|
|
1246
|
+
# Ensure input is numpy array
|
|
1247
|
+
Nt = np.asarray(Nt)
|
|
1248
|
+
ND_obs = np.asarray(ND_obs)
|
|
1249
|
+
V = np.asarray(V)
|
|
1250
|
+
|
|
1251
|
+
# Define combinations of parameters for grid search
|
|
1252
|
+
mu_grid, Lambda_grid = np.meshgrid(
|
|
1253
|
+
mu,
|
|
1254
|
+
Lambda,
|
|
1255
|
+
indexing="xy",
|
|
1256
|
+
)
|
|
1257
|
+
mu_arr = mu_grid.ravel()
|
|
1258
|
+
Lambda_arr = Lambda_grid.ravel()
|
|
1143
1259
|
|
|
1144
1260
|
# Perform grid search
|
|
1145
1261
|
with suppress_warnings():
|
|
1146
|
-
# Compute
|
|
1147
|
-
|
|
1148
|
-
ND_preds =
|
|
1262
|
+
# Compute N(D)
|
|
1263
|
+
N0 = np.exp(np.log(Nt) + (mu_arr[:, None] + 1) * np.log(Lambda_arr[:, None]) - gammaln(mu_arr[:, None] + 1))
|
|
1264
|
+
ND_preds = GammaPSD.formula(D=D[None, :], N0=N0, Lambda=Lambda_arr[:, None], mu=mu_arr[:, None])
|
|
1149
1265
|
|
|
1150
|
-
# Compute
|
|
1151
|
-
|
|
1266
|
+
# Compute loss
|
|
1267
|
+
total_loss = compute_weighted_loss(
|
|
1152
1268
|
ND_obs=ND_obs,
|
|
1153
1269
|
ND_preds=ND_preds,
|
|
1154
1270
|
D=D,
|
|
1155
1271
|
dD=dD,
|
|
1156
1272
|
V=V,
|
|
1157
|
-
|
|
1158
|
-
transformation=transformation,
|
|
1159
|
-
error_order=error_order,
|
|
1273
|
+
objectives=objectives,
|
|
1160
1274
|
)
|
|
1161
|
-
# Replace inf with NaN
|
|
1162
|
-
errors[~np.isfinite(errors)] = np.nan
|
|
1163
1275
|
|
|
1164
|
-
#
|
|
1165
|
-
if np.all(np.isnan(
|
|
1166
|
-
|
|
1276
|
+
# Define best parameters
|
|
1277
|
+
if not np.all(np.isnan(total_loss)):
|
|
1278
|
+
best_index = np.nanargmin(total_loss)
|
|
1279
|
+
N0_best = N0[best_index].item()
|
|
1280
|
+
mu_best = mu_arr[best_index].item()
|
|
1281
|
+
Lambda_best = Lambda_arr[best_index].item()
|
|
1282
|
+
parameters = np.array([N0_best, Lambda_best, mu_best])
|
|
1283
|
+
else:
|
|
1284
|
+
parameters = np.array([np.nan, np.nan, np.nan])
|
|
1285
|
+
|
|
1286
|
+
# If asked, return cost function
|
|
1287
|
+
if return_loss:
|
|
1288
|
+
total_loss = total_loss.reshape(mu_grid.shape)
|
|
1289
|
+
return parameters, total_loss
|
|
1290
|
+
|
|
1291
|
+
return parameters
|
|
1292
|
+
|
|
1293
|
+
|
|
1294
|
+
def apply_generalized_gamma_gs(
|
|
1295
|
+
Nt,
|
|
1296
|
+
ND_obs,
|
|
1297
|
+
V,
|
|
1298
|
+
# Coords
|
|
1299
|
+
D,
|
|
1300
|
+
dD,
|
|
1301
|
+
# PSD parameters
|
|
1302
|
+
mu,
|
|
1303
|
+
c,
|
|
1304
|
+
Lambda,
|
|
1305
|
+
# Optimization options
|
|
1306
|
+
objectives,
|
|
1307
|
+
# Output options
|
|
1308
|
+
return_loss=False,
|
|
1309
|
+
):
|
|
1310
|
+
"""Estimate GeneralizedGammaPSD model parameters using Grid Search.
|
|
1311
|
+
|
|
1312
|
+
This function performs a grid search optimization to find the best parameters
|
|
1313
|
+
(mu, c, Lambda) for the GeneralizedGammaPSD model by minimizing a weighted
|
|
1314
|
+
cost function across one or more objectives.
|
|
1315
|
+
|
|
1316
|
+
Parameters
|
|
1317
|
+
----------
|
|
1318
|
+
Nt : float
|
|
1319
|
+
Total number concentration.
|
|
1320
|
+
ND_obs : numpy.ndarray
|
|
1321
|
+
Observed PSD data [#/mm/m3].
|
|
1322
|
+
V : numpy.ndarray
|
|
1323
|
+
Fall velocity [m/s].
|
|
1324
|
+
D : numpy.ndarray
|
|
1325
|
+
Diameter bins [mm].
|
|
1326
|
+
dD : numpy.ndarray
|
|
1327
|
+
Diameter bin widths [mm].
|
|
1328
|
+
mu : int, float or numpy.ndarray
|
|
1329
|
+
mu parameter values to search.
|
|
1330
|
+
c : int, float or numpy.ndarray
|
|
1331
|
+
c parameter values to search.
|
|
1332
|
+
Lambda : int, float or numpy.ndarray
|
|
1333
|
+
Lambda parameter values to search.
|
|
1334
|
+
objectives: list of dict
|
|
1335
|
+
target : str, optional
|
|
1336
|
+
Target quantity to optimize. Valid options:
|
|
1337
|
+
|
|
1338
|
+
- ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
1339
|
+
- ``"R"`` : Rain rate [mm h⁻¹]
|
|
1340
|
+
- ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
|
|
1341
|
+
- ``"LWC"`` : Liquid water content [g m⁻³]
|
|
1342
|
+
- ``"M<p>"`` : Moment of order p
|
|
1343
|
+
|
|
1344
|
+
transformation : str, optional
|
|
1345
|
+
Transformation applied to the target quantity before computing the loss.
|
|
1346
|
+
Valid options:
|
|
1347
|
+
|
|
1348
|
+
- ``"identity"`` : No transformation
|
|
1349
|
+
- ``"log"`` : Logarithmic transformation
|
|
1350
|
+
- ``"sqrt"`` : Square root transformation
|
|
1351
|
+
|
|
1352
|
+
censoring : str
|
|
1353
|
+
Specifies whether the observed particle size distribution (PSD) is
|
|
1354
|
+
treated as censored at the edges of the diameter range due to
|
|
1355
|
+
instrumental sensitivity limits:
|
|
1356
|
+
|
|
1357
|
+
- ``"none"`` : No censoring is applied. All diameter bins are used.
|
|
1358
|
+
- ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
|
|
1359
|
+
the spectrum where the observed number concentration is zero are
|
|
1360
|
+
removed prior to cost-function evaluation.
|
|
1361
|
+
- ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
|
|
1362
|
+
the spectrum where the observed number concentration is zero are
|
|
1363
|
+
removed prior to cost-function evaluation.
|
|
1364
|
+
- ``"both"`` : Both left- and right-censored PSD. Only the contiguous
|
|
1365
|
+
range of diameter bins with non-zero observed concentrations is
|
|
1366
|
+
retained.
|
|
1367
|
+
|
|
1368
|
+
loss : str, optional
|
|
1369
|
+
Loss function.
|
|
1370
|
+
If target is ``"N(D)"``, valid options are:
|
|
1371
|
+
|
|
1372
|
+
- ``SSE``: Sum of Squared Errors
|
|
1373
|
+
- ``SAE``: Sum of Absolute Errors
|
|
1374
|
+
- ``MAE``: Mean Absolute Error
|
|
1375
|
+
- ``MSE``: Mean Squared Error
|
|
1376
|
+
- ``RMSE``: Root Mean Squared Error
|
|
1377
|
+
- ``relMAE``: Relative Mean Absolute Error
|
|
1378
|
+
- ``KLDiv``: Kullback-Leibler Divergence
|
|
1379
|
+
- ``WD``: Wasserstein Distance
|
|
1380
|
+
- ``JSD``: Jensen-Shannon Distance
|
|
1381
|
+
- ``KS``: Kolmogorov-Smirnov Statistic
|
|
1382
|
+
|
|
1383
|
+
If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
|
|
1384
|
+
|
|
1385
|
+
- ``AE``: Absolute Error
|
|
1386
|
+
- ``SE``: Squared Error
|
|
1387
|
+
|
|
1388
|
+
loss_weight: int, optional
|
|
1389
|
+
Weight of this objective when multiple objectives are used.
|
|
1390
|
+
Must be specified if more than one objective is specified.
|
|
1391
|
+
return_loss : bool, optional
|
|
1392
|
+
If True, return both the loss surface and parameters.
|
|
1393
|
+
Default is False.
|
|
1167
1394
|
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1395
|
+
Returns
|
|
1396
|
+
-------
|
|
1397
|
+
parameters : numpy.ndarray
|
|
1398
|
+
Best parameters as [Nt, Lambda, mu, c].
|
|
1399
|
+
An array of NaN values is returned if no valid solution is found.
|
|
1400
|
+
total_loss : numpy.ndarray, optional
|
|
1401
|
+
3D array of total loss values reshaped to (len(Lambda), len(mu), len(c)).
|
|
1402
|
+
Only returned if return_loss=True.
|
|
1171
1403
|
|
|
1404
|
+
Notes
|
|
1405
|
+
-----
|
|
1406
|
+
When multiple objectives are provided, losses are normalized and weighted.
|
|
1407
|
+
The best parameters correspond to the minimum total weighted loss.
|
|
1408
|
+
"""
|
|
1409
|
+
# Ensure input is numpy array
|
|
1410
|
+
Nt = np.asarray(Nt)
|
|
1411
|
+
ND_obs = np.asarray(ND_obs)
|
|
1412
|
+
V = np.asarray(V)
|
|
1172
1413
|
|
|
1173
|
-
def _apply_gamma_gs(mu_values, lambda_values, Nt, ND_obs, D, dD, V, target, transformation, error_order):
|
|
1174
|
-
"""Routine for GammaPSD parameters grid search."""
|
|
1175
1414
|
# Define combinations of parameters for grid search
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1415
|
+
mu_grid, Lambda_grid, c_grid = np.meshgrid(
|
|
1416
|
+
mu,
|
|
1417
|
+
Lambda,
|
|
1418
|
+
c,
|
|
1419
|
+
indexing="xy",
|
|
1420
|
+
)
|
|
1421
|
+
mu_arr = mu_grid.ravel()
|
|
1422
|
+
Lambda_arr = Lambda_grid.ravel()
|
|
1423
|
+
c_arr = c_grid.ravel()
|
|
1179
1424
|
|
|
1180
1425
|
# Perform grid search
|
|
1181
1426
|
with suppress_warnings():
|
|
1182
|
-
# Compute
|
|
1183
|
-
|
|
1184
|
-
|
|
1427
|
+
# Compute N(D)
|
|
1428
|
+
ND_preds = GeneralizedGammaPSD.formula(
|
|
1429
|
+
D=D[None, :],
|
|
1430
|
+
Nt=Nt,
|
|
1431
|
+
Lambda=Lambda_arr[:, None],
|
|
1432
|
+
mu=mu_arr[:, None],
|
|
1433
|
+
c=c_arr[:, None],
|
|
1434
|
+
)
|
|
1185
1435
|
|
|
1186
|
-
# Compute
|
|
1187
|
-
|
|
1436
|
+
# Compute loss
|
|
1437
|
+
total_loss = compute_weighted_loss(
|
|
1188
1438
|
ND_obs=ND_obs,
|
|
1189
1439
|
ND_preds=ND_preds,
|
|
1190
1440
|
D=D,
|
|
1191
1441
|
dD=dD,
|
|
1192
1442
|
V=V,
|
|
1193
|
-
|
|
1194
|
-
transformation=transformation,
|
|
1195
|
-
error_order=error_order,
|
|
1443
|
+
objectives=objectives,
|
|
1196
1444
|
)
|
|
1197
1445
|
|
|
1198
|
-
#
|
|
1199
|
-
|
|
1446
|
+
# Define best parameters
|
|
1447
|
+
if not np.all(np.isnan(total_loss)):
|
|
1448
|
+
best_index = np.nanargmin(total_loss)
|
|
1449
|
+
mu_best = mu_arr[best_index].item()
|
|
1450
|
+
c_best = c_arr[best_index].item()
|
|
1451
|
+
Lambda_best = Lambda_arr[best_index].item()
|
|
1452
|
+
parameters = np.array([Nt, Lambda_best, mu_best, c_best])
|
|
1453
|
+
else:
|
|
1454
|
+
parameters = np.array([np.nan, np.nan, np.nan, np.nan])
|
|
1200
1455
|
|
|
1201
|
-
# If
|
|
1202
|
-
if
|
|
1203
|
-
|
|
1456
|
+
# If asked, return cost function
|
|
1457
|
+
if return_loss:
|
|
1458
|
+
total_loss = total_loss.reshape(mu_grid.shape)
|
|
1459
|
+
return parameters, total_loss
|
|
1204
1460
|
|
|
1205
|
-
|
|
1206
|
-
best_index = np.nanargmin(errors)
|
|
1207
|
-
return N0[best_index].item(), mu_arr[best_index].item(), lambda_arr[best_index].item()
|
|
1461
|
+
return parameters
|
|
1208
1462
|
|
|
1209
1463
|
|
|
1210
|
-
def
|
|
1464
|
+
def apply_lognormal_gs(
|
|
1211
1465
|
Nt,
|
|
1212
1466
|
ND_obs,
|
|
1213
1467
|
V,
|
|
1214
1468
|
# Coords
|
|
1215
1469
|
D,
|
|
1216
1470
|
dD,
|
|
1217
|
-
#
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1471
|
+
# PSD parameters
|
|
1472
|
+
mu,
|
|
1473
|
+
sigma,
|
|
1474
|
+
# Optimization options
|
|
1475
|
+
objectives,
|
|
1476
|
+
# Output options
|
|
1477
|
+
return_loss=False,
|
|
1221
1478
|
):
|
|
1222
|
-
"""Estimate
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1479
|
+
"""Estimate LognormalPSD model parameters using Grid Search.
|
|
1480
|
+
|
|
1481
|
+
This function performs a grid search optimization to find the best parameters
|
|
1482
|
+
(mu, sigma) for the LognormalPSD model by minimizing a weighted
|
|
1483
|
+
cost function across one or more objectives.
|
|
1484
|
+
|
|
1485
|
+
Parameters
|
|
1486
|
+
----------
|
|
1487
|
+
Nt : float
|
|
1488
|
+
Total number concentration.
|
|
1489
|
+
ND_obs : numpy.ndarray
|
|
1490
|
+
Observed PSD data [#/mm/m3].
|
|
1491
|
+
V : numpy.ndarray
|
|
1492
|
+
Fall velocity [m/s].
|
|
1493
|
+
D : numpy.ndarray
|
|
1494
|
+
Diameter bins [mm].
|
|
1495
|
+
dD : numpy.ndarray
|
|
1496
|
+
Diameter bin widths [mm].
|
|
1497
|
+
mu : int, float or numpy.ndarray
|
|
1498
|
+
mu parameter values to search.
|
|
1499
|
+
sigma : int, float or numpy.ndarray
|
|
1500
|
+
sigma parameter values to search.
|
|
1501
|
+
objectives: list of dict
|
|
1502
|
+
target : str, optional
|
|
1503
|
+
Target quantity to optimize. Valid options:
|
|
1504
|
+
|
|
1505
|
+
- ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
1506
|
+
- ``"R"`` : Rain rate [mm h⁻¹]
|
|
1507
|
+
- ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
|
|
1508
|
+
- ``"LWC"`` : Liquid water content [g m⁻³]
|
|
1509
|
+
- ``"M<p>"`` : Moment of order p
|
|
1510
|
+
|
|
1511
|
+
transformation : str, optional
|
|
1512
|
+
Transformation applied to the target quantity before computing the loss.
|
|
1513
|
+
Valid options:
|
|
1514
|
+
|
|
1515
|
+
- ``"identity"`` : No transformation
|
|
1516
|
+
- ``"log"`` : Logarithmic transformation
|
|
1517
|
+
- ``"sqrt"`` : Square root transformation
|
|
1518
|
+
|
|
1519
|
+
censoring : str
|
|
1520
|
+
Specifies whether the observed particle size distribution (PSD) is
|
|
1521
|
+
treated as censored at the edges of the diameter range due to
|
|
1522
|
+
instrumental sensitivity limits:
|
|
1523
|
+
|
|
1524
|
+
- ``"none"`` : No censoring is applied. All diameter bins are used.
|
|
1525
|
+
- ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
|
|
1526
|
+
the spectrum where the observed number concentration is zero are
|
|
1527
|
+
removed prior to cost-function evaluation.
|
|
1528
|
+
- ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
|
|
1529
|
+
the spectrum where the observed number concentration is zero are
|
|
1530
|
+
removed prior to cost-function evaluation.
|
|
1531
|
+
- ``"both"`` : Both left- and right-censored PSD. Only the contiguous
|
|
1532
|
+
range of diameter bins with non-zero observed concentrations is
|
|
1533
|
+
retained.
|
|
1534
|
+
|
|
1535
|
+
loss : str, optional
|
|
1536
|
+
Loss function.
|
|
1537
|
+
If target is ``"N(D)"``, valid options are:
|
|
1538
|
+
|
|
1539
|
+
- ``SSE``: Sum of Squared Errors
|
|
1540
|
+
- ``SAE``: Sum of Absolute Errors
|
|
1541
|
+
- ``MAE``: Mean Absolute Error
|
|
1542
|
+
- ``MSE``: Mean Squared Error
|
|
1543
|
+
- ``RMSE``: Root Mean Squared Error
|
|
1544
|
+
- ``relMAE``: Relative Mean Absolute Error
|
|
1545
|
+
- ``KLDiv``: Kullback-Leibler Divergence
|
|
1546
|
+
- ``WD``: Wasserstein Distance
|
|
1547
|
+
- ``JSD``: Jensen-Shannon Distance
|
|
1548
|
+
- ``KS``: Kolmogorov-Smirnov Statistic
|
|
1549
|
+
|
|
1550
|
+
If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
|
|
1551
|
+
|
|
1552
|
+
- ``AE``: Absolute Error
|
|
1553
|
+
- ``SE``: Squared Error
|
|
1554
|
+
|
|
1555
|
+
loss_weight: int, optional
|
|
1556
|
+
Weight of this objective when multiple objectives are used.
|
|
1557
|
+
Must be specified if more than one objective is specified.
|
|
1558
|
+
return_loss : bool, optional
|
|
1559
|
+
If True, return both the loss surface and parameters.
|
|
1560
|
+
Default is False.
|
|
1265
1561
|
|
|
1266
|
-
|
|
1562
|
+
Returns
|
|
1563
|
+
-------
|
|
1564
|
+
parameters : numpy.ndarray
|
|
1565
|
+
Best parameters as [Nt, mu, sigma].
|
|
1566
|
+
An array of NaN values is returned if no valid solution is found.
|
|
1567
|
+
total_loss : numpy.ndarray, optional
|
|
1568
|
+
2D array of total loss values reshaped to (len(sigma), len(mu)).
|
|
1569
|
+
Only returned if return_loss=True.
|
|
1267
1570
|
|
|
1571
|
+
Notes
|
|
1572
|
+
-----
|
|
1573
|
+
When multiple objectives are provided, losses are normalized and weighted.
|
|
1574
|
+
The best parameters correspond to the minimum total weighted loss.
|
|
1575
|
+
"""
|
|
1576
|
+
# Ensure input is numpy array
|
|
1577
|
+
Nt = np.asarray(Nt)
|
|
1578
|
+
ND_obs = np.asarray(ND_obs)
|
|
1579
|
+
V = np.asarray(V)
|
|
1268
1580
|
|
|
1269
|
-
def _apply_lognormal_gs(mu_values, sigma_values, Nt, ND_obs, D, dD, V, target, transformation, error_order):
|
|
1270
|
-
"""Routine for LognormalPSD parameters grid search."""
|
|
1271
1581
|
# Define combinations of parameters for grid search
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1582
|
+
mu_grid, sigma_grid = np.meshgrid(
|
|
1583
|
+
mu,
|
|
1584
|
+
sigma,
|
|
1585
|
+
indexing="xy",
|
|
1586
|
+
)
|
|
1587
|
+
mu_arr = mu_grid.ravel()
|
|
1588
|
+
sigma_arr = sigma_grid.ravel()
|
|
1275
1589
|
|
|
1276
1590
|
# Perform grid search
|
|
1277
1591
|
with suppress_warnings():
|
|
1278
|
-
# Compute
|
|
1592
|
+
# Compute N(D)
|
|
1279
1593
|
ND_preds = LognormalPSD.formula(D=D[None, :], Nt=Nt, mu=mu_arr[:, None], sigma=sigma_arr[:, None])
|
|
1280
1594
|
|
|
1281
|
-
# Compute
|
|
1282
|
-
|
|
1595
|
+
# Compute loss
|
|
1596
|
+
total_loss = compute_weighted_loss(
|
|
1283
1597
|
ND_obs=ND_obs,
|
|
1284
1598
|
ND_preds=ND_preds,
|
|
1285
1599
|
D=D,
|
|
1286
1600
|
dD=dD,
|
|
1287
1601
|
V=V,
|
|
1288
|
-
|
|
1289
|
-
transformation=transformation,
|
|
1290
|
-
error_order=error_order,
|
|
1602
|
+
objectives=objectives,
|
|
1291
1603
|
)
|
|
1292
1604
|
|
|
1293
|
-
#
|
|
1294
|
-
|
|
1605
|
+
# Define best parameters
|
|
1606
|
+
if not np.all(np.isnan(total_loss)):
|
|
1607
|
+
best_index = np.nanargmin(total_loss)
|
|
1608
|
+
mu_best = mu_arr[best_index].item()
|
|
1609
|
+
sigma_best = sigma_arr[best_index].item()
|
|
1610
|
+
parameters = np.array([Nt, mu_best, sigma_best])
|
|
1611
|
+
else:
|
|
1612
|
+
parameters = np.array([np.nan, np.nan, np.nan])
|
|
1295
1613
|
|
|
1296
|
-
# If
|
|
1297
|
-
if
|
|
1298
|
-
|
|
1614
|
+
# If asked, return cost function
|
|
1615
|
+
if return_loss:
|
|
1616
|
+
total_loss = total_loss.reshape(mu_grid.shape)
|
|
1617
|
+
return parameters, total_loss
|
|
1299
1618
|
|
|
1300
|
-
|
|
1301
|
-
best_index = np.nanargmin(errors)
|
|
1302
|
-
return Nt, mu_arr[best_index].item(), sigma_arr[best_index].item()
|
|
1619
|
+
return parameters
|
|
1303
1620
|
|
|
1304
1621
|
|
|
1305
|
-
def
|
|
1306
|
-
|
|
1622
|
+
def apply_normalized_gamma_gs(
|
|
1623
|
+
Nw,
|
|
1624
|
+
D50,
|
|
1307
1625
|
ND_obs,
|
|
1308
1626
|
V,
|
|
1309
1627
|
# Coords
|
|
1310
1628
|
D,
|
|
1311
1629
|
dD,
|
|
1312
|
-
#
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1630
|
+
# PSD parameters
|
|
1631
|
+
mu,
|
|
1632
|
+
# Optimization options
|
|
1633
|
+
objectives,
|
|
1634
|
+
# Output options
|
|
1635
|
+
return_loss=False,
|
|
1316
1636
|
):
|
|
1317
|
-
"""Estimate
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1637
|
+
"""Estimate NormalizedGammaPSD model parameters using Grid Search.
|
|
1638
|
+
|
|
1639
|
+
This function performs a grid search optimization to find the best parameter
|
|
1640
|
+
(mu) for the NormalizedGammaPSD model by minimizing a weighted
|
|
1641
|
+
cost function across one or more objectives.
|
|
1642
|
+
|
|
1643
|
+
Parameters
|
|
1644
|
+
----------
|
|
1645
|
+
Nw : float
|
|
1646
|
+
Normalized intercept parameter.
|
|
1647
|
+
D50 : float
|
|
1648
|
+
Median volume diameter parameter.
|
|
1649
|
+
ND_obs : numpy.ndarray
|
|
1650
|
+
Observed PSD data [#/mm/m3].
|
|
1651
|
+
V : numpy.ndarray
|
|
1652
|
+
Fall velocity [m/s].
|
|
1653
|
+
D : numpy.ndarray
|
|
1654
|
+
Diameter bins [mm].
|
|
1655
|
+
dD : numpy.ndarray
|
|
1656
|
+
Diameter bin widths [mm].
|
|
1657
|
+
mu : int, float or numpy.ndarray
|
|
1658
|
+
mu parameter values to search.
|
|
1659
|
+
objectives: list of dict
|
|
1660
|
+
target : str, optional
|
|
1661
|
+
Target quantity to optimize. Valid options:
|
|
1662
|
+
|
|
1663
|
+
- ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
1664
|
+
- ``"H(x)"`` : Normalized drop number concentration [-]
|
|
1665
|
+
- ``"R"`` : Rain rate [mm h⁻¹]
|
|
1666
|
+
- ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
|
|
1667
|
+
- ``"LWC"`` : Liquid water content [g m⁻³]
|
|
1668
|
+
- ``"M<p>"`` : Moment of order p
|
|
1669
|
+
|
|
1670
|
+
transformation : str, optional
|
|
1671
|
+
Transformation applied to the target quantity before computing the loss.
|
|
1672
|
+
Valid options:
|
|
1673
|
+
|
|
1674
|
+
- ``"identity"`` : No transformation
|
|
1675
|
+
- ``"log"`` : Logarithmic transformation
|
|
1676
|
+
- ``"sqrt"`` : Square root transformation
|
|
1677
|
+
|
|
1678
|
+
censoring : str
|
|
1679
|
+
Specifies whether the observed particle size distribution (PSD) is
|
|
1680
|
+
treated as censored at the edges of the diameter range due to
|
|
1681
|
+
instrumental sensitivity limits:
|
|
1682
|
+
|
|
1683
|
+
- ``"none"`` : No censoring is applied. All diameter bins are used.
|
|
1684
|
+
- ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
|
|
1685
|
+
the spectrum where the observed number concentration is zero are
|
|
1686
|
+
removed prior to cost-function evaluation.
|
|
1687
|
+
- ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
|
|
1688
|
+
the spectrum where the observed number concentration is zero are
|
|
1689
|
+
removed prior to cost-function evaluation.
|
|
1690
|
+
- ``"both"`` : Both left- and right-censored PSD. Only the contiguous
|
|
1691
|
+
range of diameter bins with non-zero observed concentrations is
|
|
1692
|
+
retained.
|
|
1693
|
+
|
|
1694
|
+
loss : str, optional
|
|
1695
|
+
Loss function.
|
|
1696
|
+
If target is ``"N(D)"`` or ``"H(x)"``, valid options are:
|
|
1697
|
+
|
|
1698
|
+
- ``SSE``: Sum of Squared Errors
|
|
1699
|
+
- ``SAE``: Sum of Absolute Errors
|
|
1700
|
+
- ``MAE``: Mean Absolute Error
|
|
1701
|
+
- ``MSE``: Mean Squared Error
|
|
1702
|
+
- ``RMSE``: Root Mean Squared Error
|
|
1703
|
+
- ``relMAE``: Relative Mean Absolute Error
|
|
1704
|
+
- ``KLDiv``: Kullback-Leibler Divergence
|
|
1705
|
+
- ``WD``: Wasserstein Distance
|
|
1706
|
+
- ``JSD``: Jensen-Shannon Distance
|
|
1707
|
+
- ``KS``: Kolmogorov-Smirnov Statistic
|
|
1708
|
+
|
|
1709
|
+
If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
|
|
1710
|
+
|
|
1711
|
+
- ``AE``: Absolute Error
|
|
1712
|
+
- ``SE``: Squared Error
|
|
1713
|
+
|
|
1714
|
+
loss_weight: int, optional
|
|
1715
|
+
Weight of this objective when multiple objectives are used.
|
|
1716
|
+
Must be specified if more than one objective is specified.
|
|
1717
|
+
return_loss : bool, optional
|
|
1718
|
+
If True, return both the loss surface and parameters.
|
|
1719
|
+
Default is False.
|
|
1347
1720
|
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1721
|
+
Returns
|
|
1722
|
+
-------
|
|
1723
|
+
parameters : numpy.ndarray
|
|
1724
|
+
Best parameters as [Nw, D50, mu].
|
|
1725
|
+
An array of NaN values is returned if no valid solution is found.
|
|
1726
|
+
total_loss : numpy.ndarray, optional
|
|
1727
|
+
1D array of total loss values.
|
|
1728
|
+
Only returned if return_loss=True.
|
|
1729
|
+
|
|
1730
|
+
Notes
|
|
1731
|
+
-----
|
|
1732
|
+
When multiple objectives are provided, losses are normalized and weighted.
|
|
1733
|
+
The best parameters correspond to the minimum total weighted loss.
|
|
1734
|
+
"""
|
|
1735
|
+
# Ensure input is numpy array
|
|
1736
|
+
Nw = np.asarray(Nw)
|
|
1737
|
+
D50 = np.asarray(D50)
|
|
1738
|
+
ND_obs = np.asarray(ND_obs)
|
|
1739
|
+
V = np.asarray(V)
|
|
1740
|
+
|
|
1741
|
+
# Convert mu to array if needed
|
|
1742
|
+
mu_arr = np.atleast_1d(mu) if not isinstance(mu, np.ndarray) else mu
|
|
1743
|
+
|
|
1744
|
+
# Perform grid search
|
|
1351
1745
|
with suppress_warnings():
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1746
|
+
# Compute N(D)
|
|
1747
|
+
ND_preds = NormalizedGammaPSD.formula(D=D[None, :], D50=D50, Nw=Nw, mu=mu_arr[:, None])
|
|
1748
|
+
|
|
1749
|
+
# Compute loss
|
|
1750
|
+
total_loss = compute_weighted_loss(
|
|
1751
|
+
ND_obs=ND_obs,
|
|
1752
|
+
ND_preds=ND_preds,
|
|
1753
|
+
D=D,
|
|
1754
|
+
dD=dD,
|
|
1755
|
+
V=V,
|
|
1756
|
+
objectives=objectives,
|
|
1757
|
+
Nc=Nw,
|
|
1758
|
+
)
|
|
1365
1759
|
|
|
1366
|
-
|
|
1760
|
+
# Define best parameters
|
|
1761
|
+
if not np.all(np.isnan(total_loss)):
|
|
1762
|
+
best_index = np.nanargmin(total_loss)
|
|
1763
|
+
mu_best = mu_arr[best_index].item()
|
|
1764
|
+
parameters = np.array([Nw, D50, mu_best])
|
|
1765
|
+
else:
|
|
1766
|
+
parameters = np.array([np.nan, np.nan, np.nan])
|
|
1367
1767
|
|
|
1768
|
+
# If asked, return cost function
|
|
1769
|
+
if return_loss:
|
|
1770
|
+
return parameters, total_loss
|
|
1368
1771
|
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1772
|
+
return parameters
|
|
1773
|
+
|
|
1774
|
+
|
|
1775
|
+
def apply_normalized_generalized_gamma_gs(
|
|
1776
|
+
Nc,
|
|
1777
|
+
Dc,
|
|
1372
1778
|
ND_obs,
|
|
1373
1779
|
V,
|
|
1374
1780
|
# Coords
|
|
1375
1781
|
D,
|
|
1376
1782
|
dD,
|
|
1377
|
-
#
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1783
|
+
# PSD parameters
|
|
1784
|
+
i,
|
|
1785
|
+
j,
|
|
1786
|
+
mu,
|
|
1787
|
+
c,
|
|
1788
|
+
# Optimization options
|
|
1789
|
+
objectives,
|
|
1790
|
+
# Output options
|
|
1791
|
+
return_loss=False,
|
|
1381
1792
|
):
|
|
1382
|
-
"""Estimate
|
|
1383
|
-
|
|
1384
|
-
|
|
1793
|
+
"""Estimate NormalizedGeneralizedGammaPSD model parameters using Grid Search.
|
|
1794
|
+
|
|
1795
|
+
This function performs a grid search optimization to find the best parameters
|
|
1796
|
+
(mu, c) for the NormalizedGeneralizedGammaPSD model by minimizing a weighted
|
|
1797
|
+
cost function across one or more objectives.
|
|
1798
|
+
|
|
1799
|
+
Parameters
|
|
1800
|
+
----------
|
|
1801
|
+
Nc : float
|
|
1802
|
+
Normalized intercept parameter.
|
|
1803
|
+
Dc : float
|
|
1804
|
+
Normalized characteristic diameter parameter.
|
|
1805
|
+
ND_obs : numpy.ndarray
|
|
1806
|
+
Observed PSD data [#/mm/m3].
|
|
1807
|
+
V : numpy.ndarray
|
|
1808
|
+
Fall velocity [m/s].
|
|
1809
|
+
D : numpy.ndarray
|
|
1810
|
+
Diameter bins [mm].
|
|
1811
|
+
dD : numpy.ndarray
|
|
1812
|
+
Diameter bin widths [mm].
|
|
1813
|
+
i : int
|
|
1814
|
+
Moment order i of the NormalizedGeneralizedGammaPSD.
|
|
1815
|
+
j : int
|
|
1816
|
+
Moment order j of the NormalizedGeneralizedGammaPSD.
|
|
1817
|
+
mu : int, float or numpy.ndarray
|
|
1818
|
+
mu parameter values to search.
|
|
1819
|
+
c : int, float or numpy.ndarray
|
|
1820
|
+
c parameter values to search.
|
|
1821
|
+
objectives: list of dict
|
|
1822
|
+
target : str, optional
|
|
1823
|
+
Target quantity to optimize. Valid options:
|
|
1824
|
+
|
|
1825
|
+
- ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
1826
|
+
- ``"H(x)"`` : Normalized drop number concentration [-]
|
|
1827
|
+
- ``"R"`` : Rain rate [mm h⁻¹]
|
|
1828
|
+
- ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
|
|
1829
|
+
- ``"LWC"`` : Liquid water content [g m⁻³]
|
|
1830
|
+
- ``"M<p>"`` : Moment of order p
|
|
1831
|
+
|
|
1832
|
+
transformation : str, optional
|
|
1833
|
+
Transformation applied to the target quantity before computing the loss.
|
|
1834
|
+
Valid options:
|
|
1835
|
+
|
|
1836
|
+
- ``"identity"`` : No transformation
|
|
1837
|
+
- ``"log"`` : Logarithmic transformation
|
|
1838
|
+
- ``"sqrt"`` : Square root transformation
|
|
1839
|
+
|
|
1840
|
+
censoring : str
|
|
1841
|
+
Specifies whether the observed particle size distribution (PSD) is
|
|
1842
|
+
treated as censored at the edges of the diameter range due to
|
|
1843
|
+
instrumental sensitivity limits:
|
|
1844
|
+
|
|
1845
|
+
- ``"none"`` : No censoring is applied. All diameter bins are used.
|
|
1846
|
+
- ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
|
|
1847
|
+
the spectrum where the observed number concentration is zero are
|
|
1848
|
+
removed prior to cost-function evaluation.
|
|
1849
|
+
- ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
|
|
1850
|
+
the spectrum where the observed number concentration is zero are
|
|
1851
|
+
removed prior to cost-function evaluation.
|
|
1852
|
+
- ``"both"`` : Both left- and right-censored PSD. Only the contiguous
|
|
1853
|
+
range of diameter bins with non-zero observed concentrations is
|
|
1854
|
+
retained.
|
|
1855
|
+
|
|
1856
|
+
loss : str, optional
|
|
1857
|
+
Loss function.
|
|
1858
|
+
If target is ``"N(D)"`` or ``"H(x)"``, valid options are:
|
|
1859
|
+
|
|
1860
|
+
- ``SSE``: Sum of Squared Errors
|
|
1861
|
+
- ``SAE``: Sum of Absolute Errors
|
|
1862
|
+
- ``MAE``: Mean Absolute Error
|
|
1863
|
+
- ``MSE``: Mean Squared Error
|
|
1864
|
+
- ``RMSE``: Root Mean Squared Error
|
|
1865
|
+
- ``relMAE``: Relative Mean Absolute Error
|
|
1866
|
+
- ``KLDiv``: Kullback-Leibler Divergence
|
|
1867
|
+
- ``WD``: Wasserstein Distance
|
|
1868
|
+
- ``JSD``: Jensen-Shannon Distance
|
|
1869
|
+
- ``KS``: Kolmogorov-Smirnov Statistic
|
|
1870
|
+
|
|
1871
|
+
If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
|
|
1872
|
+
|
|
1873
|
+
- ``AE``: Absolute Error
|
|
1874
|
+
- ``SE``: Squared Error
|
|
1875
|
+
|
|
1876
|
+
loss_weight: int, optional
|
|
1877
|
+
Weight of this objective when multiple objectives are used.
|
|
1878
|
+
Must be specified if more than one objective is specified.
|
|
1879
|
+
return_loss : bool, optional
|
|
1880
|
+
If True, return both the loss surface and parameters.
|
|
1881
|
+
Default is False.
|
|
1882
|
+
|
|
1883
|
+
Returns
|
|
1884
|
+
-------
|
|
1885
|
+
parameters : numpy.ndarray
|
|
1886
|
+
Best parameters as [Nc, Dc, mu, c].
|
|
1887
|
+
An array of NaN values is returned if no valid solution is found.
|
|
1888
|
+
total_loss : numpy.ndarray, optional
|
|
1889
|
+
2D array of total loss values reshaped to (len(c), len(mu)).
|
|
1890
|
+
Only returned if return_loss=True.
|
|
1891
|
+
|
|
1892
|
+
Notes
|
|
1893
|
+
-----
|
|
1894
|
+
When multiple objectives are provided, losses are normalized and weighted.
|
|
1895
|
+
The best parameters correspond to the minimum total weighted loss.
|
|
1896
|
+
"""
|
|
1897
|
+
# Thurai 2018: mu [-3, 1], c [0-6]
|
|
1898
|
+
|
|
1899
|
+
# Ensure input is numpy array
|
|
1900
|
+
Nc = np.asarray(Nc)
|
|
1901
|
+
Dc = np.asarray(Dc)
|
|
1902
|
+
ND_obs = np.asarray(ND_obs)
|
|
1903
|
+
V = np.asarray(V)
|
|
1904
|
+
|
|
1905
|
+
# Define combinations of parameters for grid search
|
|
1906
|
+
mu_grid, c_grid = np.meshgrid(
|
|
1907
|
+
mu,
|
|
1908
|
+
c,
|
|
1909
|
+
indexing="xy",
|
|
1910
|
+
)
|
|
1911
|
+
mu_arr = mu_grid.ravel()
|
|
1912
|
+
c_arr = c_grid.ravel()
|
|
1385
1913
|
|
|
1386
1914
|
# Perform grid search
|
|
1387
1915
|
with suppress_warnings():
|
|
1388
|
-
# Compute ND
|
|
1389
|
-
ND_preds = NormalizedGammaPSD.formula(D=D[None, :], D50=D50, Nw=Nw, mu=mu_arr[:, None])
|
|
1390
1916
|
|
|
1391
|
-
# Compute
|
|
1392
|
-
|
|
1917
|
+
# Compute N(D)
|
|
1918
|
+
ND_preds = NormalizedGeneralizedGammaPSD.formula(
|
|
1919
|
+
D=D[None, :],
|
|
1920
|
+
i=i,
|
|
1921
|
+
j=j,
|
|
1922
|
+
Nc=Nc,
|
|
1923
|
+
Dc=Dc,
|
|
1924
|
+
mu=mu_arr[:, None],
|
|
1925
|
+
c=c_arr[:, None],
|
|
1926
|
+
)
|
|
1927
|
+
|
|
1928
|
+
# Compute loss
|
|
1929
|
+
total_loss = compute_weighted_loss(
|
|
1393
1930
|
ND_obs=ND_obs,
|
|
1394
1931
|
ND_preds=ND_preds,
|
|
1395
1932
|
D=D,
|
|
1396
1933
|
dD=dD,
|
|
1397
1934
|
V=V,
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
error_order=error_order,
|
|
1935
|
+
objectives=objectives,
|
|
1936
|
+
Nc=Nc,
|
|
1401
1937
|
)
|
|
1402
1938
|
|
|
1403
|
-
#
|
|
1404
|
-
|
|
1939
|
+
# Define best parameters
|
|
1940
|
+
if not np.all(np.isnan(total_loss)):
|
|
1941
|
+
best_index = np.nanargmin(total_loss)
|
|
1942
|
+
mu, c = mu_arr[best_index].item(), c_arr[best_index].item()
|
|
1943
|
+
parameters = np.array([Nc, Dc, mu, c])
|
|
1944
|
+
else:
|
|
1945
|
+
parameters = np.array([np.nan, np.nan, np.nan, np.nan])
|
|
1946
|
+
|
|
1947
|
+
# If asked, return cost function
|
|
1948
|
+
if return_loss:
|
|
1949
|
+
total_loss = total_loss.reshape(mu_grid.shape)
|
|
1950
|
+
return parameters, total_loss
|
|
1951
|
+
return parameters
|
|
1952
|
+
|
|
1953
|
+
|
|
1954
|
+
def get_exponential_parameters_gs(
|
|
1955
|
+
ds,
|
|
1956
|
+
Lambda=None,
|
|
1957
|
+
objectives=None,
|
|
1958
|
+
return_loss=False,
|
|
1959
|
+
):
|
|
1960
|
+
"""Estimate Exponential PSD parameters using Grid Search optimization.
|
|
1405
1961
|
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1962
|
+
The parameter ``N_t`` is computed empirically from the observed DSD,
|
|
1963
|
+
while the shape parameter ``Lambda`` is estimated through
|
|
1964
|
+
grid search by minimizing the error between observed and modeled quantities.
|
|
1409
1965
|
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1966
|
+
Parameters
|
|
1967
|
+
----------
|
|
1968
|
+
ds : xarray.Dataset
|
|
1969
|
+
Input dataset containing PSD observations. Must include:
|
|
1970
|
+
|
|
1971
|
+
- ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
1972
|
+
- ``diameter_bin_center`` : Diameter bin centers [mm]
|
|
1973
|
+
- ``diameter_bin_width`` : Diameter bin widths [mm]
|
|
1974
|
+
- ``fall_velocity`` : Drop fall velocity [m s⁻¹] (required if target='R')
|
|
1975
|
+
|
|
1976
|
+
Lambda : int, float or numpy.ndarray
|
|
1977
|
+
Lambda parameter values to search.
|
|
1978
|
+
objectives: list of dict
|
|
1979
|
+
target : str, optional
|
|
1980
|
+
Target quantity to optimize. Valid options:
|
|
1981
|
+
|
|
1982
|
+
- ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
1983
|
+
- ``"R"`` : Rain rate [mm h⁻¹]
|
|
1984
|
+
- ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
|
|
1985
|
+
- ``"LWC"`` : Liquid water content [g m⁻³]
|
|
1986
|
+
- ``"M<p>"`` : Moment of order p
|
|
1987
|
+
|
|
1988
|
+
transformation : str, optional
|
|
1989
|
+
Transformation applied to the target quantity before computing the loss.
|
|
1990
|
+
Valid options:
|
|
1991
|
+
|
|
1992
|
+
- ``"identity"`` : No transformation
|
|
1993
|
+
- ``"log"`` : Logarithmic transformation
|
|
1994
|
+
- ``"sqrt"`` : Square root transformation
|
|
1995
|
+
|
|
1996
|
+
censoring : str
|
|
1997
|
+
Specifies whether the observed particle size distribution (PSD) is
|
|
1998
|
+
treated as censored at the edges of the diameter range due to
|
|
1999
|
+
instrumental sensitivity limits:
|
|
2000
|
+
|
|
2001
|
+
- ``"none"`` : No censoring is applied. All diameter bins are used.
|
|
2002
|
+
- ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
|
|
2003
|
+
the spectrum where the observed number concentration is zero are
|
|
2004
|
+
removed prior to cost-function evaluation.
|
|
2005
|
+
- ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
|
|
2006
|
+
the spectrum where the observed number concentration is zero are
|
|
2007
|
+
removed prior to cost-function evaluation.
|
|
2008
|
+
- ``"both"`` : Both left- and right-censored PSD. Only the contiguous
|
|
2009
|
+
range of diameter bins with non-zero observed concentrations is
|
|
2010
|
+
retained.
|
|
2011
|
+
|
|
2012
|
+
loss : str, optional
|
|
2013
|
+
Loss function.
|
|
2014
|
+
If target is ``"N(D)"``, valid options are:
|
|
2015
|
+
|
|
2016
|
+
- ``SSE``: Sum of Squared Errors
|
|
2017
|
+
- ``SAE``: Sum of Absolute Errors
|
|
2018
|
+
- ``MAE``: Mean Absolute Error
|
|
2019
|
+
- ``MSE``: Mean Squared Error
|
|
2020
|
+
- ``RMSE``: Root Mean Squared Error
|
|
2021
|
+
- ``relMAE``: Relative Mean Absolute Error
|
|
2022
|
+
- ``KLDiv``: Kullback-Leibler Divergence
|
|
2023
|
+
- ``WD``: Wasserstein Distance
|
|
2024
|
+
- ``JSD``: Jensen-Shannon Distance
|
|
2025
|
+
- ``KS``: Kolmogorov-Smirnov Statistic
|
|
2026
|
+
|
|
2027
|
+
If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
|
|
2028
|
+
|
|
2029
|
+
- ``AE``: Absolute Error
|
|
2030
|
+
- ``SE``: Squared Error
|
|
2031
|
+
|
|
2032
|
+
loss_weight: int, optional
|
|
2033
|
+
Weight of this objective when multiple objectives are used.
|
|
2034
|
+
Must be specified if more than one objective is specified.
|
|
2035
|
+
return_loss : bool, optional
|
|
2036
|
+
If True, return both the loss surface and parameters.
|
|
2037
|
+
Default is False.
|
|
1414
2038
|
|
|
2039
|
+
Returns
|
|
2040
|
+
-------
|
|
2041
|
+
ds_params : xarray.Dataset
|
|
2042
|
+
Dataset containing the estimated Exponential distribution parameters.
|
|
2043
|
+
"""
|
|
2044
|
+
# Use default objectives if not specified
|
|
2045
|
+
if objectives is None:
|
|
2046
|
+
objectives = copy.deepcopy(DEFAULT_OBJECTIVES)
|
|
1415
2047
|
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
# "target": ["ND", "LWC", "Z", "R"]
|
|
1419
|
-
# "transformation": "log", "identity", "sqrt", # only for drop_number_concentration
|
|
1420
|
-
# "error_order": 1, # MAE/MSE ... only for drop_number_concentration
|
|
2048
|
+
# Check objectives
|
|
2049
|
+
objectives = check_objectives(objectives=objectives)
|
|
1421
2050
|
|
|
1422
2051
|
# Compute required variables
|
|
1423
|
-
|
|
2052
|
+
Nt = get_total_number_concentration(
|
|
1424
2053
|
drop_number_concentration=ds["drop_number_concentration"],
|
|
1425
2054
|
diameter_bin_width=ds["diameter_bin_width"],
|
|
1426
2055
|
)
|
|
1427
2056
|
|
|
2057
|
+
# Define search space
|
|
2058
|
+
if Lambda is None:
|
|
2059
|
+
Lambda = np.arange(0.01, 10, step=0.01)
|
|
2060
|
+
|
|
1428
2061
|
# Define kwargs
|
|
1429
2062
|
kwargs = {
|
|
1430
|
-
"D": ds["diameter_bin_center"].
|
|
1431
|
-
"dD": ds["diameter_bin_width"].
|
|
1432
|
-
"
|
|
1433
|
-
"
|
|
1434
|
-
"
|
|
2063
|
+
"D": ds["diameter_bin_center"].to_numpy(),
|
|
2064
|
+
"dD": ds["diameter_bin_width"].to_numpy(),
|
|
2065
|
+
"objectives": objectives,
|
|
2066
|
+
"return_loss": return_loss,
|
|
2067
|
+
"Lambda": Lambda,
|
|
1435
2068
|
}
|
|
1436
2069
|
|
|
1437
|
-
#
|
|
1438
|
-
|
|
2070
|
+
# Define function to create parameters dataset
|
|
2071
|
+
def _create_parameters_dataset(da_parameters):
|
|
2072
|
+
# Add parameters coordinates
|
|
2073
|
+
da_parameters = da_parameters.assign_coords({"parameters": ["N0", "Lambda"]})
|
|
2074
|
+
|
|
2075
|
+
# Create parameters dataset
|
|
2076
|
+
ds_parameters = da_parameters.to_dataset(dim="parameters")
|
|
2077
|
+
|
|
2078
|
+
# Add DSD model name to the attribute
|
|
2079
|
+
ds_parameters.attrs["disdrodb_psd_model"] = "ExponentialPSD"
|
|
2080
|
+
return ds_parameters
|
|
2081
|
+
|
|
2082
|
+
# Return cost function if asked
|
|
2083
|
+
if return_loss:
|
|
2084
|
+
da_parameters, da_cost_function = xr.apply_ufunc(
|
|
2085
|
+
apply_exponential_gs,
|
|
2086
|
+
# Variables varying over time
|
|
2087
|
+
Nt,
|
|
2088
|
+
ds["drop_number_concentration"],
|
|
2089
|
+
ds["fall_velocity"],
|
|
2090
|
+
# Other options
|
|
2091
|
+
kwargs=kwargs,
|
|
2092
|
+
# Settings
|
|
2093
|
+
input_core_dims=[[], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
|
|
2094
|
+
output_core_dims=[["parameters"], ["Lambda_values"]],
|
|
2095
|
+
vectorize=True,
|
|
2096
|
+
dask="parallelized",
|
|
2097
|
+
# Lengths of the new output_core_dims dimensions.
|
|
2098
|
+
dask_gufunc_kwargs={"output_sizes": {"Lambda_values": len(Lambda), "parameters": 2}},
|
|
2099
|
+
output_dtypes=["float64", "float64"],
|
|
2100
|
+
)
|
|
2101
|
+
ds_parameters = _create_parameters_dataset(da_parameters)
|
|
2102
|
+
ds_parameters["cost_function"] = da_cost_function
|
|
2103
|
+
ds_parameters = ds_parameters.assign_coords({"Lambda_values": Lambda})
|
|
2104
|
+
return ds_parameters
|
|
2105
|
+
|
|
2106
|
+
# Otherwise return just best parameters
|
|
2107
|
+
da_parameters = xr.apply_ufunc(
|
|
1439
2108
|
apply_exponential_gs,
|
|
1440
2109
|
# Variables varying over time
|
|
1441
|
-
|
|
2110
|
+
Nt,
|
|
1442
2111
|
ds["drop_number_concentration"],
|
|
1443
2112
|
ds["fall_velocity"],
|
|
1444
2113
|
# Other options
|
|
@@ -1451,44 +2120,180 @@ def get_exponential_parameters_gs(ds, target="ND", transformation="log", error_o
|
|
|
1451
2120
|
dask_gufunc_kwargs={"output_sizes": {"parameters": 2}}, # lengths of the new output_core_dims dimensions.
|
|
1452
2121
|
output_dtypes=["float64"],
|
|
1453
2122
|
)
|
|
2123
|
+
ds_parameters = _create_parameters_dataset(da_parameters)
|
|
2124
|
+
return ds_parameters
|
|
1454
2125
|
|
|
1455
|
-
# Add parameters coordinates
|
|
1456
|
-
da_params = da_params.assign_coords({"parameters": ["N0", "Lambda"]})
|
|
1457
2126
|
|
|
1458
|
-
|
|
1459
|
-
|
|
2127
|
+
def get_gamma_parameters_gs(
|
|
2128
|
+
ds,
|
|
2129
|
+
mu=None,
|
|
2130
|
+
Lambda=None,
|
|
2131
|
+
objectives=None,
|
|
2132
|
+
return_loss=False,
|
|
2133
|
+
):
|
|
2134
|
+
"""Estimate Gamma PSD parameters using Grid Search optimization.
|
|
1460
2135
|
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
2136
|
+
The parameter ``N_t`` is computed empirically from the observed DSD,
|
|
2137
|
+
while the shape parameters ``mu`` and ``Lambda`` are estimated through
|
|
2138
|
+
grid search by minimizing the error between observed and modeled quantities.
|
|
2139
|
+
|
|
2140
|
+
Parameters
|
|
2141
|
+
----------
|
|
2142
|
+
ds : xarray.Dataset
|
|
2143
|
+
Input dataset containing PSD observations. Must include:
|
|
2144
|
+
|
|
2145
|
+
- ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
2146
|
+
- ``diameter_bin_center`` : Diameter bin centers [mm]
|
|
2147
|
+
- ``diameter_bin_width`` : Diameter bin widths [mm]
|
|
2148
|
+
- ``fall_velocity`` : Drop fall velocity [m s⁻¹] (required if target='R')
|
|
2149
|
+
|
|
2150
|
+
mu : int, float or numpy.ndarray
|
|
2151
|
+
mu parameter values to search.
|
|
2152
|
+
Lambda : int, float or numpy.ndarray
|
|
2153
|
+
Lambda parameter values to search.
|
|
2154
|
+
objectives: list of dict
|
|
2155
|
+
target : str, optional
|
|
2156
|
+
Target quantity to optimize. Valid options:
|
|
2157
|
+
|
|
2158
|
+
- ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
2159
|
+
- ``"R"`` : Rain rate [mm h⁻¹]
|
|
2160
|
+
- ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
|
|
2161
|
+
- ``"LWC"`` : Liquid water content [g m⁻³]
|
|
2162
|
+
- ``"M<p>"`` : Moment of order p
|
|
2163
|
+
|
|
2164
|
+
transformation : str, optional
|
|
2165
|
+
Transformation applied to the target quantity before computing the loss.
|
|
2166
|
+
Valid options:
|
|
2167
|
+
|
|
2168
|
+
- ``"identity"`` : No transformation
|
|
2169
|
+
- ``"log"`` : Logarithmic transformation
|
|
2170
|
+
- ``"sqrt"`` : Square root transformation
|
|
2171
|
+
|
|
2172
|
+
censoring : str
|
|
2173
|
+
Specifies whether the observed particle size distribution (PSD) is
|
|
2174
|
+
treated as censored at the edges of the diameter range due to
|
|
2175
|
+
instrumental sensitivity limits:
|
|
2176
|
+
|
|
2177
|
+
- ``"none"`` : No censoring is applied. All diameter bins are used.
|
|
2178
|
+
- ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
|
|
2179
|
+
the spectrum where the observed number concentration is zero are
|
|
2180
|
+
removed prior to cost-function evaluation.
|
|
2181
|
+
- ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
|
|
2182
|
+
the spectrum where the observed number concentration is zero are
|
|
2183
|
+
removed prior to cost-function evaluation.
|
|
2184
|
+
- ``"both"`` : Both left- and right-censored PSD. Only the contiguous
|
|
2185
|
+
range of diameter bins with non-zero observed concentrations is
|
|
2186
|
+
retained.
|
|
2187
|
+
|
|
2188
|
+
loss : str, optional
|
|
2189
|
+
Loss function.
|
|
2190
|
+
If target is ``"N(D)"``, valid options are:
|
|
2191
|
+
|
|
2192
|
+
- ``SSE``: Sum of Squared Errors
|
|
2193
|
+
- ``SAE``: Sum of Absolute Errors
|
|
2194
|
+
- ``MAE``: Mean Absolute Error
|
|
2195
|
+
- ``MSE``: Mean Squared Error
|
|
2196
|
+
- ``RMSE``: Root Mean Squared Error
|
|
2197
|
+
- ``relMAE``: Relative Mean Absolute Error
|
|
2198
|
+
- ``KLDiv``: Kullback-Leibler Divergence
|
|
2199
|
+
- ``WD``: Wasserstein Distance
|
|
2200
|
+
- ``JSD``: Jensen-Shannon Distance
|
|
2201
|
+
- ``KS``: Kolmogorov-Smirnov Statistic
|
|
2202
|
+
|
|
2203
|
+
If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
|
|
2204
|
+
|
|
2205
|
+
- ``AE``: Absolute Error
|
|
2206
|
+
- ``SE``: Squared Error
|
|
2207
|
+
|
|
2208
|
+
loss_weight: int, optional
|
|
2209
|
+
Weight of this objective when multiple objectives are used.
|
|
2210
|
+
Must be specified if more than one objective is specified.
|
|
2211
|
+
return_loss : bool, optional
|
|
2212
|
+
If True, return both the loss surface and parameters.
|
|
2213
|
+
Default is False.
|
|
1464
2214
|
|
|
2215
|
+
Returns
|
|
2216
|
+
-------
|
|
2217
|
+
ds_params : xarray.Dataset
|
|
2218
|
+
Dataset containing the estimated Gamma distribution parameters.
|
|
2219
|
+
"""
|
|
2220
|
+
# Use default objectives if not specified
|
|
2221
|
+
if objectives is None:
|
|
2222
|
+
objectives = copy.deepcopy(DEFAULT_OBJECTIVES)
|
|
1465
2223
|
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
# "target": ["ND", "LWC", "Z", "R"]
|
|
1469
|
-
# "transformation": "log", "identity", "sqrt", # only for drop_number_concentration
|
|
1470
|
-
# "error_order": 1, # MAE/MSE ... only for drop_number_concentration
|
|
2224
|
+
# Check objectives
|
|
2225
|
+
objectives = check_objectives(objectives=objectives)
|
|
1471
2226
|
|
|
1472
2227
|
# Compute required variables
|
|
1473
|
-
|
|
2228
|
+
Nt = get_total_number_concentration(
|
|
1474
2229
|
drop_number_concentration=ds["drop_number_concentration"],
|
|
1475
2230
|
diameter_bin_width=ds["diameter_bin_width"],
|
|
1476
2231
|
)
|
|
1477
2232
|
|
|
2233
|
+
# Define search space
|
|
2234
|
+
if mu is None:
|
|
2235
|
+
mu = np.arange(0, 15, step=0.1)
|
|
2236
|
+
if Lambda is None:
|
|
2237
|
+
Lambda = np.arange(0, 30, step=0.1)
|
|
2238
|
+
|
|
1478
2239
|
# Define kwargs
|
|
1479
2240
|
kwargs = {
|
|
1480
|
-
"D": ds["diameter_bin_center"].
|
|
1481
|
-
"dD": ds["diameter_bin_width"].
|
|
1482
|
-
"
|
|
1483
|
-
"
|
|
1484
|
-
"
|
|
2241
|
+
"D": ds["diameter_bin_center"].to_numpy(),
|
|
2242
|
+
"dD": ds["diameter_bin_width"].to_numpy(),
|
|
2243
|
+
"objectives": objectives,
|
|
2244
|
+
"return_loss": return_loss,
|
|
2245
|
+
"mu": mu,
|
|
2246
|
+
"Lambda": Lambda,
|
|
1485
2247
|
}
|
|
1486
2248
|
|
|
1487
|
-
#
|
|
1488
|
-
|
|
2249
|
+
# Define function to create parameters dataset
|
|
2250
|
+
def _create_parameters_dataset(da_parameters):
|
|
2251
|
+
# Add parameters coordinates
|
|
2252
|
+
da_parameters = da_parameters.assign_coords({"parameters": ["N0", "Lambda", "mu"]})
|
|
2253
|
+
|
|
2254
|
+
# Create parameters dataset
|
|
2255
|
+
ds_parameters = da_parameters.to_dataset(dim="parameters")
|
|
2256
|
+
|
|
2257
|
+
# Add DSD model name to the attribute
|
|
2258
|
+
ds_parameters.attrs["disdrodb_psd_model"] = "GammaPSD"
|
|
2259
|
+
return ds_parameters
|
|
2260
|
+
|
|
2261
|
+
# Return cost function if asked
|
|
2262
|
+
if return_loss:
|
|
2263
|
+
# Define lengths of the new output_core_dims dimensions.
|
|
2264
|
+
output_dict_size = {
|
|
2265
|
+
"mu_values": len(mu),
|
|
2266
|
+
"Lambda_values": len(Lambda),
|
|
2267
|
+
"parameters": 3,
|
|
2268
|
+
}
|
|
2269
|
+
# Compute cost function and parameters
|
|
2270
|
+
da_parameters, da_cost_function = xr.apply_ufunc(
|
|
2271
|
+
apply_gamma_gs,
|
|
2272
|
+
# Variables varying over time
|
|
2273
|
+
Nt,
|
|
2274
|
+
ds["drop_number_concentration"],
|
|
2275
|
+
ds["fall_velocity"],
|
|
2276
|
+
# Other options
|
|
2277
|
+
kwargs=kwargs,
|
|
2278
|
+
# Settings
|
|
2279
|
+
input_core_dims=[[], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
|
|
2280
|
+
output_core_dims=[["parameters"], ["Lambda_values", "mu_values"]],
|
|
2281
|
+
vectorize=True,
|
|
2282
|
+
dask="parallelized",
|
|
2283
|
+
# Lengths of the new output_core_dims dimensions.
|
|
2284
|
+
dask_gufunc_kwargs={"output_sizes": output_dict_size},
|
|
2285
|
+
output_dtypes=["float64", "float64"],
|
|
2286
|
+
)
|
|
2287
|
+
ds_parameters = _create_parameters_dataset(da_parameters)
|
|
2288
|
+
ds_parameters["cost_function"] = da_cost_function
|
|
2289
|
+
ds_parameters = ds_parameters.assign_coords({"mu_values": mu, "Lambda_values": Lambda})
|
|
2290
|
+
return ds_parameters
|
|
2291
|
+
|
|
2292
|
+
# Otherwise return just best parameters
|
|
2293
|
+
da_parameters = xr.apply_ufunc(
|
|
1489
2294
|
apply_gamma_gs,
|
|
1490
2295
|
# Variables varying over time
|
|
1491
|
-
|
|
2296
|
+
Nt,
|
|
1492
2297
|
ds["drop_number_concentration"],
|
|
1493
2298
|
ds["fall_velocity"],
|
|
1494
2299
|
# Other options
|
|
@@ -1501,44 +2306,186 @@ def get_gamma_parameters_gs(ds, target="ND", transformation="log", error_order=1
|
|
|
1501
2306
|
dask_gufunc_kwargs={"output_sizes": {"parameters": 3}}, # lengths of the new output_core_dims dimensions.
|
|
1502
2307
|
output_dtypes=["float64"],
|
|
1503
2308
|
)
|
|
2309
|
+
ds_parameters = _create_parameters_dataset(da_parameters)
|
|
2310
|
+
return ds_parameters
|
|
1504
2311
|
|
|
1505
|
-
# Add parameters coordinates
|
|
1506
|
-
da_params = da_params.assign_coords({"parameters": ["N0", "mu", "Lambda"]})
|
|
1507
|
-
|
|
1508
|
-
# Create parameters dataset
|
|
1509
|
-
ds_params = da_params.to_dataset(dim="parameters")
|
|
1510
2312
|
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
2313
|
+
def get_generalized_gamma_parameters_gs(
|
|
2314
|
+
ds,
|
|
2315
|
+
mu=None,
|
|
2316
|
+
c=None,
|
|
2317
|
+
Lambda=None,
|
|
2318
|
+
objectives=None,
|
|
2319
|
+
return_loss=False,
|
|
2320
|
+
):
|
|
2321
|
+
"""Estimate Generalized Gamma PSD parameters using Grid Search optimization.
|
|
1514
2322
|
|
|
2323
|
+
The parameter ``N_t`` is computed empirically from the observed DSD,
|
|
2324
|
+
while the shape parameters ``mu``, ``c``, and ``Lambda`` are estimated through
|
|
2325
|
+
grid search by minimizing the error between observed and modeled quantities.
|
|
1515
2326
|
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
2327
|
+
Parameters
|
|
2328
|
+
----------
|
|
2329
|
+
ds : xarray.Dataset
|
|
2330
|
+
Input dataset containing PSD observations. Must include:
|
|
2331
|
+
|
|
2332
|
+
- ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
2333
|
+
- ``diameter_bin_center`` : Diameter bin centers [mm]
|
|
2334
|
+
- ``diameter_bin_width`` : Diameter bin widths [mm]
|
|
2335
|
+
- ``fall_velocity`` : Drop fall velocity [m s⁻¹] (required if target='R')
|
|
2336
|
+
|
|
2337
|
+
mu : int, float or numpy.ndarray
|
|
2338
|
+
mu parameter values to search.
|
|
2339
|
+
c : int, float or numpy.ndarray
|
|
2340
|
+
c parameter values to search.
|
|
2341
|
+
Lambda : int, float or numpy.ndarray
|
|
2342
|
+
Lambda parameter values to search.
|
|
2343
|
+
objectives: list of dict
|
|
2344
|
+
target : str, optional
|
|
2345
|
+
Target quantity to optimize. Valid options:
|
|
2346
|
+
|
|
2347
|
+
- ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
2348
|
+
- ``"R"`` : Rain rate [mm h⁻¹]
|
|
2349
|
+
- ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
|
|
2350
|
+
- ``"LWC"`` : Liquid water content [g m⁻³]
|
|
2351
|
+
- ``"M<p>"`` : Moment of order p
|
|
2352
|
+
|
|
2353
|
+
transformation : str, optional
|
|
2354
|
+
Transformation applied to the target quantity before computing the loss.
|
|
2355
|
+
Valid options:
|
|
2356
|
+
|
|
2357
|
+
- ``"identity"`` : No transformation
|
|
2358
|
+
- ``"log"`` : Logarithmic transformation
|
|
2359
|
+
- ``"sqrt"`` : Square root transformation
|
|
2360
|
+
|
|
2361
|
+
censoring : str
|
|
2362
|
+
Specifies whether the observed particle size distribution (PSD) is
|
|
2363
|
+
treated as censored at the edges of the diameter range due to
|
|
2364
|
+
instrumental sensitivity limits:
|
|
2365
|
+
|
|
2366
|
+
- ``"none"`` : No censoring is applied. All diameter bins are used.
|
|
2367
|
+
- ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
|
|
2368
|
+
the spectrum where the observed number concentration is zero are
|
|
2369
|
+
removed prior to cost-function evaluation.
|
|
2370
|
+
- ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
|
|
2371
|
+
the spectrum where the observed number concentration is zero are
|
|
2372
|
+
removed prior to cost-function evaluation.
|
|
2373
|
+
- ``"both"`` : Both left- and right-censored PSD. Only the contiguous
|
|
2374
|
+
range of diameter bins with non-zero observed concentrations is
|
|
2375
|
+
retained.
|
|
2376
|
+
|
|
2377
|
+
loss : str, optional
|
|
2378
|
+
Loss function.
|
|
2379
|
+
If target is ``"N(D)"``, valid options are:
|
|
2380
|
+
|
|
2381
|
+
- ``SSE``: Sum of Squared Errors
|
|
2382
|
+
- ``SAE``: Sum of Absolute Errors
|
|
2383
|
+
- ``MAE``: Mean Absolute Error
|
|
2384
|
+
- ``MSE``: Mean Squared Error
|
|
2385
|
+
- ``RMSE``: Root Mean Squared Error
|
|
2386
|
+
- ``relMAE``: Relative Mean Absolute Error
|
|
2387
|
+
- ``KLDiv``: Kullback-Leibler Divergence
|
|
2388
|
+
- ``WD``: Wasserstein Distance
|
|
2389
|
+
- ``JSD``: Jensen-Shannon Distance
|
|
2390
|
+
- ``KS``: Kolmogorov-Smirnov Statistic
|
|
2391
|
+
|
|
2392
|
+
If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
|
|
2393
|
+
|
|
2394
|
+
- ``AE``: Absolute Error
|
|
2395
|
+
- ``SE``: Squared Error
|
|
2396
|
+
|
|
2397
|
+
loss_weight: int, optional
|
|
2398
|
+
Weight of this objective when multiple objectives are used.
|
|
2399
|
+
Must be specified if more than one objective is specified.
|
|
2400
|
+
return_loss : bool, optional
|
|
2401
|
+
If True, return both the loss surface and parameters.
|
|
2402
|
+
Default is False.
|
|
2403
|
+
|
|
2404
|
+
Returns
|
|
2405
|
+
-------
|
|
2406
|
+
ds_params : xarray.Dataset
|
|
2407
|
+
Dataset containing the estimated Generalized Gamma distribution parameters.
|
|
2408
|
+
"""
|
|
2409
|
+
# Use default objectives if not specified
|
|
2410
|
+
if objectives is None:
|
|
2411
|
+
objectives = copy.deepcopy(DEFAULT_OBJECTIVES)
|
|
2412
|
+
|
|
2413
|
+
# Check objectives
|
|
2414
|
+
objectives = check_objectives(objectives=objectives)
|
|
1521
2415
|
|
|
1522
2416
|
# Compute required variables
|
|
1523
|
-
|
|
2417
|
+
Nt = get_total_number_concentration(
|
|
1524
2418
|
drop_number_concentration=ds["drop_number_concentration"],
|
|
1525
2419
|
diameter_bin_width=ds["diameter_bin_width"],
|
|
1526
2420
|
)
|
|
1527
2421
|
|
|
2422
|
+
# Define search space
|
|
2423
|
+
if mu is None:
|
|
2424
|
+
mu = np.arange(0, 10, step=0.2)
|
|
2425
|
+
if c is None:
|
|
2426
|
+
c = np.arange(0, 10, step=0.2)
|
|
2427
|
+
if Lambda is None:
|
|
2428
|
+
Lambda = np.arange(0, 20, step=0.2)
|
|
2429
|
+
|
|
1528
2430
|
# Define kwargs
|
|
1529
2431
|
kwargs = {
|
|
1530
|
-
"D": ds["diameter_bin_center"].
|
|
1531
|
-
"dD": ds["diameter_bin_width"].
|
|
1532
|
-
"
|
|
1533
|
-
"
|
|
1534
|
-
"
|
|
2432
|
+
"D": ds["diameter_bin_center"].to_numpy(),
|
|
2433
|
+
"dD": ds["diameter_bin_width"].to_numpy(),
|
|
2434
|
+
"objectives": objectives,
|
|
2435
|
+
"return_loss": return_loss,
|
|
2436
|
+
"mu": mu,
|
|
2437
|
+
"c": c,
|
|
2438
|
+
"Lambda": Lambda,
|
|
1535
2439
|
}
|
|
1536
2440
|
|
|
1537
|
-
#
|
|
1538
|
-
|
|
1539
|
-
|
|
2441
|
+
# Define function to create parameters dataset
|
|
2442
|
+
def _create_parameters_dataset(da_parameters):
|
|
2443
|
+
# Add parameters coordinates
|
|
2444
|
+
da_parameters = da_parameters.assign_coords({"parameters": ["Nt", "Lambda", "mu", "c"]})
|
|
2445
|
+
|
|
2446
|
+
# Create parameters dataset
|
|
2447
|
+
ds_parameters = da_parameters.to_dataset(dim="parameters")
|
|
2448
|
+
|
|
2449
|
+
# Add DSD model name to the attribute
|
|
2450
|
+
ds_parameters.attrs["disdrodb_psd_model"] = "GeneralizedGammaPSD"
|
|
2451
|
+
return ds_parameters
|
|
2452
|
+
|
|
2453
|
+
# Return cost function if asked
|
|
2454
|
+
if return_loss:
|
|
2455
|
+
# Define lengths of the new output_core_dims dimensions.
|
|
2456
|
+
output_dict_size = {
|
|
2457
|
+
"mu_values": len(mu),
|
|
2458
|
+
"Lambda_values": len(Lambda),
|
|
2459
|
+
"c_values": len(c),
|
|
2460
|
+
"parameters": 4,
|
|
2461
|
+
}
|
|
2462
|
+
# Compute cost function and parameters
|
|
2463
|
+
da_parameters, da_cost_function = xr.apply_ufunc(
|
|
2464
|
+
apply_generalized_gamma_gs,
|
|
2465
|
+
# Variables varying over time
|
|
2466
|
+
Nt,
|
|
2467
|
+
ds["drop_number_concentration"],
|
|
2468
|
+
ds["fall_velocity"],
|
|
2469
|
+
# Other options
|
|
2470
|
+
kwargs=kwargs,
|
|
2471
|
+
# Settings
|
|
2472
|
+
input_core_dims=[[], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
|
|
2473
|
+
output_core_dims=[["parameters"], ["Lambda_values", "mu_values", "c_values"]],
|
|
2474
|
+
vectorize=True,
|
|
2475
|
+
dask="parallelized",
|
|
2476
|
+
dask_gufunc_kwargs={"output_sizes": output_dict_size},
|
|
2477
|
+
output_dtypes=["float64", "float64"],
|
|
2478
|
+
)
|
|
2479
|
+
ds_parameters = _create_parameters_dataset(da_parameters)
|
|
2480
|
+
ds_parameters["cost_function"] = da_cost_function
|
|
2481
|
+
ds_parameters = ds_parameters.assign_coords({"mu_values": mu, "Lambda_values": Lambda, "c_values": c})
|
|
2482
|
+
return ds_parameters
|
|
2483
|
+
|
|
2484
|
+
# Otherwise return just best parameters
|
|
2485
|
+
da_parameters = xr.apply_ufunc(
|
|
2486
|
+
apply_generalized_gamma_gs,
|
|
1540
2487
|
# Variables varying over time
|
|
1541
|
-
|
|
2488
|
+
Nt,
|
|
1542
2489
|
ds["drop_number_concentration"],
|
|
1543
2490
|
ds["fall_velocity"],
|
|
1544
2491
|
# Other options
|
|
@@ -1548,49 +2495,289 @@ def get_lognormal_parameters_gs(ds, target="ND", transformation="log", error_ord
|
|
|
1548
2495
|
output_core_dims=[["parameters"]],
|
|
1549
2496
|
vectorize=True,
|
|
1550
2497
|
dask="parallelized",
|
|
1551
|
-
dask_gufunc_kwargs={"output_sizes": {"parameters":
|
|
2498
|
+
dask_gufunc_kwargs={"output_sizes": {"parameters": 4}}, # lengths of the new output_core_dims dimensions.
|
|
1552
2499
|
output_dtypes=["float64"],
|
|
1553
2500
|
)
|
|
2501
|
+
ds_parameters = _create_parameters_dataset(da_parameters)
|
|
2502
|
+
return ds_parameters
|
|
1554
2503
|
|
|
1555
|
-
# Add parameters coordinates
|
|
1556
|
-
da_params = da_params.assign_coords({"parameters": ["Nt", "mu", "sigma"]})
|
|
1557
2504
|
|
|
1558
|
-
|
|
1559
|
-
|
|
2505
|
+
def get_lognormal_parameters_gs(
|
|
2506
|
+
ds,
|
|
2507
|
+
mu=None,
|
|
2508
|
+
sigma=None,
|
|
2509
|
+
objectives=None,
|
|
2510
|
+
return_loss=False,
|
|
2511
|
+
):
|
|
2512
|
+
"""Estimate Lognormal PSD parameters using Grid Search optimization.
|
|
1560
2513
|
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
2514
|
+
The parameter ``N_t`` is computed empirically from the observed DSD,
|
|
2515
|
+
while the shape parameters ``mu`` and ``sigma`` are estimated through
|
|
2516
|
+
grid search by minimizing the error between observed and modeled quantities.
|
|
2517
|
+
|
|
2518
|
+
Parameters
|
|
2519
|
+
----------
|
|
2520
|
+
ds : xarray.Dataset
|
|
2521
|
+
Input dataset containing PSD observations. Must include:
|
|
2522
|
+
|
|
2523
|
+
- ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
2524
|
+
- ``diameter_bin_center`` : Diameter bin centers [mm]
|
|
2525
|
+
- ``diameter_bin_width`` : Diameter bin widths [mm]
|
|
2526
|
+
- ``fall_velocity`` : Drop fall velocity [m s⁻¹] (required if target='R')
|
|
2527
|
+
|
|
2528
|
+
mu : int, float or numpy.ndarray
|
|
2529
|
+
mu parameter values to search.
|
|
2530
|
+
sigma : int, float or numpy.ndarray
|
|
2531
|
+
sigma parameter values to search.
|
|
2532
|
+
objectives: list of dict
|
|
2533
|
+
target : str, optional
|
|
2534
|
+
Target quantity to optimize. Valid options:
|
|
2535
|
+
|
|
2536
|
+
- ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
2537
|
+
- ``"R"`` : Rain rate [mm h⁻¹]
|
|
2538
|
+
- ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
|
|
2539
|
+
- ``"LWC"`` : Liquid water content [g m⁻³]
|
|
2540
|
+
- ``"M<p>"`` : Moment of order p
|
|
2541
|
+
|
|
2542
|
+
transformation : str, optional
|
|
2543
|
+
Transformation applied to the target quantity before computing the loss.
|
|
2544
|
+
Valid options:
|
|
2545
|
+
|
|
2546
|
+
- ``"identity"`` : No transformation
|
|
2547
|
+
- ``"log"`` : Logarithmic transformation
|
|
2548
|
+
- ``"sqrt"`` : Square root transformation
|
|
2549
|
+
|
|
2550
|
+
censoring : str
|
|
2551
|
+
Specifies whether the observed particle size distribution (PSD) is
|
|
2552
|
+
treated as censored at the edges of the diameter range due to
|
|
2553
|
+
instrumental sensitivity limits:
|
|
2554
|
+
|
|
2555
|
+
- ``"none"`` : No censoring is applied. All diameter bins are used.
|
|
2556
|
+
- ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
|
|
2557
|
+
the spectrum where the observed number concentration is zero are
|
|
2558
|
+
removed prior to cost-function evaluation.
|
|
2559
|
+
- ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
|
|
2560
|
+
the spectrum where the observed number concentration is zero are
|
|
2561
|
+
removed prior to cost-function evaluation.
|
|
2562
|
+
- ``"both"`` : Both left- and right-censored PSD. Only the contiguous
|
|
2563
|
+
range of diameter bins with non-zero observed concentrations is
|
|
2564
|
+
retained.
|
|
2565
|
+
|
|
2566
|
+
loss : str, optional
|
|
2567
|
+
Loss function.
|
|
2568
|
+
If target is ``"N(D)"``, valid options are:
|
|
2569
|
+
|
|
2570
|
+
- ``SSE``: Sum of Squared Errors
|
|
2571
|
+
- ``SAE``: Sum of Absolute Errors
|
|
2572
|
+
- ``MAE``: Mean Absolute Error
|
|
2573
|
+
- ``MSE``: Mean Squared Error
|
|
2574
|
+
- ``RMSE``: Root Mean Squared Error
|
|
2575
|
+
- ``relMAE``: Relative Mean Absolute Error
|
|
2576
|
+
- ``KLDiv``: Kullback-Leibler Divergence
|
|
2577
|
+
- ``WD``: Wasserstein Distance
|
|
2578
|
+
- ``JSD``: Jensen-Shannon Distance
|
|
2579
|
+
- ``KS``: Kolmogorov-Smirnov Statistic
|
|
2580
|
+
|
|
2581
|
+
If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
|
|
2582
|
+
|
|
2583
|
+
- ``AE``: Absolute Error
|
|
2584
|
+
- ``SE``: Squared Error
|
|
2585
|
+
|
|
2586
|
+
loss_weight: int, optional
|
|
2587
|
+
Weight of this objective when multiple objectives are used.
|
|
2588
|
+
Must be specified if more than one objective is specified.
|
|
2589
|
+
return_loss : bool, optional
|
|
2590
|
+
If True, return both the loss surface and parameters.
|
|
2591
|
+
Default is False.
|
|
2592
|
+
|
|
2593
|
+
Returns
|
|
2594
|
+
-------
|
|
2595
|
+
ds_params : xarray.Dataset
|
|
2596
|
+
Dataset containing the estimated Lognormal distribution parameters.
|
|
2597
|
+
"""
|
|
2598
|
+
# Use default objectives if not specified
|
|
2599
|
+
if objectives is None:
|
|
2600
|
+
objectives = copy.deepcopy(DEFAULT_OBJECTIVES)
|
|
2601
|
+
|
|
2602
|
+
# Check objectives
|
|
2603
|
+
objectives = check_objectives(objectives=objectives)
|
|
2604
|
+
|
|
2605
|
+
# Compute required variables
|
|
2606
|
+
Nt = get_total_number_concentration(
|
|
2607
|
+
drop_number_concentration=ds["drop_number_concentration"],
|
|
2608
|
+
diameter_bin_width=ds["diameter_bin_width"],
|
|
2609
|
+
)
|
|
2610
|
+
|
|
2611
|
+
# Define search space
|
|
2612
|
+
if mu is None:
|
|
2613
|
+
mu = np.arange(-4, 1, step=0.1)
|
|
2614
|
+
if sigma is None:
|
|
2615
|
+
sigma = np.arange(0, 3, step=0.2)
|
|
2616
|
+
|
|
2617
|
+
# Define kwargs
|
|
2618
|
+
kwargs = {
|
|
2619
|
+
"D": ds["diameter_bin_center"].to_numpy(),
|
|
2620
|
+
"dD": ds["diameter_bin_width"].to_numpy(),
|
|
2621
|
+
"objectives": objectives,
|
|
2622
|
+
"return_loss": return_loss,
|
|
2623
|
+
"mu": mu,
|
|
2624
|
+
"sigma": sigma,
|
|
2625
|
+
}
|
|
1564
2626
|
|
|
2627
|
+
# Define function to create parameters dataset
|
|
2628
|
+
def _create_parameters_dataset(da_parameters):
|
|
2629
|
+
# Add parameters coordinates
|
|
2630
|
+
da_parameters = da_parameters.assign_coords({"parameters": ["Nt", "mu", "sigma"]})
|
|
2631
|
+
|
|
2632
|
+
# Create parameters dataset
|
|
2633
|
+
ds_parameters = da_parameters.to_dataset(dim="parameters")
|
|
2634
|
+
|
|
2635
|
+
# Add DSD model name to the attribute
|
|
2636
|
+
ds_parameters.attrs["disdrodb_psd_model"] = "LognormalPSD"
|
|
2637
|
+
return ds_parameters
|
|
2638
|
+
|
|
2639
|
+
# Return cost function if asked
|
|
2640
|
+
if return_loss:
|
|
2641
|
+
da_parameters, da_cost_function = xr.apply_ufunc(
|
|
2642
|
+
apply_lognormal_gs,
|
|
2643
|
+
# Variables varying over time
|
|
2644
|
+
Nt,
|
|
2645
|
+
ds["drop_number_concentration"],
|
|
2646
|
+
ds["fall_velocity"],
|
|
2647
|
+
# Other options
|
|
2648
|
+
kwargs=kwargs,
|
|
2649
|
+
# Settings
|
|
2650
|
+
input_core_dims=[[], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
|
|
2651
|
+
output_core_dims=[["parameters"], ["sigma_values", "mu_values"]],
|
|
2652
|
+
vectorize=True,
|
|
2653
|
+
dask="parallelized",
|
|
2654
|
+
# Lengths of the new output_core_dims dimensions.
|
|
2655
|
+
dask_gufunc_kwargs={"output_sizes": {"mu_values": len(mu), "sigma_values": len(sigma), "parameters": 3}},
|
|
2656
|
+
output_dtypes=["float64", "float64"],
|
|
2657
|
+
)
|
|
2658
|
+
ds_parameters = _create_parameters_dataset(da_parameters)
|
|
2659
|
+
ds_parameters["cost_function"] = da_cost_function
|
|
2660
|
+
ds_parameters = ds_parameters.assign_coords({"mu_values": mu, "sigma_values": sigma})
|
|
2661
|
+
return ds_parameters
|
|
2662
|
+
|
|
2663
|
+
# Otherwise return just best parameters
|
|
2664
|
+
da_parameters = xr.apply_ufunc(
|
|
2665
|
+
apply_lognormal_gs,
|
|
2666
|
+
# Variables varying over time
|
|
2667
|
+
Nt,
|
|
2668
|
+
ds["drop_number_concentration"],
|
|
2669
|
+
ds["fall_velocity"],
|
|
2670
|
+
# Other options
|
|
2671
|
+
kwargs=kwargs,
|
|
2672
|
+
# Settings
|
|
2673
|
+
input_core_dims=[[], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
|
|
2674
|
+
output_core_dims=[["parameters"]],
|
|
2675
|
+
vectorize=True,
|
|
2676
|
+
dask="parallelized",
|
|
2677
|
+
dask_gufunc_kwargs={"output_sizes": {"parameters": 3}}, # lengths of the new output_core_dims dimensions.
|
|
2678
|
+
output_dtypes=["float64"],
|
|
2679
|
+
)
|
|
2680
|
+
ds_parameters = _create_parameters_dataset(da_parameters)
|
|
2681
|
+
return ds_parameters
|
|
1565
2682
|
|
|
1566
|
-
def get_normalized_gamma_parameters_gs(ds, target="ND", transformation="log", error_order=1):
|
|
1567
|
-
r"""Estimate $\mu$ of a Normalized Gamma distribution using Grid Search.
|
|
1568
2683
|
|
|
1569
|
-
|
|
1570
|
-
|
|
2684
|
+
def get_normalized_gamma_parameters_gs(
|
|
2685
|
+
ds,
|
|
2686
|
+
mu=None,
|
|
2687
|
+
objectives=None,
|
|
2688
|
+
return_loss=False,
|
|
2689
|
+
):
|
|
2690
|
+
"""Estimate Normalized Gamma PSD parameters using Grid Search optimization.
|
|
2691
|
+
|
|
2692
|
+
The parameters ``N_w`` and ``D50`` are computed empirically from the observed DSD
|
|
2693
|
+
moments, while the shape parameter ``mu`` is estimated through
|
|
2694
|
+
grid search by minimizing the error between observed and modeled quantities.
|
|
1571
2695
|
|
|
1572
2696
|
Parameters
|
|
1573
2697
|
----------
|
|
1574
|
-
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
2698
|
+
ds : xarray.Dataset
|
|
2699
|
+
Input dataset containing PSD observations. Must include:
|
|
2700
|
+
|
|
2701
|
+
- ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
2702
|
+
- ``diameter_bin_center`` : Diameter bin centers [mm]
|
|
2703
|
+
- ``diameter_bin_width`` : Diameter bin widths [mm]
|
|
2704
|
+
- ``fall_velocity`` : Drop fall velocity [m s⁻¹] (required if target='R')
|
|
2705
|
+
|
|
2706
|
+
mu : int, float or numpy.ndarray
|
|
2707
|
+
mu parameter values to search.
|
|
2708
|
+
objectives: list of dict
|
|
2709
|
+
target : str, optional
|
|
2710
|
+
Target quantity to optimize. Valid options:
|
|
2711
|
+
|
|
2712
|
+
- ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
2713
|
+
- ``"H(x)"`` : Normalized drop number concentration [-]
|
|
2714
|
+
- ``"R"`` : Rain rate [mm h⁻¹]
|
|
2715
|
+
- ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
|
|
2716
|
+
- ``"LWC"`` : Liquid water content [g m⁻³]
|
|
2717
|
+
- ``"M<p>"`` : Moment of order p
|
|
2718
|
+
|
|
2719
|
+
transformation : str, optional
|
|
2720
|
+
Transformation applied to the target quantity before computing the loss.
|
|
2721
|
+
Valid options:
|
|
2722
|
+
|
|
2723
|
+
- ``"identity"`` : No transformation
|
|
2724
|
+
- ``"log"`` : Logarithmic transformation
|
|
2725
|
+
- ``"sqrt"`` : Square root transformation
|
|
2726
|
+
|
|
2727
|
+
censoring : str
|
|
2728
|
+
Specifies whether the observed particle size distribution (PSD) is
|
|
2729
|
+
treated as censored at the edges of the diameter range due to
|
|
2730
|
+
instrumental sensitivity limits:
|
|
2731
|
+
|
|
2732
|
+
- ``"none"`` : No censoring is applied. All diameter bins are used.
|
|
2733
|
+
- ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
|
|
2734
|
+
the spectrum where the observed number concentration is zero are
|
|
2735
|
+
removed prior to cost-function evaluation.
|
|
2736
|
+
- ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
|
|
2737
|
+
the spectrum where the observed number concentration is zero are
|
|
2738
|
+
removed prior to cost-function evaluation.
|
|
2739
|
+
- ``"both"`` : Both left- and right-censored PSD. Only the contiguous
|
|
2740
|
+
range of diameter bins with non-zero observed concentrations is
|
|
2741
|
+
retained.
|
|
2742
|
+
|
|
2743
|
+
loss : int, optional
|
|
2744
|
+
Loss function.
|
|
2745
|
+
If target is ``"N(D)"`` or ``"H(x)"``, valid options are:
|
|
2746
|
+
|
|
2747
|
+
- ``SSE``: Sum of Squared Errors
|
|
2748
|
+
- ``SAE``: Sum of Absolute Errors
|
|
2749
|
+
- ``MAE``: Mean Absolute Error
|
|
2750
|
+
- ``MSE``: Mean Squared Error
|
|
2751
|
+
- ``RMSE``: Root Mean Squared Error
|
|
2752
|
+
- ``relMAE``: Relative Mean Absolute Error
|
|
2753
|
+
- ``KLDiv``: Kullback-Leibler Divergence
|
|
2754
|
+
- ``WD``: Wasserstein Distance
|
|
2755
|
+
- ``JSD``: Jensen-Shannon Distance
|
|
2756
|
+
- ``KS``: Kolmogorov-Smirnov Statistic
|
|
2757
|
+
|
|
2758
|
+
If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
|
|
2759
|
+
|
|
2760
|
+
- ``AE``: Absolute Error
|
|
2761
|
+
- ``SE``: Squared Error
|
|
2762
|
+
|
|
2763
|
+
loss_weight: int, optional
|
|
2764
|
+
Weight of this objective when multiple objectives are used.
|
|
2765
|
+
Must be specified if more than one objective is specified.
|
|
2766
|
+
return_loss : bool, optional
|
|
2767
|
+
If True, return both the loss surface and parameters.
|
|
2768
|
+
Default is False.
|
|
1585
2769
|
|
|
1586
2770
|
Returns
|
|
1587
2771
|
-------
|
|
1588
2772
|
ds_params : xarray.Dataset
|
|
1589
2773
|
Dataset containing the estimated Normalized Gamma distribution parameters.
|
|
1590
2774
|
"""
|
|
1591
|
-
#
|
|
1592
|
-
|
|
1593
|
-
|
|
2775
|
+
# Use default objectives if not specified
|
|
2776
|
+
if objectives is None:
|
|
2777
|
+
objectives = copy.deepcopy(DEFAULT_OBJECTIVES)
|
|
2778
|
+
|
|
2779
|
+
# Check objectives
|
|
2780
|
+
objectives = check_objectives(objectives=objectives)
|
|
1594
2781
|
|
|
1595
2782
|
# Compute required variables
|
|
1596
2783
|
drop_number_concentration = ds["drop_number_concentration"]
|
|
@@ -1608,28 +2795,69 @@ def get_normalized_gamma_parameters_gs(ds, target="ND", transformation="log", er
|
|
|
1608
2795
|
diameter_bin_width=diameter_bin_width, # mm
|
|
1609
2796
|
moment=4,
|
|
1610
2797
|
)
|
|
1611
|
-
|
|
1612
|
-
|
|
2798
|
+
Nw = get_normalized_intercept_parameter_from_moments(moment_3=m3, moment_4=m4)
|
|
2799
|
+
D50 = get_median_volume_drop_diameter(
|
|
1613
2800
|
drop_number_concentration=drop_number_concentration,
|
|
1614
2801
|
diameter=diameter, # m
|
|
1615
2802
|
diameter_bin_width=diameter_bin_width, # mm
|
|
1616
2803
|
)
|
|
1617
2804
|
|
|
2805
|
+
# Define search space
|
|
2806
|
+
if mu is None:
|
|
2807
|
+
mu = np.arange(-4, 30, step=0.01)
|
|
2808
|
+
|
|
1618
2809
|
# Define kwargs
|
|
1619
2810
|
kwargs = {
|
|
1620
|
-
"D": ds["diameter_bin_center"].
|
|
1621
|
-
"dD": ds["diameter_bin_width"].
|
|
1622
|
-
"
|
|
1623
|
-
"
|
|
1624
|
-
"
|
|
2811
|
+
"D": ds["diameter_bin_center"].to_numpy(),
|
|
2812
|
+
"dD": ds["diameter_bin_width"].to_numpy(),
|
|
2813
|
+
"objectives": objectives,
|
|
2814
|
+
"return_loss": return_loss,
|
|
2815
|
+
"mu": mu,
|
|
1625
2816
|
}
|
|
1626
2817
|
|
|
1627
|
-
#
|
|
1628
|
-
|
|
2818
|
+
# Define function to create parameters dataset
|
|
2819
|
+
def _create_parameters_dataset(da_parameters):
|
|
2820
|
+
# Add parameters coordinates
|
|
2821
|
+
da_parameters = da_parameters.assign_coords({"parameters": ["Nw", "D50", "mu"]})
|
|
2822
|
+
|
|
2823
|
+
# Create parameters dataset
|
|
2824
|
+
ds_parameters = da_parameters.to_dataset(dim="parameters")
|
|
2825
|
+
|
|
2826
|
+
# Add DSD model name to the attribute
|
|
2827
|
+
ds_parameters.attrs["disdrodb_psd_model"] = "NormalizedGammaPSD"
|
|
2828
|
+
return ds_parameters
|
|
2829
|
+
|
|
2830
|
+
# Return cost function if asked
|
|
2831
|
+
if return_loss:
|
|
2832
|
+
da_parameters, da_cost_function = xr.apply_ufunc(
|
|
2833
|
+
apply_normalized_gamma_gs,
|
|
2834
|
+
# Variables varying over time
|
|
2835
|
+
Nw,
|
|
2836
|
+
D50,
|
|
2837
|
+
ds["drop_number_concentration"],
|
|
2838
|
+
ds["fall_velocity"],
|
|
2839
|
+
# Other options
|
|
2840
|
+
kwargs=kwargs,
|
|
2841
|
+
# Settings
|
|
2842
|
+
input_core_dims=[[], [], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
|
|
2843
|
+
output_core_dims=[["parameters"], ["mu_values"]],
|
|
2844
|
+
vectorize=True,
|
|
2845
|
+
dask="parallelized",
|
|
2846
|
+
# Lengths of the new output_core_dims dimensions.
|
|
2847
|
+
dask_gufunc_kwargs={"output_sizes": {"mu_values": len(mu), "parameters": 3}},
|
|
2848
|
+
output_dtypes=["float64", "float64"],
|
|
2849
|
+
)
|
|
2850
|
+
ds_parameters = _create_parameters_dataset(da_parameters)
|
|
2851
|
+
ds_parameters["cost_function"] = da_cost_function
|
|
2852
|
+
ds_parameters = ds_parameters.assign_coords({"mu_values": mu})
|
|
2853
|
+
return ds_parameters
|
|
2854
|
+
|
|
2855
|
+
# Otherwise return just best parameters
|
|
2856
|
+
da_parameters = xr.apply_ufunc(
|
|
1629
2857
|
apply_normalized_gamma_gs,
|
|
1630
2858
|
# Variables varying over time
|
|
1631
|
-
|
|
1632
|
-
|
|
2859
|
+
Nw,
|
|
2860
|
+
D50,
|
|
1633
2861
|
ds["drop_number_concentration"],
|
|
1634
2862
|
ds["fall_velocity"],
|
|
1635
2863
|
# Other options
|
|
@@ -1642,16 +2870,357 @@ def get_normalized_gamma_parameters_gs(ds, target="ND", transformation="log", er
|
|
|
1642
2870
|
dask_gufunc_kwargs={"output_sizes": {"parameters": 3}}, # lengths of the new output_core_dims dimensions.
|
|
1643
2871
|
output_dtypes=["float64"],
|
|
1644
2872
|
)
|
|
2873
|
+
ds_parameters = _create_parameters_dataset(da_parameters)
|
|
2874
|
+
return ds_parameters
|
|
1645
2875
|
|
|
1646
|
-
# Add parameters coordinates
|
|
1647
|
-
da_params = da_params.assign_coords({"parameters": ["Nw", "mu", "D50"]})
|
|
1648
2876
|
|
|
1649
|
-
|
|
1650
|
-
|
|
2877
|
+
def get_normalized_generalized_gamma_parameters_gs(
    ds,
    i,
    j,
    mu=None,
    c=None,
    objectives=None,
    return_loss=False,
):
    """Estimate Normalized Generalized Gamma PSD parameters using Grid Search optimization.

    The parameters ``Nc`` and ``Dc`` are computed empirically from the observed DSD
    moments of order ``i`` and ``j``, while the shape parameters ``mu`` and ``c``
    are estimated through grid search by minimizing the error between observed
    and modeled quantities.

    Parameters
    ----------
    ds : xarray.Dataset
        Input dataset containing PSD observations. Must include:

        - ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
        - ``diameter_bin_center`` : Diameter bin centers [mm]
        - ``diameter_bin_width`` : Diameter bin widths [mm]
        - ``fall_velocity`` : Drop fall velocity [m s⁻¹]. This variable is always
          read by this function, whatever the selected target.

    i : int
        Moment order i of the NormalizedGeneralizedGammaPSD.
    j : int
        Moment order j of the NormalizedGeneralizedGammaPSD.
    mu : int, float or numpy.ndarray, optional
        mu parameter values to search.
        If None, ``np.arange(-6, 10, step=0.1)`` is used.
    c : int, float or numpy.ndarray, optional
        c parameter values to search.
        If None, ``np.arange(0.01, 10, step=0.1)`` is used.
    objectives : list of dict, optional
        Objectives to minimize. If None, a deep copy of ``DEFAULT_OBJECTIVES`` is used.
        Each objective is described by the following keys:

        target : str, optional
            Target quantity to optimize. Valid options:

            - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
            - ``"H(x)"`` : Normalized drop number concentration [-]
            - ``"R"`` : Rain rate [mm h⁻¹]
            - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
            - ``"LWC"`` : Liquid water content [g m⁻³]
            - ``"M<p>"`` : Moment of order p

        transformation : str, optional
            Transformation applied to the target quantity before computing the loss.
            Valid options:

            - ``"identity"`` : No transformation
            - ``"log"`` : Logarithmic transformation
            - ``"sqrt"`` : Square root transformation

        censoring : str
            Specifies whether the observed particle size distribution (PSD) is
            treated as censored at the edges of the diameter range due to
            instrumental sensitivity limits:

            - ``"none"`` : No censoring is applied. All diameter bins are used.
            - ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
              the spectrum where the observed number concentration is zero are
              removed prior to cost-function evaluation.
            - ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
              the spectrum where the observed number concentration is zero are
              removed prior to cost-function evaluation.
            - ``"both"`` : Both left- and right-censored PSD. Only the contiguous
              range of diameter bins with non-zero observed concentrations is
              retained.

        loss : str, optional
            Loss function.
            If target is ``"N(D)"`` or ``"H(x)"``, valid options are:

            - ``SSE``: Sum of Squared Errors
            - ``SAE``: Sum of Absolute Errors
            - ``MAE``: Mean Absolute Error
            - ``MSE``: Mean Squared Error
            - ``RMSE``: Root Mean Squared Error
            - ``relMAE``: Relative Mean Absolute Error
            - ``KLDiv``: Kullback-Leibler Divergence
            - ``WD``: Wasserstein Distance
            - ``JSD``: Jensen-Shannon Distance
            - ``KS``: Kolmogorov-Smirnov Statistic

            If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:

            - ``AE``: Absolute Error
            - ``SE``: Squared Error

        loss_weight : int, optional
            Weight of this objective when multiple objectives are used.
            Must be specified if more than one objective is specified.
    return_loss : bool, optional
        If True, return both the loss surface and parameters.
        Default is False.

    Returns
    -------
    ds_params : xarray.Dataset
        Dataset containing the estimated Normalized Generalized Gamma distribution
        parameters (``Nc``, ``Dc``, ``mu``, ``c``).
        If ``return_loss=True``, the dataset also contains the ``cost_function``
        variable with dimensions ``("c_values", "mu_values")``.
    """
    # Use default objectives if not specified
    if objectives is None:
        objectives = copy.deepcopy(DEFAULT_OBJECTIVES)

    # Check objectives
    objectives = check_objectives(objectives=objectives)

    # Compute required variables
    drop_number_concentration = ds["drop_number_concentration"]
    diameter_bin_width = ds["diameter_bin_width"]
    diameter = ds["diameter_bin_center"] / 1000  # conversion from mm to m
    Mi = get_moment(
        drop_number_concentration=drop_number_concentration,
        diameter=diameter,  # m
        diameter_bin_width=diameter_bin_width,  # mm
        moment=i,
    )
    Mj = get_moment(
        drop_number_concentration=drop_number_concentration,
        diameter=diameter,  # m
        diameter_bin_width=diameter_bin_width,  # mm
        moment=j,
    )
    Dc = NormalizedGeneralizedGammaPSD.compute_Dc(i=i, j=j, Mi=Mi, Mj=Mj)
    Nc = NormalizedGeneralizedGammaPSD.compute_Nc(i=i, j=j, Mi=Mi, Mj=Mj)

    # Define search space
    if mu is None:
        mu = np.arange(-6, 10, step=0.1)
    if c is None:
        c = np.arange(0.01, 10, step=0.1)

    # Define kwargs
    kwargs = {
        "i": i,
        "j": j,
        "D": ds["diameter_bin_center"].to_numpy(),
        "dD": ds["diameter_bin_width"].to_numpy(),
        "objectives": objectives,
        "return_loss": return_loss,
        "mu": mu,
        "c": c,
    }

    # Define function to create parameters dataset
    def _create_parameters_dataset(da_parameters, i, j):
        # Add parameters coordinates
        da_parameters = da_parameters.assign_coords({"parameters": ["Nc", "Dc", "mu", "c"]})

        # Create parameters dataset
        ds_parameters = da_parameters.to_dataset(dim="parameters")

        # Record the moment orders used to derive Nc and Dc
        ds_parameters["Dc"].attrs["moment_orders"] = f"{i}, {j}"
        ds_parameters["Nc"].attrs["moment_orders"] = f"{i}, {j}"

        # Add DSD model name to the attribute
        ds_parameters.attrs["disdrodb_psd_model"] = "NormalizedGeneralizedGammaPSD"
        ds_parameters.attrs["disdrodb_psd_model_kwargs"] = f"{{'i': {i}, 'j': {j}}}"
        return ds_parameters

    # Return cost function if asked
    if return_loss:
        da_parameters, da_cost_function = xr.apply_ufunc(
            apply_normalized_generalized_gamma_gs,
            # Variables varying over time
            Nc,
            Dc,
            ds["drop_number_concentration"],
            ds["fall_velocity"],
            # Other options
            kwargs=kwargs,
            # Settings
            input_core_dims=[[], [], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
            output_core_dims=[["parameters"], ["c_values", "mu_values"]],
            vectorize=True,
            dask="parallelized",
            # Lengths of the new output_core_dims dimensions.
            dask_gufunc_kwargs={"output_sizes": {"mu_values": len(mu), "c_values": len(c), "parameters": 4}},
            # One dtype per output (parameters, cost function): the number of
            # entries must match the number of outputs declared in output_core_dims.
            output_dtypes=["float64", "float64"],
        )
        ds_parameters = _create_parameters_dataset(da_parameters, i=i, j=j)
        ds_parameters["cost_function"] = da_cost_function
        ds_parameters = ds_parameters.assign_coords({"mu_values": mu, "c_values": c})
        return ds_parameters

    # Otherwise return just best parameters
    da_parameters = xr.apply_ufunc(
        apply_normalized_generalized_gamma_gs,
        # Variables varying over time
        Nc,
        Dc,
        ds["drop_number_concentration"],
        ds["fall_velocity"],
        # Other options
        kwargs=kwargs,
        # Settings
        input_core_dims=[[], [], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
        output_core_dims=[["parameters"]],
        vectorize=True,
        dask="parallelized",
        dask_gufunc_kwargs={"output_sizes": {"parameters": 4}},  # lengths of the new output_core_dims dimensions.
        output_dtypes=["float64"],
    )
    ds_parameters = _create_parameters_dataset(da_parameters, i=i, j=j)
    return ds_parameters
|
|
3084
|
+
|
|
3085
|
+
|
|
3086
|
+
def fit_ngg_on_normalized_space(
    x,
    ND_norm,
    # PSD parameters
    i,
    j,
    mu=None,
    c=None,
    # Optimization options
    transformation="log",
    loss="SSE",
    # Output options
    return_loss=False,
):
    """Fit a NormalizedGeneralizedGammaPSD model in normalized space.

    This function performs a grid search optimization to find the best parameters
    (mu, c) for the NormalizedGeneralizedGammaPSD model by minimizing a cost function.

    Parameters
    ----------
    x : numpy.ndarray
        Normalized diameter parameter (D/Dc) [-].
    ND_norm : numpy.ndarray
        Observed normalized PSD data (N(D)/Nc) [-].
    i : int
        Moment order i of the NormalizedGeneralizedGammaPSD.
    j : int
        Moment order j of the NormalizedGeneralizedGammaPSD.
    mu : int, float or numpy.ndarray, optional
        mu parameter values to search.
        If None, ``np.arange(-6, 20, step=0.1)`` is used.
    c : int, float or numpy.ndarray, optional
        c parameter values to search.
        If None, ``np.arange(0.01, 20, step=0.1)`` is used.
    transformation : str, optional
        Transformation applied to the target quantity before computing the loss.
        The default is ``"log"``.
        Valid options:

        - ``"identity"`` : No transformation
        - ``"log"`` : Logarithmic transformation
        - ``"sqrt"`` : Square root transformation

    loss : str, optional
        Loss function. The default is ``SSE``.
        Valid options are:

        - ``SSE``: Sum of Squared Errors
        - ``SAE``: Sum of Absolute Errors
        - ``MAE``: Mean Absolute Error
        - ``MSE``: Mean Squared Error
        - ``RMSE``: Root Mean Squared Error
        - ``relMAE``: Relative Mean Absolute Error

    return_loss : bool, optional
        If True, return both the loss surface and parameters.
        Default is False.

    Returns
    -------
    parameters : numpy.ndarray
        Best parameters [mu, c].
        An array of NaN values is returned if no valid solution is found.
    total_loss : xarray.DataArray, optional
        2D loss surface with dimensions ``("c_values", "mu_values")``,
        i.e. shape (len(c), len(mu)), and the searched values as coordinates.
        Only returned if return_loss=True.

    Raises
    ------
    ValueError
        If ``loss`` is not one of the valid options listed above.

    """
    # Ensure input is numpy array
    x = np.asarray(x)
    ND_norm = np.asarray(ND_norm)

    # Define search space
    if mu is None:
        mu = np.arange(-6, 20, step=0.1)
    if c is None:
        c = np.arange(0.01, 20, step=0.1)

    # Promote scalars to 1D arrays so that a single candidate value can be
    # used as a dimension coordinate of the loss surface
    mu = np.atleast_1d(mu)
    c = np.atleast_1d(c)

    # Define combinations of parameters for grid search
    # - With indexing="xy" the grids have shape (len(c), len(mu))
    mu_grid, c_grid = np.meshgrid(
        mu,
        c,
        indexing="xy",
    )
    mu_arr = mu_grid.ravel()
    c_arr = c_grid.ravel()

    # Define objectives
    transformation = check_transformation(transformation)
    valid_loss = ["SSE", "SAE", "MAE", "MSE", "RMSE", "relMAE"]
    if loss not in valid_loss:
        raise ValueError(f"Invalid loss {loss}. Valid loss are {valid_loss}")

    objectives = [
        {
            "target": "N(D)",  # dummy. Do not change
            "censoring": "none",  # dummy. Do not change
            "transformation": transformation,
            "loss": loss,
        },
    ]

    # Perform grid search
    with suppress_warnings():

        # Compute N(D)/Nc
        # - One row per (mu, c) combination, one column per x value
        ND_norm_preds = NormalizedGeneralizedGammaPSD.normalized_formula(
            x=x[None, :],
            i=i,
            j=j,
            mu=mu_arr[:, None],
            c=c_arr[:, None],
        )

        # Compute loss
        total_loss = compute_weighted_loss(
            ND_obs=ND_norm,
            ND_preds=ND_norm_preds,
            D=x,
            dD=None,
            V=None,
            objectives=objectives,
        )

    # Define best parameters
    if not np.all(np.isnan(total_loss)):
        best_index = np.nanargmin(total_loss)
        mu_best, c_best = mu_arr[best_index].item(), c_arr[best_index].item()
        parameters = np.array([mu_best, c_best])
    else:
        parameters = np.array([np.nan, np.nan])

    # If asked, return cost function
    if return_loss:
        total_loss = total_loss.reshape(mu_grid.shape)
        total_loss = xr.DataArray(total_loss, dims=["c_values", "mu_values"])
        total_loss = total_loss.assign_coords({"mu_values": mu, "c_values": c})
        return parameters, total_loss
    return parameters
|
|
1655
3224
|
|
|
1656
3225
|
|
|
1657
3226
|
####-----------------------------------------------------------------.
|
|
@@ -1930,10 +3499,40 @@ def _get_exponential_parameters_mom(ds: xr.Dataset, mom_method: str) -> xr.Datas
|
|
|
1930
3499
|
|
|
1931
3500
|
|
|
1932
3501
|
####--------------------------------------------------------------------------------------.
|
|
1933
|
-
####
|
|
3502
|
+
#### GLOBAL DICTIONARIES
|
|
1934
3503
|
|
|
1935
|
-
####--------------------------------------------------------------------------------------.
|
|
1936
3504
|
ATTRS_PARAMS_DICT = {
|
|
3505
|
+
"LognormalPSD": {
|
|
3506
|
+
"Nt": {
|
|
3507
|
+
"standard_name": "number_concentration_of_particles",
|
|
3508
|
+
"units": "m-3",
|
|
3509
|
+
"long_name": "Total Number Concentration",
|
|
3510
|
+
},
|
|
3511
|
+
"mu": {
|
|
3512
|
+
"description": "Mean of the Lognormal PSD",
|
|
3513
|
+
"units": "log(mm)",
|
|
3514
|
+
"long_name": "Mean of the Lognormal PSD",
|
|
3515
|
+
},
|
|
3516
|
+
"sigma": {
|
|
3517
|
+
"standard_name": "Standard Deviation of the Lognormal PSD",
|
|
3518
|
+
"units": "",
|
|
3519
|
+
"long_name": "Standard Deviation of the Lognormal PSD",
|
|
3520
|
+
},
|
|
3521
|
+
},
|
|
3522
|
+
"ExponentialPSD": {
|
|
3523
|
+
"N0": {
|
|
3524
|
+
"description": "Intercept parameter of the Exponential PSD",
|
|
3525
|
+
"standard_name": "particle_size_distribution_intercept",
|
|
3526
|
+
"units": "mm-1 m-3",
|
|
3527
|
+
"long_name": "ExponentialPSD intercept parameter",
|
|
3528
|
+
},
|
|
3529
|
+
"Lambda": {
|
|
3530
|
+
"description": "Slope (rate) parameter of the Exponential PSD",
|
|
3531
|
+
"standard_name": "particle_size_distribution_slope",
|
|
3532
|
+
"units": "mm-1",
|
|
3533
|
+
"long_name": "ExponentialPSD slope parameter",
|
|
3534
|
+
},
|
|
3535
|
+
},
|
|
1937
3536
|
"GammaPSD": {
|
|
1938
3537
|
"N0": {
|
|
1939
3538
|
"description": "Intercept parameter of the Gamma PSD",
|
|
@@ -1972,35 +3571,53 @@ ATTRS_PARAMS_DICT = {
|
|
|
1972
3571
|
"long_name": "NormalizedGammaPSD Median Volume Drop Diameter",
|
|
1973
3572
|
},
|
|
1974
3573
|
},
|
|
1975
|
-
"
|
|
3574
|
+
"GeneralizedGammaPSD": {
|
|
1976
3575
|
"Nt": {
|
|
1977
|
-
"standard_name": "
|
|
3576
|
+
"standard_name": "number_concentration_of_particles",
|
|
1978
3577
|
"units": "m-3",
|
|
1979
3578
|
"long_name": "Total Number Concentration",
|
|
1980
3579
|
},
|
|
3580
|
+
"Lambda": {
|
|
3581
|
+
"description": "Slope (rate) parameter of the Generalized Gamma PSD",
|
|
3582
|
+
"standard_name": "particle_size_distribution_slope",
|
|
3583
|
+
"units": "mm-1",
|
|
3584
|
+
"long_name": "GeneralizedGammaPSD slope parameter",
|
|
3585
|
+
},
|
|
1981
3586
|
"mu": {
|
|
1982
|
-
"description": "
|
|
1983
|
-
"
|
|
1984
|
-
"
|
|
3587
|
+
"description": "Shape parameter of the Generalized Gamma PSD",
|
|
3588
|
+
"standard_name": "particle_size_distribution_shape",
|
|
3589
|
+
"units": "",
|
|
3590
|
+
"long_name": "GeneralizedGammaPSD shape parameter",
|
|
1985
3591
|
},
|
|
1986
|
-
"
|
|
1987
|
-
"
|
|
3592
|
+
"c": {
|
|
3593
|
+
"description": "Shape parameter of the Generalized Gamma PSD",
|
|
3594
|
+
"standard_name": "particle_size_distribution_shape",
|
|
1988
3595
|
"units": "",
|
|
1989
|
-
"long_name": "
|
|
3596
|
+
"long_name": "GeneralizedGammaPSD shape parameter c",
|
|
1990
3597
|
},
|
|
1991
3598
|
},
|
|
1992
|
-
"
|
|
1993
|
-
"
|
|
1994
|
-
"
|
|
1995
|
-
"standard_name": "particle_size_distribution_intercept",
|
|
3599
|
+
"NormalizedGeneralizedGammaPSD": {
|
|
3600
|
+
"Nc": {
|
|
3601
|
+
"standard_name": "characteristic intercept",
|
|
1996
3602
|
"units": "mm-1 m-3",
|
|
1997
|
-
"long_name": "
|
|
3603
|
+
"long_name": "NormalizedGeneralizedGammaPSD Characteristic Intercept Parameter",
|
|
1998
3604
|
},
|
|
1999
|
-
"
|
|
2000
|
-
"
|
|
2001
|
-
"
|
|
2002
|
-
"
|
|
2003
|
-
|
|
3605
|
+
"Dc": {
|
|
3606
|
+
"standard_name": "characteristic_diameter",
|
|
3607
|
+
"units": "mm",
|
|
3608
|
+
"long_name": "NormalizedGeneralizedGammaPSD Characteristic Diameter",
|
|
3609
|
+
},
|
|
3610
|
+
"mu": {
|
|
3611
|
+
"description": "Shape parameter of the Normalized Generalized Gamma PSD",
|
|
3612
|
+
"standard_name": "particle_size_distribution_shape",
|
|
3613
|
+
"units": "",
|
|
3614
|
+
"long_name": "NormalizedGeneralizedGammaPSD Shape Parameter",
|
|
3615
|
+
},
|
|
3616
|
+
"c": {
|
|
3617
|
+
"description": "Shape parameter of the Normalized Generalized Gamma PSD",
|
|
3618
|
+
"standard_name": "particle_size_distribution_shape",
|
|
3619
|
+
"units": "",
|
|
3620
|
+
"long_name": "NormalizedGeneralizedGammaPSD Shape Parameter c",
|
|
2004
3621
|
},
|
|
2005
3622
|
},
|
|
2006
3623
|
}
|
|
@@ -2035,6 +3652,8 @@ OPTIMIZATION_ROUTINES_DICT = {
|
|
|
2035
3652
|
"NormalizedGammaPSD": get_normalized_gamma_parameters_gs,
|
|
2036
3653
|
"LognormalPSD": get_lognormal_parameters_gs,
|
|
2037
3654
|
"ExponentialPSD": get_exponential_parameters_gs,
|
|
3655
|
+
"GeneralizedGammaPSD": get_generalized_gamma_parameters_gs,
|
|
3656
|
+
"NormalizedGeneralizedGammaPSD": get_normalized_generalized_gamma_parameters_gs,
|
|
2038
3657
|
},
|
|
2039
3658
|
"ML": {
|
|
2040
3659
|
"GammaPSD": get_gamma_parameters,
|
|
@@ -2056,39 +3675,181 @@ def available_optimization(psd_model):
|
|
|
2056
3675
|
return [opt for opt in list(OPTIMIZATION_ROUTINES_DICT) if psd_model in OPTIMIZATION_ROUTINES_DICT[opt]]
|
|
2057
3676
|
|
|
2058
3677
|
|
|
3678
|
+
def get_psd_model_parameter_names(psd_model):
    """Return the list of parameter names registered in ATTRS_PARAMS_DICT for ``psd_model``."""
    model_attrs = ATTRS_PARAMS_DICT[psd_model]
    return list(model_attrs)
|
|
3681
|
+
|
|
3682
|
+
|
|
3683
|
+
def check_psd_parameters(psd_model, parameters):
    """Validate that every name in ``parameters`` is a parameter of ``psd_model``.

    Parameters
    ----------
    psd_model : str
        PSD model name, resolved through ``get_psd_model_parameter_names``.
    parameters : iterable of str
        Candidate parameter names.

    Returns
    -------
    iterable
        The ``parameters`` argument itself, unchanged.

    Raises
    ------
    ValueError
        For the first name (in iteration order) that is not a valid
        parameter of ``psd_model``.
    """
    valid_params = get_psd_model_parameter_names(psd_model)
    unknown = [name for name in parameters if name not in valid_params]
    if unknown:
        # Report the first offending name only.
        param = unknown[0]
        raise ValueError(
            f"Invalid parameter '{param}' for PSD model '{psd_model}'. Valid parameters are {valid_params}.",
        )
    return parameters
|
|
3692
|
+
|
|
3693
|
+
|
|
2059
3694
|
####--------------------------------------------------------------------------------------.
|
|
2060
|
-
####
|
|
3695
|
+
#### CONFIGURATION CHECKERS
|
|
3696
|
+
#### - GS
|
|
2061
3697
|
|
|
2062
3698
|
|
|
2063
|
-
def
|
|
2064
|
-
"""Check valid
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
3699
|
+
def check_fixed_parameters(psd_model, fixed_parameters):
    """Validate the ``fixed_parameters`` argument used by Grid Search fitting.

    Parameters
    ----------
    psd_model : str
        Name of the PSD model the parameters refer to.
    fixed_parameters : dict or None
        Mapping of parameter name to scalar value.
        For ``psd_model="NormalizedGeneralizedGammaPSD"`` the moment orders
        ``"i"`` and ``"j"`` are mandatory keys.

    Returns
    -------
    dict or None
        ``None`` when ``fixed_parameters`` is ``None``. Otherwise a new
        dictionary with the same keys and every value converted to ``float``.
        The input dictionary is left untouched.

    Raises
    ------
    ValueError
        If ``fixed_parameters`` is not a dictionary, if the mandatory
        ``"i"``/``"j"`` keys are missing for ``NormalizedGeneralizedGammaPSD``,
        if a key is not a parameter of ``psd_model``, or if a value is a
        string, a non-scalar, or cannot be converted to ``float``.
    """
    requires_moment_orders = psd_model == "NormalizedGeneralizedGammaPSD"

    if fixed_parameters is None:
        if requires_moment_orders:
            raise ValueError(
                "For NormalizedGeneralizedGammaPSD fixed_parameters must include 'i' and 'j' moment orders.",
            )
        return None
    if not isinstance(fixed_parameters, dict):
        raise ValueError("fixed_parameters must be a dictionary.")

    # Extract the set of parameter names
    parameters = set(fixed_parameters)

    # For NormalizedGeneralizedGammaPSD, i and j must be provided.
    # They are moment orders rather than PSD parameters, so they are excluded
    # from the PSD parameter-name check below.
    if requires_moment_orders:
        if not {"i", "j"} <= parameters:
            raise ValueError(
                "fixed_parameters for NormalizedGeneralizedGammaPSD must include 'i' and 'j' moment orders.",
            )
        parameters -= {"i", "j"}

    # Check validity of fixed_parameters keys (only PSD parameters are allowed)
    check_psd_parameters(psd_model=psd_model, parameters=parameters)

    # Check value validity and build a fresh dict, so that the caller's
    # dictionary is not modified as a side effect of validation
    validated = {}
    for param_name, param_value in fixed_parameters.items():
        if isinstance(param_value, str):
            raise ValueError(
                f"Invalid value for '{param_name}': strings are not allowed.",
            )
        if not np.isscalar(param_value):
            raise ValueError(
                f"Invalid value for '{param_name}': expected scalar, got {type(param_value).__name__}.",
            )
        try:
            validated[param_name] = float(param_value)
        except (TypeError, ValueError) as err:
            # e.g. complex numbers or bytes are numpy scalars but not real numbers
            raise ValueError(
                f"Invalid value for '{param_name}': cannot convert {param_value!r} to float.",
            ) from err
    return validated
|
|
3737
|
+
|
|
3738
|
+
|
|
3739
|
+
def check_search_space_parameters(search_space, psd_model):
    """Check that the keys of ``search_space`` are parameters of ``psd_model``.

    Parameters
    ----------
    search_space : dict or None
        Mapping of parameter name to search range specification.
    psd_model : str
        Name of the PSD model.

    Returns
    -------
    dict or None
        The ``search_space`` argument itself (``None`` stays ``None``).

    Raises
    ------
    ValueError
        Raised by ``check_psd_parameters`` when a key is not a valid
        parameter of ``psd_model``.
    """
    if search_space is not None:
        names = list(search_space.keys())
        check_psd_parameters(psd_model=psd_model, parameters=names)
    return search_space
|
|
3746
|
+
|
|
3747
|
+
|
|
3748
|
+
def check_search_space(search_space):
    """Validate a grid-search ``search_space`` dictionary.

    Parameters
    ----------
    search_space : dict or None
        Mapping of parameter name to a dictionary with the keys
        ``"min"``, ``"max"`` and ``"step"``.
        Example: ``{"mu": {"min": 0, "max": 10, "step": 0.2}}``

    Returns
    -------
    dict or None
        ``None`` when ``search_space`` is ``None`` or empty, otherwise the
        ``search_space`` argument itself.

    Raises
    ------
    ValueError
        If ``search_space`` is not a dictionary, if an entry is not a
        dictionary with ``"min"``, ``"max"`` and ``"step"`` keys, if a bound
        or the step is not a finite real number, if ``min >= max``, or if
        ``step`` is ``None`` or not strictly positive.
    """
    import math

    def _is_finite_real(value):
        # math.isfinite raises TypeError for strings, None, containers and
        # complex numbers, and OverflowError for integers too large for a float.
        try:
            return math.isfinite(value)
        except (TypeError, OverflowError):
            return False

    if search_space is None:
        return None
    if not isinstance(search_space, dict):
        raise ValueError("search_space must be a dictionary.")
    if len(search_space) == 0:
        return None

    # Check validity of each parameter search space specification
    for param_name, space in search_space.items():
        if not isinstance(space, dict) or any(key not in space for key in ("min", "max", "step")):
            raise ValueError(
                f"Search space for '{param_name}' must be a dict with 'min', 'max', and 'step' keys. " f"Got: {space}",
            )
        min_val = space["min"]
        max_val = space["max"]
        step = space["step"]

        # Validate bounds.
        # Non-numeric, NaN or infinite bounds cannot define a grid. Strings
        # would otherwise pass the ordering test below and NaN compares False.
        for key, value in (("min", min_val), ("max", max_val)):
            if not _is_finite_real(value):
                raise ValueError(
                    f"Invalid '{key}' value for '{param_name}': expected a finite real number, got {value!r}.",
                )
        if min_val >= max_val:
            raise ValueError(
                f"Invalid search bounds for '{param_name}': min ({min_val}) >= max ({max_val}). " f"Require min < max.",
            )

        # Validate step
        if step is None:
            raise ValueError(
                f"Search space for '{param_name}' must include 'step' key. Got: {space}",
            )
        if not _is_finite_real(step):
            raise ValueError(
                f"Invalid 'step' value for '{param_name}': expected a finite real number, got {step!r}.",
            )
        if step <= 0:
            raise ValueError(
                f"Invalid step size for '{param_name}': step ({step}) must be positive.",
            )
    return search_space
|
|
2080
3779
|
|
|
2081
3780
|
|
|
2082
|
-
def
|
|
2083
|
-
"""
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
3781
|
+
def define_gs_parameters(psd_model, fixed_parameters=None, search_space=None):
    """Define PSD model parameters for Grid Search optimization routines.

    This function constructs a dictionary of parameter values ready for grid search,
    converting search space ranges into numpy arrays.

    Parameters
    ----------
    psd_model : str
        Name of the PSD model. It selects the set of parameter names that
        are initialized in the returned dictionary.
    fixed_parameters : dict, optional
        Dictionary with parameter names as keys and scalar values as values.
        Example: {"mu": 2.0}
    search_space : dict, optional
        Dictionary defining search ranges for parameters.
        Each parameter must define:
        - 'min' : float, Minimum value
        - 'max' : float, Maximum value
        - 'step' : float, Spacing between consecutive grid values
        Example: {"Lambda": {"min": 0, "max": 10, "step": 0.2}}

    Returns
    -------
    dict
        Dictionary with PSD parameter names as keys and values as:
        - scalar (float) for fixed parameters
        - numpy.ndarray for parameters with a search space
        - None for model parameters that are neither fixed nor searched
        Empty dict if both inputs are None or empty

    Notes
    -----
    The grid of a searched parameter is ``min + k * step`` for
    ``k = 0, 1, ...`` and never exceeds ``max``. ``max`` itself is included
    when it lies on the grid (up to floating-point rounding).
    If a parameter appears in both ``fixed_parameters`` and ``search_space``,
    the search space takes precedence.

    """
    # Check validity of inputs
    search_space = check_search_space(search_space=search_space)
    search_space = check_search_space_parameters(search_space=search_space, psd_model=psd_model)
    fixed_parameters = check_fixed_parameters(psd_model=psd_model, fixed_parameters=fixed_parameters)

    # Return empty dict if both inputs are empty
    if not fixed_parameters and not search_space:
        return {}

    # Define parameters dictionary (initialize with None values)
    required_parameters_dict = {
        "NormalizedGeneralizedGammaPSD": ["mu", "c", "i", "j"],
        "NormalizedGammaPSD": ["mu"],
        "GeneralizedGammaPSD": ["Lambda", "mu", "c"],
        "LognormalPSD": ["mu", "sigma"],
        "GammaPSD": ["Lambda", "mu"],
        "ExponentialPSD": ["Lambda"],
    }
    parameters = dict.fromkeys(required_parameters_dict[psd_model])

    # Process fixed_parameters (scalar values)
    if fixed_parameters:
        parameters.update(fixed_parameters)

    # Process search_space (arrays of candidate values)
    if search_space:
        for param_name, space in search_space.items():
            min_val = space["min"]
            max_val = space["max"]
            step = space["step"]
            # Count the steps explicitly instead of np.arange(min, max + step, step).
            # With a floating-point (or non-aligned) step, that form can return a
            # last value larger than max. The small tolerance keeps max in the
            # grid when (max - min) / step is an integer up to rounding error.
            n_steps = int(np.floor((max_val - min_val) / step + 1e-9))
            parameters[param_name] = min_val + step * np.arange(n_steps + 1)

    return parameters
|
|
2091
3849
|
|
|
3850
|
+
|
|
3851
|
+
# -----------------------------------------------------------------
|
|
3852
|
+
#### - ML
|
|
2092
3853
|
def check_likelihood(likelihood):
|
|
2093
3854
|
"""Check valid likelihood argument."""
|
|
2094
3855
|
valid_likelihood = ["multinomial", "poisson"]
|
|
@@ -2126,6 +3887,8 @@ def check_optimizer(optimizer):
|
|
|
2126
3887
|
return optimizer
|
|
2127
3888
|
|
|
2128
3889
|
|
|
3890
|
+
# -----------------------------------------------------------------
|
|
3891
|
+
#### - MOM
|
|
2129
3892
|
def check_mom_methods(mom_methods, psd_model, allow_none=False):
|
|
2130
3893
|
"""Check valid mom_methods arguments."""
|
|
2131
3894
|
if isinstance(mom_methods, (str, type(None))):
|
|
@@ -2142,6 +3905,21 @@ def check_mom_methods(mom_methods, psd_model, allow_none=False):
|
|
|
2142
3905
|
return mom_methods
|
|
2143
3906
|
|
|
2144
3907
|
|
|
3908
|
+
# -----------------------------------------------------------------
|
|
3909
|
+
#### - WRAPPERS
|
|
3910
|
+
|
|
3911
|
+
|
|
3912
|
+
def check_psd_model(psd_model, optimization):
    """Check that ``psd_model`` can be fitted with the given ``optimization``.

    The accepted models are the keys of
    ``OPTIMIZATION_ROUTINES_DICT[optimization]``.

    Raises
    ------
    NotImplementedError
        If ``psd_model`` is not among the accepted models.
    """
    valid_psd_models = list(OPTIMIZATION_ROUTINES_DICT[optimization])
    if psd_model in valid_psd_models:
        return
    raise NotImplementedError(
        f"{optimization} optimization is not available for 'psd_model' {psd_model}. "
        f"Accepted PSD models are {valid_psd_models}.",
    )
|
|
3921
|
+
|
|
3922
|
+
|
|
2145
3923
|
def check_optimization(optimization):
|
|
2146
3924
|
"""Check valid optimization argument."""
|
|
2147
3925
|
valid_optimization = list(OPTIMIZATION_ROUTINES_DICT)
|
|
@@ -2152,8 +3930,8 @@ def check_optimization(optimization):
|
|
|
2152
3930
|
return optimization
|
|
2153
3931
|
|
|
2154
3932
|
|
|
2155
|
-
def
|
|
2156
|
-
"""Check valid
|
|
3933
|
+
def check_optimization_settings(optimization_settings, optimization, psd_model):
|
|
3934
|
+
"""Check valid optimization_settings."""
|
|
2157
3935
|
dict_arguments = {
|
|
2158
3936
|
"ML": {
|
|
2159
3937
|
"init_method": None,
|
|
@@ -2163,9 +3941,8 @@ def check_optimization_kwargs(optimization_kwargs, optimization, psd_model):
|
|
|
2163
3941
|
"optimizer": check_optimizer,
|
|
2164
3942
|
},
|
|
2165
3943
|
"GS": {
|
|
2166
|
-
"
|
|
2167
|
-
"
|
|
2168
|
-
"error_order": None,
|
|
3944
|
+
"objectives": check_objectives,
|
|
3945
|
+
"search_space": check_search_space,
|
|
2169
3946
|
},
|
|
2170
3947
|
"MOM": {
|
|
2171
3948
|
"mom_methods": None,
|
|
@@ -2174,38 +3951,55 @@ def check_optimization_kwargs(optimization_kwargs, optimization, psd_model):
|
|
|
2174
3951
|
optimization = check_optimization(optimization)
|
|
2175
3952
|
check_psd_model(psd_model=psd_model, optimization=optimization)
|
|
2176
3953
|
|
|
3954
|
+
# Check fixed parameters validity for optimization="GS"
|
|
3955
|
+
if optimization == "GS":
|
|
3956
|
+
optimization_settings["fixed_parameters"] = check_fixed_parameters(
|
|
3957
|
+
psd_model=psd_model,
|
|
3958
|
+
fixed_parameters=optimization_settings.get("fixed_parameters", None),
|
|
3959
|
+
)
|
|
3960
|
+
|
|
2177
3961
|
# Retrieve the expected arguments for the given optimization method
|
|
2178
3962
|
expected_arguments = dict_arguments.get(optimization, {})
|
|
2179
3963
|
|
|
2180
|
-
# Check for missing arguments in
|
|
2181
|
-
# missing_args = [arg for arg in expected_arguments if arg not in
|
|
3964
|
+
# Check for missing arguments in optimization_settings
|
|
3965
|
+
# missing_args = [arg for arg in expected_arguments if arg not in optimization_settings]
|
|
2182
3966
|
# if missing_args:
|
|
2183
3967
|
# raise ValueError(f"Missing required arguments for {optimization} optimization: {missing_args}")
|
|
2184
3968
|
|
|
2185
3969
|
# Validate arguments values
|
|
2186
3970
|
_ = [
|
|
2187
|
-
check(
|
|
3971
|
+
check(optimization_settings[arg])
|
|
2188
3972
|
for arg, check in expected_arguments.items()
|
|
2189
|
-
if callable(check) and arg in
|
|
3973
|
+
if callable(check) and arg in optimization_settings
|
|
2190
3974
|
]
|
|
2191
3975
|
|
|
2192
3976
|
# Further special checks
|
|
2193
|
-
if optimization == "MOM" and "mom_methods" in
|
|
2194
|
-
_ = check_mom_methods(mom_methods=
|
|
2195
|
-
if optimization == "ML" and
|
|
2196
|
-
_ = check_mom_methods(mom_methods=
|
|
3977
|
+
if optimization == "MOM" and "mom_methods" in optimization_settings:
|
|
3978
|
+
_ = check_mom_methods(mom_methods=optimization_settings["mom_methods"], psd_model=psd_model)
|
|
3979
|
+
if optimization == "ML" and optimization_settings.get("init_method", None) is not None:
|
|
3980
|
+
_ = check_mom_methods(mom_methods=optimization_settings["init_method"], psd_model=psd_model, allow_none=True)
|
|
2197
3981
|
|
|
2198
3982
|
|
|
2199
3983
|
####--------------------------------------------------------------------------------------.
|
|
2200
3984
|
#### Wrappers for fitting
|
|
2201
3985
|
|
|
2202
3986
|
|
|
2203
|
-
def
|
|
3987
|
+
def _format_optimization_settings(settings):
|
|
3988
|
+
if isinstance(settings, dict):
|
|
3989
|
+
return ", ".join(f"{k}: {v}" for k, v in settings.items())
|
|
3990
|
+
if isinstance(settings, list):
|
|
3991
|
+
blocks = []
|
|
3992
|
+
for d in settings:
|
|
3993
|
+
opt_str = _format_optimization_settings(d)
|
|
3994
|
+
blocks.append(opt_str)
|
|
3995
|
+
return " | ".join(blocks)
|
|
3996
|
+
raise TypeError("optimization_settings must be dict or list of dict")
|
|
3997
|
+
|
|
3998
|
+
|
|
3999
|
+
def _finalize_attributes(ds_params, psd_model, optimization, optimization_settings):
    """Store the fitting metadata in ``ds_params.attrs`` and return ``ds_params``.

    Three attributes are written: ``disdrodb_psd_model``,
    ``disdrodb_psd_optimization`` and ``disdrodb_psd_optimization_settings``.
    The last one is the string produced by ``_format_optimization_settings``.
    """
    attrs = ds_params.attrs
    attrs["disdrodb_psd_model"] = psd_model
    attrs["disdrodb_psd_optimization"] = optimization
    settings_repr = _format_optimization_settings(optimization_settings)
    attrs["disdrodb_psd_optimization_settings"] = settings_repr
    return ds_params
|
|
2210
4004
|
|
|
2211
4005
|
|
|
@@ -2248,12 +4042,12 @@ def get_mom_parameters(ds: xr.Dataset, psd_model: str, mom_methods=None) -> xr.D
|
|
|
2248
4042
|
ds_params = ds_params.assign_coords({"mom_method": mom_methods})
|
|
2249
4043
|
|
|
2250
4044
|
# Add model attributes
|
|
2251
|
-
|
|
4045
|
+
optimization_settings = {"mom_methods": mom_methods}
|
|
2252
4046
|
ds_params = _finalize_attributes(
|
|
2253
4047
|
ds_params=ds_params,
|
|
2254
4048
|
psd_model=psd_model,
|
|
2255
4049
|
optimization="MOM",
|
|
2256
|
-
|
|
4050
|
+
optimization_settings=optimization_settings,
|
|
2257
4051
|
)
|
|
2258
4052
|
return ds_params
|
|
2259
4053
|
|
|
@@ -2272,14 +4066,16 @@ def get_ml_parameters(
|
|
|
2272
4066
|
|
|
2273
4067
|
Parameters
|
|
2274
4068
|
----------
|
|
2275
|
-
|
|
4069
|
+
ds : xarray.Dataset
|
|
2276
4070
|
Input dataset containing drop number concentration data and diameter information.
|
|
2277
4071
|
It must include the following variables:
|
|
4072
|
+
|
|
2278
4073
|
- ``drop_number_concentration``: The number concentration of drops.
|
|
2279
4074
|
- ``diameter_bin_width``": The width of each diameter bin.
|
|
2280
4075
|
- ``diameter_bin_lower``: The lower bounds of the diameter bins.
|
|
2281
4076
|
- ``diameter_bin_upper``: The upper bounds of the diameter bins.
|
|
2282
4077
|
- ``diameter_bin_center``: The center values of the diameter bins.
|
|
4078
|
+
|
|
2283
4079
|
psd_model : str
|
|
2284
4080
|
The PSD model to fit. See ``available_psd_models()``.
|
|
2285
4081
|
init_method: str or list
|
|
@@ -2339,7 +4135,7 @@ def get_ml_parameters(
|
|
|
2339
4135
|
ds_params = ds_params.assign_coords({"init_method": init_method})
|
|
2340
4136
|
|
|
2341
4137
|
# Add model attributes
|
|
2342
|
-
|
|
4138
|
+
optimization_settings = {
|
|
2343
4139
|
"init_method": init_method,
|
|
2344
4140
|
"probability_method": "probability_method",
|
|
2345
4141
|
"likelihood": likelihood,
|
|
@@ -2350,47 +4146,191 @@ def get_ml_parameters(
|
|
|
2350
4146
|
ds_params=ds_params,
|
|
2351
4147
|
psd_model=psd_model,
|
|
2352
4148
|
optimization="ML",
|
|
2353
|
-
|
|
4149
|
+
optimization_settings=optimization_settings,
|
|
2354
4150
|
)
|
|
2355
4151
|
|
|
2356
4152
|
# Return dataset with parameters
|
|
2357
4153
|
return ds_params
|
|
2358
4154
|
|
|
2359
4155
|
|
|
2360
|
-
def get_gs_parameters(ds, psd_model,
|
|
2361
|
-
"""
|
|
2362
|
-
# Check valid psd_model
|
|
2363
|
-
check_psd_model(psd_model, optimization="GS")
|
|
4156
|
+
def get_gs_parameters(ds, psd_model, fixed_parameters=None, objectives=None, search_space=None, return_loss=False):
    """Estimate PSD model parameters using Grid Search optimization with multiple objectives.

    This function estimates particle size distribution (PSD) model parameters
    by minimizing a weighted combination of errors across multiple objectives through
    grid search over the parameter space.

    Parameters
    ----------
    ds : xarray.Dataset
        Input dataset containing PSD observations. Must include:

        - ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
        - ``diameter_bin_center`` : Diameter bin centers [mm]
        - ``diameter_bin_width`` : Diameter bin widths [mm]
        - ``fall_velocity`` : Drop fall velocity [m s⁻¹]. If missing, it is
          computed with ``get_rain_fall_velocity_from_ds`` and added to ``ds``.

    psd_model : str
        Name of the PSD model to fit. It must be available for ``"GS"``
        optimization. Valid options are:

        - ``"GammaPSD"`` : Gamma distribution
        - ``"NormalizedGammaPSD"`` : Normalized gamma distribution
        - ``"LognormalPSD"`` : Lognormal distribution
        - ``"ExponentialPSD"`` : Exponential distribution
        - ``"GeneralizedGammaPSD"`` : Generalized gamma distribution
        - ``"NormalizedGeneralizedGammaPSD"`` : Normalized generalized gamma distribution

    fixed_parameters : dict, optional
        PSD model parameters to keep fixed. Keys are parameter names,
        values are scalars. Example: {"mu": 2.0, "Lambda": 1.5}
        For ``"NormalizedGeneralizedGammaPSD"`` the moment orders ``"i"`` and
        ``"j"`` must be provided, e.g. {"i": 3, "j": 4}.
    objectives : list of dict, optional
        List of optimization objectives. If None (default), ``DEFAULT_OBJECTIVES``
        is used. Each objective dict must contain:

        - ``"target"`` : str
            Target quantity to optimize. Valid options:

            - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
            - ``"H(x)"`` : Normalized drop number concentration [-]. Only for Normalized PSD models.
            - ``"R"`` : Rain rate [mm h⁻¹]
            - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
            - ``"LWC"`` : Liquid water content [g m⁻³]
            - ``"M<p>"`` : Moment of order p

        - ``"transformation"`` : str
            Transformation applied before computing the error. Valid options:

            - ``"identity"`` : No transformation
            - ``"log"`` : Logarithmic transformation
            - ``"sqrt"`` : Square root transformation

        - ``"censoring"`` : str
            Censoring applied to observed PSD. Valid options:

            - ``"none"`` : No censoring applied
            - ``"left"`` : Left-censored (remove leading zero bins)
            - ``"right"`` : Right-censored (remove trailing zero bins)
            - ``"both"`` : Both sides censored

        - ``"loss"`` : str
            Error metric.
            For ``"N(D)"`` and ``"H(x)"`` valid options are ``"SSE"``, ``"SAE"``,
            ``"MAE"``, ``"MSE"``, ``"RMSE"``, ``"relMAE"``
            ``"KLDiv"``, ``"JSD"``, ``"WD"``, ``"KS"``.

            For ``"R"``, ``"Z"``, ``"LWC"``, and ``"M<p>"`` valid options are
            ``"AE"``, ``"SE"``.

        - ``"loss_weight"`` : float, optional
            Weight for this objective in the combined loss (default: 1.0 for single objective).
            When multiple objectives are provided, weights are normalized to sum to 1.0.

    search_space : dict, optional
        Search space configuration for parameters. Each parameter can define:

        - ``"min"`` : float, Minimum value
        - ``"max"`` : float, Maximum value
        - ``"step"`` : float, Step size for parameter grid

        Example: {"mu": {"min": 0, "max": 10, "step": 0.2}}
    return_loss : bool, optional
        Forwarded unchanged to the model-specific grid search routine.
        The default is False.

    Returns
    -------
    ds_params : xarray.Dataset
        Dataset containing the estimated PSD model parameters.
        Variables depend on the selected ``psd_model``:

        - ``GammaPSD`` : ``N0``, ``mu``, ``Lambda``
        - ``NormalizedGammaPSD`` : ``Nw``, ``mu``, ``Dm``
        - ``LognormalPSD`` : ``Nt``, ``mu``, ``sigma``
        - ``ExponentialPSD`` : ``N0``, ``Lambda``
        - ``NormalizedGeneralizedGammaPSD`` : ``Nc``, ``Dc``, ``mu``, ``c``

        Each parameter variable includes attributes with name, units, and description.
        Dataset attributes contain optimization metadata. The
        ``disdrodb_psd_optimization_settings`` attribute records the
        ``objectives`` only.

    Raises
    ------
    ValueError
        If objectives structure is invalid or fixed_parameters/search_space bounds are invalid
    NotImplementedError
        If psd_model is not supported for GS optimization

    Notes
    -----
    Grid search optimization explores a predefined parameter space to find
    the combination that minimizes the specified loss across all objectives.
    When multiple objectives are provided, losses are combined using normalized weights.

    If ``drop_number_concentration`` values are all zeros or contain
    non-finite values, the output PSD parameters are set to NaN.

    Examples
    --------
    Single objective optimization:

    >>> objectives = [{
    ...     "target": "N(D)",
    ...     "transformation": "log",
    ...     "censoring": "none",
    ...     "loss": "MAE"
    ... }]
    >>> ds_params = get_gs_parameters(ds, psd_model="GammaPSD", objectives=objectives)

    Multi-objective optimization:

    >>> objectives = [
    ...     {
    ...         "target": "N(D)",
    ...         "transformation": "identity",
    ...         "censoring": "left",
    ...         "loss": "MAE",
    ...         "loss_weight": 0.6
    ...     },
    ...     {
    ...         "target": "LWC",
    ...         "transformation": "log",
    ...         "censoring": "both",
    ...         "loss": "AE",
    ...         "loss_weight": 0.4
    ...     }
    ... ]
    >>> search_space = {
    ...     "mu": {"min": 0, "max": 10, "step": 0.2},
    ...     "Lambda": {"min": 0.1, "max": 5, "step": 0.1}
    ... }
    >>> ds_params = get_gs_parameters(
    ...     ds, psd_model="GammaPSD", objectives=objectives, search_space=search_space
    ... )
    """
    # Validate inputs
    check_psd_model(psd_model, optimization="GS")
    objectives = check_objectives(objectives)
    if objectives is None:
        objectives = DEFAULT_OBJECTIVES

    # Define PSD model parameters (scalars or arrays for grid search)
    parameters = define_gs_parameters(
        psd_model=psd_model,
        fixed_parameters=fixed_parameters,
        search_space=search_space,
    )

    # Ensure fall velocity is available if any objective needs it (e.g., R target)
    # NOTE: the variable is added to the dataset passed by the caller
    if "fall_velocity" not in ds:
        ds["fall_velocity"] = get_rain_fall_velocity_from_ds(ds)

    # Retrieve model-specific grid search function
    func = OPTIMIZATION_ROUTINES_DICT["GS"][psd_model]

    # Call model-specific function with unpacked parameters, objectives, and the requested return_loss
    ds_params = func(ds, **parameters, objectives=objectives, return_loss=return_loss)

    # Finalize dataset attributes with optimization metadata
    ds_params = _finalize_attributes(
        ds_params=ds_params,
        psd_model=psd_model,
        optimization="GS",
        optimization_settings=objectives,
    )
    return ds_params
|
|
2395
4335
|
|
|
2396
4336
|
|
|
@@ -2417,14 +4357,142 @@ def estimate_model_parameters(
|
|
|
2417
4357
|
ds,
|
|
2418
4358
|
psd_model,
|
|
2419
4359
|
optimization,
|
|
2420
|
-
|
|
4360
|
+
optimization_settings=None,
|
|
2421
4361
|
):
|
|
2422
|
-
"""
|
|
4362
|
+
"""Estimate particle size distribution model parameters.
|
|
4363
|
+
|
|
4364
|
+
This is the main interface function for fitting PSD models to observed data.
|
|
4365
|
+
It supports three optimization methods: Maximum Likelihood (ML), Method of
|
|
4366
|
+
Moments (MOM), and Grid Search (GS).
|
|
4367
|
+
|
|
4368
|
+
Parameters
|
|
4369
|
+
----------
|
|
4370
|
+
ds : xarray.Dataset
|
|
4371
|
+
Input dataset containing PSD observations. Must include:
|
|
4372
|
+
|
|
4373
|
+
- ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
4374
|
+
- ``diameter_bin_center`` : Diameter bin centers [mm]
|
|
4375
|
+
- ``diameter_bin_width`` : Diameter bin widths [mm]
|
|
4376
|
+
|
|
4377
|
+
Additional variables required for specific optimization methods:
|
|
4378
|
+
|
|
4379
|
+
- For ML: ``diameter_bin_lower``, ``diameter_bin_upper``
|
|
4380
|
+
- For GS with target='R': ``fall_velocity`` (auto-computed if missing)
|
|
4381
|
+
- For MOM: Moment variables ``M0``, ``M1``, ..., ``M6`` (depending on method)
|
|
4382
|
+
psd_model : str
|
|
4383
|
+
Name of the PSD model to fit. Valid options:
|
|
4384
|
+
|
|
4385
|
+
- ``"GammaPSD"`` : Gamma distribution
|
|
4386
|
+
- ``"NormalizedGammaPSD"`` : Normalized gamma distribution
|
|
4387
|
+
- ``"LognormalPSD"`` : Lognormal distribution
|
|
4388
|
+
- ``"ExponentialPSD"`` : Exponential distribution
|
|
4389
|
+
|
|
4390
|
+
Use ``available_optimization(psd_model)`` to check which optimization
|
|
4391
|
+
methods are available for a given model.
|
|
4392
|
+
optimization : str
|
|
4393
|
+
Optimization method to use. Valid options:
|
|
4394
|
+
|
|
4395
|
+
- ``"ML"`` : Maximum Likelihood estimation
|
|
4396
|
+
- ``"MOM"`` : Method of Moments
|
|
4397
|
+
- ``"GS"`` : Grid Search
|
|
4398
|
+
optimization_settings : dict, optional
|
|
4399
|
+
Dictionary of keyword arguments specific to the chosen optimization method.
|
|
4400
|
+
|
|
4401
|
+
For ``optimization="ML"``:
|
|
4402
|
+
|
|
4403
|
+
- ``init_method`` : str or list, Method(s) of moments for parameter initialization
|
|
4404
|
+
- ``probability_method`` : str, Method to compute probabilities (default: 'cdf')
|
|
4405
|
+
- ``likelihood`` : str, Likelihood function ('multinomial' or 'poisson', default: 'multinomial')
|
|
4406
|
+
- ``truncated_likelihood`` : bool, Use truncated likelihood (default: True)
|
|
4407
|
+
- ``optimizer`` : str, Optimization algorithm (default: 'Nelder-Mead')
|
|
4408
|
+
|
|
4409
|
+
For ``optimization="GS"``:
|
|
4410
|
+
|
|
4411
|
+
- ``fixed_parameters`` : dict, optional
|
|
4412
|
+
Allows to specify PSD model parameters to fixed value(s).
|
|
4413
|
+
For example for psd_model=GammaPSD one can use fixed_parameters={"mu": 3}
|
|
4414
|
+
For psd_model=NormalizedGeneralizedGammaPSD, it's mandatory to
|
|
4415
|
+
specify i and j moment order with: fixed_parameters={"i": 3, "j": 4}
|
|
4416
|
+
- ``objectives``: dict, optional
|
|
4417
|
+
List of optimization objectives. If None (default), use DEFAULT_OBJECTIVES.
|
|
4418
|
+
Each objective dict must contain:
|
|
4419
|
+
|
|
4420
|
+
- ``"target"`` : str
|
|
4421
|
+
Target quantity to optimize. Valid options:
|
|
4422
|
+
- ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
|
|
4423
|
+
- ``"H(x)"`` : Normalized drop number concentration [-]. Only for Normalized PSD models.
|
|
4424
|
+
- ``"R"`` : Rain rate [mm h⁻¹]
|
|
4425
|
+
- ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
|
|
4426
|
+
- ``"LWC"`` : Liquid water content [g m⁻³]
|
|
4427
|
+
- ``"M<p>"`` : Moment of order p
|
|
4428
|
+
- ``"transformation"`` : str
|
|
4429
|
+
Transformation applied before computing the error. Valid options:
|
|
4430
|
+
- ``"identity"`` : No transformation
|
|
4431
|
+
- ``"log"`` : Logarithmic transformation
|
|
4432
|
+
- ``"sqrt"`` : Square root transformation
|
|
4433
|
+
- ``"censoring"`` : str
|
|
4434
|
+
Censoring applied to observed PSD. Valid options:
|
|
4435
|
+
- ``"none"`` : No censoring applied
|
|
4436
|
+
- ``"left"`` : Left-censored (remove leading zero bins)
|
|
4437
|
+
- ``"right"`` : Right-censored (remove trailing zero bins)
|
|
4438
|
+
- ``"both"`` : Both sides censored
|
|
4439
|
+
- ``"loss"`` : str
|
|
4440
|
+
Error metric.
|
|
4441
|
+
For ``"N(D)"`` and ``"H(x)"`` valid options are
|
|
4442
|
+
``"SSE"``, ``"SAE"``, ``"MAE"``, ``"MSE"``, ``"RMSE"``, ``"relMAE"``
|
|
4443
|
+
``"KLDiv"``, ``"JSD"``, ``"WD"``, ``"KS"``.
|
|
4444
|
+
For ``"R"``, ``"Z"``, ``"LWC"``, and ``"M<p>"`` valid options are
|
|
4445
|
+
``"AE"``, ``"SE"``.
|
|
4446
|
+
- ``"loss_weight"`` : float, optional
|
|
4447
|
+
Weight for this objective in the combined loss (default: 1.0 for single objective).
|
|
4448
|
+
When multiple objectives are provided, weights are normalized to sum to 1.0.
|
|
4449
|
+
|
|
4450
|
+
|
|
4451
|
+
- ``search_space``, dict, optional
|
|
4452
|
+
Search space configuration for parameters. If None (default), use reasonable defaults.
|
|
4453
|
+
|
|
4454
|
+
Each parameter can define:
|
|
4455
|
+
|
|
4456
|
+
- ``"min"`` : float, Minimum value
|
|
4457
|
+
- ``"max"`` : float, Maximum value
|
|
4458
|
+
- ``"step"`` : float, Step size for parameter grid
|
|
4459
|
+
|
|
4460
|
+
Example:
|
|
4461
|
+
{"mu": {"min": 0, "max": 10, "step": 0.2},
|
|
4462
|
+
"Lambda": {"min": 0.1, "max": 5, "step": 0.1}}
|
|
4463
|
+
|
|
4464
|
+
For ``optimization="MOM"``:
|
|
4465
|
+
|
|
4466
|
+
- ``mom_methods`` : str or list, Method(s) of moments to use (e.g., 'M234')
|
|
4467
|
+
|
|
4468
|
+
Returns
|
|
4469
|
+
-------
|
|
4470
|
+
ds_params : xarray.Dataset
|
|
4471
|
+
Dataset containing the estimated PSD model parameters with attributes.
|
|
4472
|
+
Variables depend on the selected ``psd_model``:
|
|
4473
|
+
|
|
4474
|
+
- ``GammaPSD`` : ``N0``, ``mu``, ``Lambda``
|
|
4475
|
+
- ``NormalizedGammaPSD`` : ``Nw``, ``mu``, ``Dm``
|
|
4476
|
+
- ``LognormalPSD`` : ``Nt``, ``mu``, ``sigma``
|
|
4477
|
+
- ``ExponentialPSD`` : ``N0``, ``Lambda``
|
|
4478
|
+
|
|
4479
|
+
Each parameter variable includes attributes with parameter name, units,
|
|
4480
|
+
and optimization metadata.
|
|
4481
|
+
|
|
4482
|
+
Dataset attributes include:
|
|
4483
|
+
|
|
4484
|
+
- ``disdrodb_psd_model`` : The fitted PSD model name
|
|
4485
|
+
- ``disdrodb_psd_optimization`` : The optimization method used
|
|
4486
|
+
- ``disdrodb_psd_optimization_settings`` : String representation of kwargs
|
|
4487
|
+
"""
|
|
2423
4488
|
# Check inputs arguments
|
|
2424
|
-
|
|
4489
|
+
optimization_settings = {} if optimization_settings is None else optimization_settings
|
|
2425
4490
|
optimization = check_optimization(optimization)
|
|
2426
|
-
|
|
2427
|
-
|
|
4491
|
+
check_optimization_settings(
|
|
4492
|
+
optimization_settings=optimization_settings,
|
|
4493
|
+
optimization=optimization,
|
|
4494
|
+
psd_model=psd_model,
|
|
4495
|
+
)
|
|
2428
4496
|
# Check N(D)
|
|
2429
4497
|
# --> If all 0, set to np.nan
|
|
2430
4498
|
# --> If any is not finite --> set to np.nan
|
|
@@ -2441,7 +4509,7 @@ def estimate_model_parameters(
|
|
|
2441
4509
|
func = dict_func[optimization]
|
|
2442
4510
|
|
|
2443
4511
|
# Retrieve parameters
|
|
2444
|
-
ds_params = func(ds, psd_model=psd_model, **
|
|
4512
|
+
ds_params = func(ds, psd_model=psd_model, **optimization_settings)
|
|
2445
4513
|
|
|
2446
4514
|
# Add parameters attributes (and units)
|
|
2447
4515
|
for var, attrs in ATTRS_PARAMS_DICT[psd_model].items():
|