disdrodb 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -0
  4. disdrodb/api/checks.py +8 -7
  5. disdrodb/api/io.py +81 -29
  6. disdrodb/api/path.py +17 -14
  7. disdrodb/api/search.py +15 -18
  8. disdrodb/cli/disdrodb_open_products_options.py +38 -0
  9. disdrodb/cli/disdrodb_run.py +2 -2
  10. disdrodb/cli/disdrodb_run_station.py +4 -4
  11. disdrodb/configs.py +1 -1
  12. disdrodb/data_transfer/download_data.py +70 -1
  13. disdrodb/etc/configs/attributes.yaml +62 -8
  14. disdrodb/etc/configs/encodings.yaml +28 -0
  15. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_SSE.yaml +8 -0
  16. disdrodb/etc/products/L2M/MODELS/GAMMA_ML.yaml +1 -1
  17. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_SSE.yaml +8 -0
  18. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_SSE.yaml +8 -0
  19. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +1 -1
  20. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_SSE.yaml +8 -0
  21. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_SSE.yaml +8 -0
  22. disdrodb/etc/products/L2M/global.yaml +4 -4
  23. disdrodb/fall_velocity/graupel.py +8 -8
  24. disdrodb/fall_velocity/hail.py +2 -2
  25. disdrodb/fall_velocity/rain.py +33 -5
  26. disdrodb/issue/checks.py +1 -1
  27. disdrodb/l0/l0_reader.py +1 -1
  28. disdrodb/l0/l0a_processing.py +2 -2
  29. disdrodb/l0/l0b_nc_processing.py +5 -5
  30. disdrodb/l0/l0b_processing.py +20 -24
  31. disdrodb/l0/l0c_processing.py +18 -13
  32. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +4 -0
  33. disdrodb/l0/readers/PARSIVEL2/VIETNAM/IGE_PARSIVEL2.py +239 -0
  34. disdrodb/l0/template_tools.py +13 -13
  35. disdrodb/l1/classification.py +10 -6
  36. disdrodb/l2/empirical_dsd.py +25 -15
  37. disdrodb/l2/processing.py +32 -14
  38. disdrodb/metadata/download.py +1 -1
  39. disdrodb/metadata/geolocation.py +4 -4
  40. disdrodb/metadata/reader.py +3 -3
  41. disdrodb/metadata/search.py +10 -8
  42. disdrodb/psd/__init__.py +4 -0
  43. disdrodb/psd/fitting.py +2660 -592
  44. disdrodb/psd/gof_metrics.py +389 -0
  45. disdrodb/psd/grid_search.py +1066 -0
  46. disdrodb/psd/models.py +1281 -145
  47. disdrodb/routines/l2.py +6 -6
  48. disdrodb/routines/options_validation.py +8 -8
  49. disdrodb/scattering/axis_ratio.py +70 -2
  50. disdrodb/scattering/permittivity.py +13 -10
  51. disdrodb/scattering/routines.py +10 -10
  52. disdrodb/summary/routines.py +23 -20
  53. disdrodb/utils/archiving.py +29 -22
  54. disdrodb/utils/attrs.py +6 -4
  55. disdrodb/utils/dataframe.py +4 -4
  56. disdrodb/utils/encoding.py +3 -1
  57. disdrodb/utils/event.py +9 -9
  58. disdrodb/utils/logger.py +4 -7
  59. disdrodb/utils/manipulations.py +2 -2
  60. disdrodb/utils/subsetting.py +1 -1
  61. disdrodb/utils/time.py +8 -7
  62. disdrodb/viz/plots.py +25 -17
  63. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/METADATA +44 -33
  64. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/RECORD +68 -66
  65. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/entry_points.txt +1 -0
  66. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +0 -6
  67. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +0 -6
  68. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +0 -6
  69. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_LOG_ND_MAE.yaml +0 -6
  70. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_ND_MAE.yaml +0 -6
  71. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +0 -6
  72. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_Z_MAE.yaml +0 -6
  73. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/WHEEL +0 -0
  74. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/licenses/LICENSE +0 -0
  75. {disdrodb-0.5.0.dist-info → disdrodb-0.5.1.dist-info}/top_level.txt +0 -0
disdrodb/psd/fitting.py CHANGED
@@ -16,6 +16,8 @@
16
16
  # -----------------------------------------------------------------------------.
17
17
  """Routines for PSD fitting."""
18
18
 
19
+ import copy
20
+
19
21
  import numpy as np
20
22
  import scipy.stats as ss
21
23
  import xarray as xr
@@ -31,7 +33,19 @@ from disdrodb.l2.empirical_dsd import (
31
33
  get_normalized_intercept_parameter_from_moments,
32
34
  get_total_number_concentration,
33
35
  )
34
- from disdrodb.psd.models import ExponentialPSD, GammaPSD, LognormalPSD, NormalizedGammaPSD
36
+ from disdrodb.psd.grid_search import (
37
+ check_objectives,
38
+ check_transformation,
39
+ compute_weighted_loss,
40
+ )
41
+ from disdrodb.psd.models import (
42
+ ExponentialPSD,
43
+ GammaPSD,
44
+ GeneralizedGammaPSD,
45
+ LognormalPSD,
46
+ NormalizedGammaPSD,
47
+ NormalizedGeneralizedGammaPSD,
48
+ )
35
49
  from disdrodb.utils.manipulations import get_diameter_bin_edges
36
50
  from disdrodb.utils.warnings import suppress_warnings
37
51
 
@@ -57,117 +71,6 @@ from disdrodb.utils.warnings import suppress_warnings
57
71
  # - LogNormal,Exponential, Gamma: Nt
58
72
  # --> get_total_number_concentration(drop_number_concentration, diameter_bin_width)
59
73
 
60
-
61
- ####--------------------------------------------------------------------------------------.
62
- #### Goodness of fit (GOF)
63
- def compute_gof_stats(obs, pred, dim=DIAMETER_DIMENSION):
64
- """
65
- Compute various goodness-of-fit (GoF) statistics between obs and predicted values.
66
-
67
- Parameters
68
- ----------
69
- obs: xarray.DataArray
70
- Observations DataArray with at least dimension ``dim``.
71
- pred: xarray.DataArray
72
- Predictions DataArray with at least dimension ``dim``.
73
- dim: str
74
- DataArray dimension over which to compute GOF statistics.
75
- The default is DIAMETER_DIMENSION.
76
-
77
- Returns
78
- -------
79
- ds: xarray.Dataset
80
- Dataset containing the computed GoF statistics.
81
- """
82
- from disdrodb.l2.empirical_dsd import get_mode_diameter
83
-
84
- # Retrieve diameter and diameter bin width
85
- diameter = obs["diameter_bin_center"]
86
- diameter_bin_width = obs["diameter_bin_width"]
87
-
88
- # Compute errors
89
- error = obs - pred
90
-
91
- # Compute max obs and pred
92
- obs_max = obs.max(dim=dim, skipna=False)
93
- pred_max = pred.max(dim=dim, skipna=False)
94
-
95
- # Compute NaN mask
96
- mask_nan = np.logical_or(np.isnan(obs_max), np.isnan(pred_max))
97
-
98
- # Compute GOF statistics
99
- with suppress_warnings():
100
- # Compute Pearson Correlation
101
- pearson_r = xr.corr(obs, pred, dim=dim)
102
-
103
- # Compute Mean Absolute Error (MAE)
104
- mae = np.abs(error).mean(dim=dim, skipna=False)
105
-
106
- # Compute maximum absolute error
107
- max_error = np.abs(error).max(dim=dim, skipna=False)
108
- relative_max_error = xr.where(max_error == 0, 0, xr.where(obs_max == 0, np.nan, max_error / obs_max))
109
-
110
- # Compute deviation of N(D) at distribution mode
111
- mode_deviation = obs_max - pred_max
112
- mode_relative_deviation = xr.where(
113
- mode_deviation == 0,
114
- 0,
115
- xr.where(obs_max == 0, np.nan, mode_deviation / obs_max),
116
- )
117
-
118
- # Compute diameter difference of the distribution mode
119
- diameter_mode_pred = get_mode_diameter(pred, diameter)
120
- diameter_mode_obs = get_mode_diameter(obs, diameter)
121
- diameter_mode_deviation = diameter_mode_obs - diameter_mode_pred
122
-
123
- # Compute difference in total number concentration
124
- total_number_concentration_obs = (obs * diameter_bin_width).sum(dim=dim, skipna=False)
125
- total_number_concentration_pred = (pred * diameter_bin_width).sum(dim=dim, skipna=False)
126
- total_number_concentration_difference = total_number_concentration_pred - total_number_concentration_obs
127
-
128
- # Compute Kullback-Leibler divergence
129
- # - Compute pdf per bin
130
- pk_pdf = obs / total_number_concentration_obs
131
- qk_pdf = pred / total_number_concentration_pred
132
-
133
- # - Compute probabilities per bin
134
- pk = pk_pdf * diameter_bin_width
135
- pk = pk / pk.sum(dim=dim, skipna=False) # this might not be necessary
136
- qk = qk_pdf * diameter_bin_width
137
- qk = qk / qk.sum(dim=dim, skipna=False) # this might not be necessary
138
-
139
- # - Compute log probability ratio
140
- epsilon = 1e-10
141
- pk = xr.where(pk == 0, epsilon, pk)
142
- qk = xr.where(qk == 0, epsilon, qk)
143
- log_prob_ratio = np.log(pk / qk)
144
- log_prob_ratio = log_prob_ratio.where(np.isfinite(log_prob_ratio))
145
-
146
- # - Compute divergence
147
- kl_divergence = (pk * log_prob_ratio).sum(dim=dim, skipna=False)
148
- kl_divergence = xr.where((error == 0).all(dim=dim), 0, kl_divergence)
149
-
150
- # Create an xarray.Dataset to hold the computed statistics
151
- ds = xr.Dataset(
152
- {
153
- "R2": pearson_r**2, # Squared Pearson correlation coefficient
154
- "MAE": mae, # Mean Absolute Error
155
- "MaxAE": max_error, # Maximum Absolute Error
156
- "RelMaxAE": relative_max_error, # Relative Maximum Absolute Error
157
- "PeakDiff": mode_deviation, # Difference at distribution peak
158
- "RelPeakDiff": mode_relative_deviation, # Relative difference at peak
159
- "DmodeDiff": diameter_mode_deviation, # Difference in mode diameters
160
- "NtDiff": total_number_concentration_difference,
161
- "KLDiv": kl_divergence, # Kullback-Leibler divergence
162
- },
163
- )
164
- # Round
165
- ds = ds.round(2)
166
- # Mask where input obs or pred is NaN
167
- ds = ds.where(~mask_nan)
168
- return ds
169
-
170
-
171
74
  ####--------------------------------------------------------------------------------------.
172
75
  #### Maximum Likelihood (ML)
173
76
 
@@ -186,8 +89,9 @@ def get_expected_probabilities(params, cdf_func, pdf_func, bin_edges, probabilit
186
89
  Probability density function (PDF) that takes a value and parameters as inputs.
187
90
  bin_edges : array-like
188
91
  Edges of the bins for which to compute the probabilities.
189
- probability_method : {'cdf', 'pdf'}
190
- Method to compute the probabilities. If 'cdf', use the CDF to compute probabilities.
92
+ probability_method : str
93
+ Method to compute the probabilities. Valid values are 'cdf' and 'pdf'.
94
+ If 'cdf', use the CDF to compute probabilities.
191
95
  If 'pdf', integrate the PDF over each bin range.
192
96
  normalized : bool, optional
193
97
  If True, normalize the probabilities to sum to 1. Default is False.
@@ -365,7 +269,7 @@ def estimate_lognormal_parameters(
365
269
 
366
270
  References
367
271
  ----------
368
- .. [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.lognorm.html#scipy.stats.lognorm
272
+ https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.lognorm.html#scipy.stats.lognorm
369
273
  """
370
274
  # Define initial guess for the parameters
371
275
  scale = np.exp(mu) # mu = np.log(scale)
@@ -477,12 +381,13 @@ def estimate_exponential_parameters(
477
381
  Notes
478
382
  -----
479
383
  The exponential distribution is defined as:
480
- N(D) = N0 * exp(-Lambda * D) = Nt * Lambda * exp(-Lambda * D)
384
+
385
+ N(D) = N0 * exp(-Lambda * D) = Nt * Lambda * exp(-Lambda * D)
481
386
  where Lambda = 1 / scale and N0 = Nt * Lambda.
482
387
 
483
388
  References
484
389
  ----------
485
- .. [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.expon.html
390
+ https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.expon.html
486
391
  """
487
392
  # Define initial guess for parameters
488
393
  scale = 1 / Lambda
@@ -558,8 +463,7 @@ def estimate_gamma_parameters(
558
463
  output_dictionary=True,
559
464
  optimizer="Nelder-Mead",
560
465
  ):
561
- """
562
- Estimate the parameters of a gamma distribution given histogram data.
466
+ r"""Estimate the parameters of a gamma distribution given histogram data.
563
467
 
564
468
  Parameters
565
469
  ----------
@@ -598,13 +502,29 @@ def estimate_gamma_parameters(
598
502
  Notes
599
503
  -----
600
504
  The gamma distribution is defined as:
601
- N(D) = N0 * D**mu * exp(-Lambda*D)
602
- where Lambda = 1/scale, and mu = a - 1 with ``a`` being the shape parameter of the gamma distribution.
603
- N0 is defined as N0 = Nt*Lambda**(mu+1)/gamma(mu+1).
505
+
506
+ .. math::
507
+
508
+ N(D) = N_0 \, D^{\mu} \, \exp(-\Lambda D)
509
+
510
+ where:
511
+
512
+ - :math:`D` is the particle diameter,
513
+ - :math:`\Lambda = 1 / \text{scale}` is the slope parameter,
514
+ - :math:`\mu = a - 1` is the shape parameter, with :math:`a` the gamma distribution shape parameter.
515
+
516
+ The intercept parameter :math:`N_0` is defined as:
517
+
518
+ .. math::
519
+
520
+ N_0 = N_t \, \frac{\Lambda^{\mu + 1}}{\Gamma(\mu + 1)}
521
+
522
+ where :math:`N_t` is the total number concentration and
523
+ :math:`\Gamma(\cdot)` denotes the gamma function.
604
524
 
605
525
  References
606
526
  ----------
607
- .. [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gamma.html
527
+ https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gamma.html
608
528
 
609
529
  """
610
530
  # Define initial guess for parameters
@@ -764,20 +684,20 @@ def get_gamma_parameters(
764
684
  truncated_likelihood=True,
765
685
  optimizer="Nelder-Mead",
766
686
  ):
767
- """
768
- Estimate gamma distribution parameters for drop size distribution (DSD) data.
687
+ """Estimate gamma distribution parameters for drop size distribution (DSD) data.
769
688
 
770
689
  Parameters
771
690
  ----------
772
691
  ds : xarray.Dataset
773
692
  Input dataset containing drop size distribution data. It must include the following variables:
693
+
774
694
  - ``drop_number_concentration``: The number concentration of drops.
775
695
  - ``diameter_bin_width``: The width of each diameter bin.
776
696
  - ``diameter_bin_lower``: The lower bounds of the diameter bins.
777
697
  - ``diameter_bin_upper``: The upper bounds of the diameter bins.
778
698
  - ``diameter_bin_center``: The center values of the diameter bins.
779
- - The moments M0...M6 variables required to compute the initial parameters
780
- with the specified mom_method.
699
+ - The moments M0...M6 variables required to compute the initial parameters with the specified mom_method.
700
+
781
701
  init_method: str or list
782
702
  The method(s) of moments used to initialize the gamma parameters.
783
703
  If None (or 'None'), the scale parameter is set to 1 and mu to 0 (a=1).
@@ -795,9 +715,11 @@ def get_gamma_parameters(
795
715
  -------
796
716
  xarray.Dataset
797
717
  Dataset containing the estimated gamma distribution parameters:
718
+
798
719
  - ``N0``: Intercept parameter.
799
720
  - ``mu``: Shape parameter.
800
721
  - ``Lambda``: Scale parameter.
722
+
801
723
  The dataset will also have an attribute ``disdrodb_psd_model`` set to ``GammaPSD``.
802
724
 
803
725
  Notes
@@ -872,12 +794,14 @@ def get_lognormal_parameters(
872
794
  Parameters
873
795
  ----------
874
796
  ds : xarray.Dataset
875
- Input dataset containing drop size distribution data. It must include the following variables:
876
- - ``drop_number_concentration``: The number concentration of drops.
877
- - ``diameter_bin_width``": The width of each diameter bin.
878
- - ``diameter_bin_lower``: The lower bounds of the diameter bins.
879
- - ``diameter_bin_upper``: The upper bounds of the diameter bins.
880
- - ``diameter_bin_center``: The center values of the diameter bins.
797
+ Input dataset containing drop size distribution data. It must include the following variables:
798
+
799
+ - ``drop_number_concentration``: The number concentration of drops.
800
+ - ``diameter_bin_width``: The width of each diameter bin.
801
+ - ``diameter_bin_lower``: The lower bounds of the diameter bins.
802
+ - ``diameter_bin_upper``: The upper bounds of the diameter bins.
803
+ - ``diameter_bin_center``: The center values of the diameter bins.
804
+
881
805
  probability_method : str, optional
882
806
  Method to compute probabilities. The default value is ``cdf``.
883
807
  likelihood : str, optional
@@ -891,9 +815,11 @@ def get_lognormal_parameters(
891
815
  -------
892
816
  xarray.Dataset
893
817
  Dataset containing the estimated lognormal distribution parameters:
818
+
894
819
  - ``Nt``: Total number concentration.
895
820
  - ``mu``: Mean of the lognormal distribution.
896
821
  - ``sigma``: Standard deviation of the lognormal distribution.
822
+
897
823
  The resulting dataset will have an attribute ``disdrodb_psd_model`` set to ``LognormalPSD``.
898
824
 
899
825
  Notes
@@ -1038,407 +964,1150 @@ def get_exponential_parameters(
1038
964
  #### Grid Search (GS)
1039
965
 
1040
966
 
1041
- def _compute_rain_rate(ND, D, dD, V):
1042
- axis = 1 if ND.ndim == 2 else None
1043
- rain_rate = np.pi / 6 * np.sum(ND * V * (D / 1000) ** 3 * dD, axis=axis) * 3600 * 1000
1044
- return rain_rate # mm/h
1045
-
1046
-
1047
- def _compute_lwc(ND, D, dD, rho_w=1000):
1048
- axis = 1 if ND.ndim == 2 else None
1049
- lwc = np.pi / 6.0 * (rho_w * 1000) * np.sum((D / 1000) ** 3 * ND * dD, axis=axis)
1050
- return lwc # g/m3
1051
-
967
+ DEFAULT_OBJECTIVES = [
968
+ {
969
+ "target": "N(D)",
970
+ "transformation": "identity",
971
+ "loss": "SSE",
972
+ "censoring": "none",
973
+ "loss_weight": 0.8,
974
+ },
975
+ {
976
+ "target": "Z",
977
+ "transformation": "identity",
978
+ "loss": "AE",
979
+ "censoring": "none",
980
+ "loss_weight": 0.2,
981
+ },
982
+ ]
1052
983
 
1053
- def _compute_z(ND, D, dD):
1054
- axis = 1 if ND.ndim == 2 else None
1055
- z = np.sum(((D) ** 6 * ND * dD), axis=axis) # mm⁶·m⁻³
1056
- Z = 10 * np.log10(z)
1057
- return Z
1058
984
 
985
+ def apply_exponential_gs(
986
+ Nt,
987
+ ND_obs,
988
+ V,
989
+ # Coords
990
+ D,
991
+ dD,
992
+ # PSD parameters
993
+ Lambda,
994
+ # Optimization options
995
+ objectives,
996
+ # Output options
997
+ return_loss=False,
998
+ ):
999
+ """Estimate ExponentialPSD model parameters using Grid Search.
1059
1000
 
1060
- def _compute_target_variable_error(target, ND_obs, ND_preds, D, dD, V, relative=False, eps=1e-12):
1061
- # Compute observed and predicted target variables
1062
- if target == "Z":
1063
- obs = _compute_z(ND_obs, D, dD)
1064
- pred = _compute_z(ND_preds, D, dD)
1065
- elif target == "R":
1066
- obs = _compute_rain_rate(ND_obs, D, dD, V)
1067
- pred = _compute_rain_rate(ND_preds, D, dD, V)
1068
- else: # "LWC"
1069
- obs = _compute_lwc(ND_obs, D, dD)
1070
- pred = _compute_lwc(ND_preds, D, dD)
1001
+ This function performs a grid search optimization to find the best parameters
1002
+ (N0, Lambda) for the ExponentialPSD model by minimizing a weighted
1003
+ cost function across one or more objectives.
1071
1004
 
1072
- # Absolute error
1073
- abs_error = np.abs(obs - pred)
1005
+ Parameters
1006
+ ----------
1007
+ Nt : float
1008
+ Total number concentration.
1009
+ ND_obs : numpy.ndarray
1010
+ Observed PSD data [#/mm/m3].
1011
+ V : numpy.ndarray
1012
+ Fall velocity [m/s].
1013
+ D : numpy.ndarray
1014
+ Diameter bins [mm].
1015
+ dD : numpy.ndarray
1016
+ Diameter bin widths [mm].
1017
+ Lambda : int, float or numpy.ndarray
1018
+ Lambda parameter values to search.
1019
+ objectives: list of dict
1020
+ target : str, optional
1021
+ Target quantity to optimize. Valid options:
1022
+
1023
+ - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
1024
+ - ``"R"`` : Rain rate [mm h⁻¹]
1025
+ - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
1026
+ - ``"LWC"`` : Liquid water content [g m⁻³]
1027
+ - ``"M<p>"`` : Moment of order p
1028
+
1029
+ transformation : str, optional
1030
+ Transformation applied to the target quantity before computing the loss.
1031
+ Valid options:
1032
+
1033
+ - ``"identity"`` : No transformation
1034
+ - ``"log"`` : Logarithmic transformation
1035
+ - ``"sqrt"`` : Square root transformation
1036
+
1037
+ censoring : str
1038
+ Specifies whether the observed particle size distribution (PSD) is
1039
+ treated as censored at the edges of the diameter range due to
1040
+ instrumental sensitivity limits:
1041
+
1042
+ - ``"none"`` : No censoring is applied. All diameter bins are used.
1043
+ - ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
1044
+ the spectrum where the observed number concentration is zero are
1045
+ removed prior to cost-function evaluation.
1046
+ - ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
1047
+ the spectrum where the observed number concentration is zero are
1048
+ removed prior to cost-function evaluation.
1049
+ - ``"both"`` : Both left- and right-censored PSD. Only the contiguous
1050
+ range of diameter bins with non-zero observed concentrations is
1051
+ retained.
1052
+
1053
+ loss : str, optional
1054
+ Loss function.
1055
+ If target is ``"N(D)"``, valid options are:
1056
+
1057
+ - ``SSE``: Sum of Squared Errors
1058
+ - ``SAE``: Sum of Absolute Errors
1059
+ - ``MAE``: Mean Absolute Error
1060
+ - ``MSE``: Mean Squared Error
1061
+ - ``RMSE``: Root Mean Squared Error
1062
+ - ``relMAE``: Relative Mean Absolute Error
1063
+ - ``KLDiv``: Kullback-Leibler Divergence
1064
+ - ``WD``: Wasserstein Distance
1065
+ - ``JSD``: Jensen-Shannon Distance
1066
+ - ``KS``: Kolmogorov-Smirnov Statistic
1067
+
1068
+ If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
1069
+ - ``AE``: Absolute Error
1070
+ - ``SE``: Squared Error
1071
+
1072
+ loss_weight: float, optional
1073
+ Weight of this objective when multiple objectives are used.
1074
+ Must be specified if more than one objective is specified.
1075
+ return_loss : bool, optional
1076
+ If True, return both the loss surface and parameters.
1077
+ Default is False.
1074
1078
 
1075
- # Return relative error if requested
1076
- if relative:
1077
- return abs_error / (np.abs(obs) + eps)
1079
+ Returns
1080
+ -------
1081
+ parameters : numpy.ndarray
1082
+ Best parameters as [N0, Lambda].
1083
+ An array of NaN values is returned if no valid solution is found.
1084
+ total_loss : numpy.ndarray, optional
1085
+ 1D array of total loss values.
1086
+ Only returned if return_loss=True.
1078
1087
 
1079
- return abs_error
1088
+ Notes
1089
+ -----
1090
+ When multiple objectives are provided, losses are normalized and weighted.
1091
+ The best parameters correspond to the minimum total weighted loss.
1092
+ """
1093
+ # Ensure input is numpy array
1094
+ Nt = np.asarray(Nt)
1095
+ ND_obs = np.asarray(ND_obs)
1096
+ V = np.asarray(V)
1080
1097
 
1098
+ # Convert lambda to array if needed
1099
+ if not isinstance(Lambda, np.ndarray):
1100
+ Lambda = np.atleast_1d(Lambda)
1081
1101
 
1082
- def _compute_cost_function(ND_obs, ND_preds, D, dD, V, target, transformation, error_order):
1083
- # Assume ND_obs of shape (D bins) and ND_preds of shape (# params, D bins)
1084
- if target == "ND":
1085
- if transformation == "identity":
1086
- errors = np.mean(np.abs(ND_obs[None, :] - ND_preds) ** error_order, axis=1)
1087
- return errors
1088
- if transformation == "log":
1089
- errors = np.mean(np.abs(np.log(ND_obs[None, :] + 1) - np.log(ND_preds + 1)) ** error_order, axis=1)
1090
- return errors
1091
- if transformation == "sqrt":
1092
- errors = np.mean(np.abs(np.sqrt(ND_obs[None, :]) - np.sqrt(ND_preds)) ** error_order, axis=1)
1093
- return errors
1094
- # if target in ["Z", "R", "LWC"]:
1095
- return _compute_target_variable_error(target, ND_obs, ND_preds, D, dD, V)
1102
+ # Perform grid search
1103
+ with suppress_warnings():
1104
+ # Compute N(D)
1105
+ N0_arr = Nt * Lambda
1106
+ ND_preds = ExponentialPSD.formula(D=D[None, :], N0=N0_arr[:, None], Lambda=Lambda[:, None])
1096
1107
 
1108
+ # Compute loss
1109
+ total_loss = compute_weighted_loss(
1110
+ ND_obs=ND_obs,
1111
+ ND_preds=ND_preds,
1112
+ D=D,
1113
+ dD=dD,
1114
+ V=V,
1115
+ objectives=objectives,
1116
+ )
1097
1117
 
1098
- def define_param_range(center, step, bounds, factor=2, refinement=20):
1099
- """
1100
- Create a refined parameter search range around a center value, constrained to bounds.
1118
+ # Define best parameters
1119
+ if not np.all(np.isnan(total_loss)):
1120
+ best_index = np.nanargmin(total_loss)
1121
+ N0 = N0_arr[best_index].item()
1122
+ Lambda_best = Lambda[best_index].item()
1123
+ parameters = np.array([N0, Lambda_best])
1124
+ else:
1125
+ parameters = np.array([np.nan, np.nan])
1101
1126
 
1102
- Parameters
1103
- ----------
1104
- center : float
1105
- Center of the range (e.g., current best estimate).
1106
- step : float
1107
- Coarse step size used in the first search.
1108
- bounds : tuple of (float, float)
1109
- Lower and upper bounds (can include -np.inf, np.inf).
1110
- factor : float, optional
1111
- How wide the refined range extends from the center (in multiples of step).
1112
- Default = 2.
1113
- refinement : int, optional
1114
- Factor to refine the step size (smaller step = finer grid).
1115
- Default = 20.
1127
+ # If asked, return cost function
1128
+ if return_loss:
1129
+ return parameters, total_loss
1116
1130
 
1117
- Returns
1118
- -------
1119
- np.ndarray
1120
- Array of values constrained to bounds.
1121
- """
1122
- lower = max(center - factor * step, bounds[0])
1123
- upper = min(center + factor * step, bounds[1])
1124
- new_step = step / refinement
1125
- return np.arange(lower, upper, new_step)
1131
+ return parameters
1126
1132
 
1127
1133
 
1128
- def apply_exponential_gs(
1134
+ def apply_gamma_gs(
1129
1135
  Nt,
1130
1136
  ND_obs,
1131
1137
  V,
1132
1138
  # Coords
1133
1139
  D,
1134
1140
  dD,
1135
- # Error options
1136
- target,
1137
- transformation,
1138
- error_order,
1141
+ # PSD parameters
1142
+ mu,
1143
+ Lambda,
1144
+ # Optimization options
1145
+ objectives,
1146
+ # Output options
1147
+ return_loss=False,
1139
1148
  ):
1140
- """Apply Grid Search for the ExponentialPSD distribution."""
1141
- # Define set of mu values
1142
- lambda_arr = np.arange(0.01, 20, step=0.01)
1149
+ """Estimate GammaPSD model parameters using Grid Search.
1150
+
1151
+ This function performs a grid search optimization to find the best parameters
1152
+ (mu, Lambda) for the GammaPSD model by minimizing a weighted
1153
+ cost function across one or more objectives.
1154
+
1155
+ Parameters
1156
+ ----------
1157
+ Nt : float
1158
+ Total number concentration.
1159
+ ND_obs : numpy.ndarray
1160
+ Observed PSD data [#/mm/m3].
1161
+ V : numpy.ndarray
1162
+ Fall velocity [m/s].
1163
+ D : numpy.ndarray
1164
+ Diameter bins [mm].
1165
+ dD : numpy.ndarray
1166
+ Diameter bin widths [mm].
1167
+ mu : int, float or numpy.ndarray
1168
+ mu parameter values to search.
1169
+ Lambda : int, float or numpy.ndarray
1170
+ Lambda parameter values to search.
1171
+ objectives: list of dict
1172
+ target : str, optional
1173
+ Target quantity to optimize. Valid options:
1174
+
1175
+ - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
1176
+ - ``"R"`` : Rain rate [mm h⁻¹]
1177
+ - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
1178
+ - ``"LWC"`` : Liquid water content [g m⁻³]
1179
+ - ``"M<p>"`` : Moment of order p
1180
+
1181
+ transformation : str, optional
1182
+ Transformation applied to the target quantity before computing the loss.
1183
+ Valid options:
1184
+
1185
+ - ``"identity"`` : No transformation
1186
+ - ``"log"`` : Logarithmic transformation
1187
+ - ``"sqrt"`` : Square root transformation
1188
+
1189
+ censoring : str
1190
+ Specifies whether the observed particle size distribution (PSD) is
1191
+ treated as censored at the edges of the diameter range due to
1192
+ instrumental sensitivity limits:
1193
+
1194
+ - ``"none"`` : No censoring is applied. All diameter bins are used.
1195
+ - ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
1196
+ the spectrum where the observed number concentration is zero are
1197
+ removed prior to cost-function evaluation.
1198
+ - ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
1199
+ the spectrum where the observed number concentration is zero are
1200
+ removed prior to cost-function evaluation.
1201
+ - ``"both"`` : Both left- and right-censored PSD. Only the contiguous
1202
+ range of diameter bins with non-zero observed concentrations is
1203
+ retained.
1204
+
1205
+ loss : str, optional
1206
+ Loss function.
1207
+ If target is ``"N(D)"``, valid options are:
1208
+
1209
+ - ``SSE``: Sum of Squared Errors
1210
+ - ``SAE``: Sum of Absolute Errors
1211
+ - ``MAE``: Mean Absolute Error
1212
+ - ``MSE``: Mean Squared Error
1213
+ - ``RMSE``: Root Mean Squared Error
1214
+ - ``relMAE``: Relative Mean Absolute Error
1215
+ - ``KLDiv``: Kullback-Leibler Divergence
1216
+ - ``WD``: Wasserstein Distance
1217
+ - ``JSD``: Jensen-Shannon Distance
1218
+ - ``KS``: Kolmogorov-Smirnov Statistic
1219
+
1220
+ If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
1221
+
1222
+ - ``AE``: Absolute Error
1223
+ - ``SE``: Squared Error
1224
+
1225
+ loss_weight: float, optional
1226
+ Weight of this objective when multiple objectives are used.
1227
+ Must be specified if more than one objective is specified.
1228
+ return_loss : bool, optional
1229
+ If True, return both the loss surface and parameters.
1230
+ Default is False.
1231
+
1232
+ Returns
1233
+ -------
1234
+ parameters : numpy.ndarray
1235
+ Best parameters as [N0, Lambda, mu].
1236
+ An array of NaN values is returned if no valid solution is found.
1237
+ total_loss : numpy.ndarray, optional
1238
+ 2D array of total loss values reshaped to (len(Lambda), len(mu)).
1239
+ Only returned if return_loss=True.
1240
+
1241
+ Notes
1242
+ -----
1243
+ When multiple objectives are provided, losses are normalized and weighted.
1244
+ The best parameters correspond to the minimum total weighted loss.
1245
+ """
1246
+ # Ensure input is numpy array
1247
+ Nt = np.asarray(Nt)
1248
+ ND_obs = np.asarray(ND_obs)
1249
+ V = np.asarray(V)
1250
+
1251
+ # Define combinations of parameters for grid search
1252
+ mu_grid, Lambda_grid = np.meshgrid(
1253
+ mu,
1254
+ Lambda,
1255
+ indexing="xy",
1256
+ )
1257
+ mu_arr = mu_grid.ravel()
1258
+ Lambda_arr = Lambda_grid.ravel()
1143
1259
 
1144
1260
  # Perform grid search
1145
1261
  with suppress_warnings():
1146
- # Compute ND
1147
- N0_arr = Nt * lambda_arr
1148
- ND_preds = ExponentialPSD.formula(D=D[None, :], N0=N0_arr[:, None], Lambda=lambda_arr[:, None])
1262
+ # Compute N(D)
1263
+ N0 = np.exp(np.log(Nt) + (mu_arr[:, None] + 1) * np.log(Lambda_arr[:, None]) - gammaln(mu_arr[:, None] + 1))
1264
+ ND_preds = GammaPSD.formula(D=D[None, :], N0=N0, Lambda=Lambda_arr[:, None], mu=mu_arr[:, None])
1149
1265
 
1150
- # Compute errors
1151
- errors = _compute_cost_function(
1266
+ # Compute loss
1267
+ total_loss = compute_weighted_loss(
1152
1268
  ND_obs=ND_obs,
1153
1269
  ND_preds=ND_preds,
1154
1270
  D=D,
1155
1271
  dD=dD,
1156
1272
  V=V,
1157
- target=target,
1158
- transformation=transformation,
1159
- error_order=error_order,
1273
+ objectives=objectives,
1160
1274
  )
1161
- # Replace inf with NaN
1162
- errors[~np.isfinite(errors)] = np.nan
1163
1275
 
1164
- # If all invalid, return NaN parameters
1165
- if np.all(np.isnan(errors)):
1166
- return np.array([np.nan, np.nan])
1276
+ # Define best parameters
1277
+ if not np.all(np.isnan(total_loss)):
1278
+ best_index = np.nanargmin(total_loss)
1279
+ N0_best = N0[best_index].item()
1280
+ mu_best = mu_arr[best_index].item()
1281
+ Lambda_best = Lambda_arr[best_index].item()
1282
+ parameters = np.array([N0_best, Lambda_best, mu_best])
1283
+ else:
1284
+ parameters = np.array([np.nan, np.nan, np.nan])
1285
+
1286
+ # If asked, return cost function
1287
+ if return_loss:
1288
+ total_loss = total_loss.reshape(mu_grid.shape)
1289
+ return parameters, total_loss
1290
+
1291
+ return parameters
1292
+
1293
+
1294
+ def apply_generalized_gamma_gs(
1295
+ Nt,
1296
+ ND_obs,
1297
+ V,
1298
+ # Coords
1299
+ D,
1300
+ dD,
1301
+ # PSD parameters
1302
+ mu,
1303
+ c,
1304
+ Lambda,
1305
+ # Optimization options
1306
+ objectives,
1307
+ # Output options
1308
+ return_loss=False,
1309
+ ):
1310
+ """Estimate GeneralizedGammaPSD model parameters using Grid Search.
1311
+
1312
+ This function performs a grid search optimization to find the best parameters
1313
+ (mu, c, Lambda) for the GeneralizedGammaPSD model by minimizing a weighted
1314
+ cost function across one or more objectives.
1315
+
1316
+ Parameters
1317
+ ----------
1318
+ Nt : float
1319
+ Total number concentration.
1320
+ ND_obs : numpy.ndarray
1321
+ Observed PSD data [#/mm/m3].
1322
+ V : numpy.ndarray
1323
+ Fall velocity [m/s].
1324
+ D : numpy.ndarray
1325
+ Diameter bins [mm].
1326
+ dD : numpy.ndarray
1327
+ Diameter bin widths [mm].
1328
+ mu : int, float or numpy.ndarray
1329
+ mu parameter values to search.
1330
+ c : int, float or numpy.ndarray
1331
+ c parameter values to search.
1332
+ Lambda : int, float or numpy.ndarray
1333
+ Lambda parameter values to search.
1334
+ objectives: list of dict
1335
+ target : str, optional
1336
+ Target quantity to optimize. Valid options:
1337
+
1338
+ - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
1339
+ - ``"R"`` : Rain rate [mm h⁻¹]
1340
+ - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
1341
+ - ``"LWC"`` : Liquid water content [g m⁻³]
1342
+ - ``"M<p>"`` : Moment of order p
1343
+
1344
+ transformation : str, optional
1345
+ Transformation applied to the target quantity before computing the loss.
1346
+ Valid options:
1347
+
1348
+ - ``"identity"`` : No transformation
1349
+ - ``"log"`` : Logarithmic transformation
1350
+ - ``"sqrt"`` : Square root transformation
1351
+
1352
+ censoring : str
1353
+ Specifies whether the observed particle size distribution (PSD) is
1354
+ treated as censored at the edges of the diameter range due to
1355
+ instrumental sensitivity limits:
1356
+
1357
+ - ``"none"`` : No censoring is applied. All diameter bins are used.
1358
+ - ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
1359
+ the spectrum where the observed number concentration is zero are
1360
+ removed prior to cost-function evaluation.
1361
+ - ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
1362
+ the spectrum where the observed number concentration is zero are
1363
+ removed prior to cost-function evaluation.
1364
+ - ``"both"`` : Both left- and right-censored PSD. Only the contiguous
1365
+ range of diameter bins with non-zero observed concentrations is
1366
+ retained.
1367
+
1368
+ loss : str, optional
1369
+ Loss function.
1370
+ If target is ``"N(D)"``, valid options are:
1371
+
1372
+ - ``SSE``: Sum of Squared Errors
1373
+ - ``SAE``: Sum of Absolute Errors
1374
+ - ``MAE``: Mean Absolute Error
1375
+ - ``MSE``: Mean Squared Error
1376
+ - ``RMSE``: Root Mean Squared Error
1377
+ - ``relMAE``: Relative Mean Absolute Error
1378
+ - ``KLDiv``: Kullback-Leibler Divergence
1379
+ - ``WD``: Wasserstein Distance
1380
+ - ``JSD``: Jensen-Shannon Distance
1381
+ - ``KS``: Kolmogorov-Smirnov Statistic
1382
+
1383
+ If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
1384
+
1385
+ - ``AE``: Absolute Error
1386
+ - ``SE``: Squared Error
1387
+
1388
+ loss_weight: int, optional
1389
+ Weight of this objective when multiple objectives are used.
1390
+ Must be specified if more than one objective is specified.
1391
+ return_loss : bool, optional
1392
+ If True, return both the loss surface and parameters.
1393
+ Default is False.
1167
1394
 
1168
- # Otherwise, choose the best index
1169
- best_index = np.nanargmin(errors)
1170
- return np.array([N0_arr[best_index].item(), lambda_arr[best_index].item()])
1395
+ Returns
1396
+ -------
1397
+ parameters : numpy.ndarray
1398
+ Best parameters as [Nt, Lambda, mu, c].
1399
+ An array of NaN values is returned if no valid solution is found.
1400
+ total_loss : numpy.ndarray, optional
1401
+ 3D array of total loss values reshaped to (len(Lambda), len(mu), len(c)).
1402
+ Only returned if return_loss=True.
1171
1403
 
1404
+ Notes
1405
+ -----
1406
+ When multiple objectives are provided, losses are normalized and weighted.
1407
+ The best parameters correspond to the minimum total weighted loss.
1408
+ """
1409
+ # Ensure input is numpy array
1410
+ Nt = np.asarray(Nt)
1411
+ ND_obs = np.asarray(ND_obs)
1412
+ V = np.asarray(V)
1172
1413
 
1173
- def _apply_gamma_gs(mu_values, lambda_values, Nt, ND_obs, D, dD, V, target, transformation, error_order):
1174
- """Routine for GammaPSD parameters grid search."""
1175
1414
  # Define combinations of parameters for grid search
1176
- combo = np.meshgrid(mu_values, lambda_values, indexing="xy")
1177
- mu_arr = combo[0].ravel()
1178
- lambda_arr = combo[1].ravel()
1415
+ mu_grid, Lambda_grid, c_grid = np.meshgrid(
1416
+ mu,
1417
+ Lambda,
1418
+ c,
1419
+ indexing="xy",
1420
+ )
1421
+ mu_arr = mu_grid.ravel()
1422
+ Lambda_arr = Lambda_grid.ravel()
1423
+ c_arr = c_grid.ravel()
1179
1424
 
1180
1425
  # Perform grid search
1181
1426
  with suppress_warnings():
1182
- # Compute ND
1183
- N0 = np.exp(np.log(Nt) + (mu_arr[:, None] + 1) * np.log(lambda_arr[:, None]) - gammaln(mu_arr[:, None] + 1))
1184
- ND_preds = GammaPSD.formula(D=D[None, :], N0=N0, Lambda=lambda_arr[:, None], mu=mu_arr[:, None])
1427
+ # Compute N(D)
1428
+ ND_preds = GeneralizedGammaPSD.formula(
1429
+ D=D[None, :],
1430
+ Nt=Nt,
1431
+ Lambda=Lambda_arr[:, None],
1432
+ mu=mu_arr[:, None],
1433
+ c=c_arr[:, None],
1434
+ )
1185
1435
 
1186
- # Compute errors
1187
- errors = _compute_cost_function(
1436
+ # Compute loss
1437
+ total_loss = compute_weighted_loss(
1188
1438
  ND_obs=ND_obs,
1189
1439
  ND_preds=ND_preds,
1190
1440
  D=D,
1191
1441
  dD=dD,
1192
1442
  V=V,
1193
- target=target,
1194
- transformation=transformation,
1195
- error_order=error_order,
1443
+ objectives=objectives,
1196
1444
  )
1197
1445
 
1198
- # Replace inf with NaN
1199
- errors[~np.isfinite(errors)] = np.nan
1446
+ # Define best parameters
1447
+ if not np.all(np.isnan(total_loss)):
1448
+ best_index = np.nanargmin(total_loss)
1449
+ mu_best = mu_arr[best_index].item()
1450
+ c_best = c_arr[best_index].item()
1451
+ Lambda_best = Lambda_arr[best_index].item()
1452
+ parameters = np.array([Nt, Lambda_best, mu_best, c_best])
1453
+ else:
1454
+ parameters = np.array([np.nan, np.nan, np.nan, np.nan])
1200
1455
 
1201
- # If all invalid, return NaN parameters
1202
- if np.all(np.isnan(errors)):
1203
- return np.array([np.nan, np.nan, np.nan])
1456
+ # If asked, return cost function
1457
+ if return_loss:
1458
+ total_loss = total_loss.reshape(mu_grid.shape)
1459
+ return parameters, total_loss
1204
1460
 
1205
- # Otherwise, choose the best index
1206
- best_index = np.nanargmin(errors)
1207
- return N0[best_index].item(), mu_arr[best_index].item(), lambda_arr[best_index].item()
1461
+ return parameters
1208
1462
 
1209
1463
 
1210
- def apply_gamma_gs(
1464
+ def apply_lognormal_gs(
1211
1465
  Nt,
1212
1466
  ND_obs,
1213
1467
  V,
1214
1468
  # Coords
1215
1469
  D,
1216
1470
  dD,
1217
- # Error options
1218
- target,
1219
- transformation,
1220
- error_order,
1471
+ # PSD parameters
1472
+ mu,
1473
+ sigma,
1474
+ # Optimization options
1475
+ objectives,
1476
+ # Output options
1477
+ return_loss=False,
1221
1478
  ):
1222
- """Estimate GammaPSD model parameters using Grid Search."""
1223
- # Define parameters bounds
1224
- mu_bounds = (-1, 40)
1225
- lambda_bounds = (0, 60)
1226
-
1227
- # Define initial set of parameters
1228
- mu_step = 0.25
1229
- lambda_step = 0.5
1230
- mu_values = np.arange(0, 40, step=mu_step)
1231
- lambda_values = np.arange(0, 60, step=lambda_step)
1232
-
1233
- # First round of GS
1234
- N0, mu, Lambda = _apply_gamma_gs(
1235
- mu_values=mu_values,
1236
- lambda_values=lambda_values,
1237
- Nt=Nt,
1238
- ND_obs=ND_obs,
1239
- D=D,
1240
- dD=dD,
1241
- V=V,
1242
- target=target,
1243
- transformation=transformation,
1244
- error_order=error_order,
1245
- )
1246
- if np.isnan(N0): # if np.nan, return immediately
1247
- return np.array([N0, mu, Lambda])
1248
-
1249
- # Second round of GS
1250
- mu_values = define_param_range(mu, mu_step, bounds=mu_bounds)
1251
- lambda_values = define_param_range(Lambda, lambda_step, bounds=lambda_bounds)
1252
-
1253
- N0, mu, Lambda = _apply_gamma_gs(
1254
- mu_values=mu_values,
1255
- lambda_values=lambda_values,
1256
- Nt=Nt,
1257
- ND_obs=ND_obs,
1258
- D=D,
1259
- dD=dD,
1260
- V=V,
1261
- target=target,
1262
- transformation=transformation,
1263
- error_order=error_order,
1264
- )
1479
+ """Estimate LognormalPSD model parameters using Grid Search.
1480
+
1481
+ This function performs a grid search optimization to find the best parameters
1482
+ (mu, sigma) for the LognormalPSD model by minimizing a weighted
1483
+ cost function across one or more objectives.
1484
+
1485
+ Parameters
1486
+ ----------
1487
+ Nt : float
1488
+ Total number concentration.
1489
+ ND_obs : numpy.ndarray
1490
+ Observed PSD data [#/mm/m3].
1491
+ V : numpy.ndarray
1492
+ Fall velocity [m/s].
1493
+ D : numpy.ndarray
1494
+ Diameter bins [mm].
1495
+ dD : numpy.ndarray
1496
+ Diameter bin widths [mm].
1497
+ mu : int, float or numpy.ndarray
1498
+ mu parameter values to search.
1499
+ sigma : int, float or numpy.ndarray
1500
+ sigma parameter values to search.
1501
+ objectives: list of dict
1502
+ target : str, optional
1503
+ Target quantity to optimize. Valid options:
1504
+
1505
+ - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
1506
+ - ``"R"`` : Rain rate [mm h⁻¹]
1507
+ - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
1508
+ - ``"LWC"`` : Liquid water content [g m⁻³]
1509
+ - ``"M<p>"`` : Moment of order p
1510
+
1511
+ transformation : str, optional
1512
+ Transformation applied to the target quantity before computing the loss.
1513
+ Valid options:
1514
+
1515
+ - ``"identity"`` : No transformation
1516
+ - ``"log"`` : Logarithmic transformation
1517
+ - ``"sqrt"`` : Square root transformation
1518
+
1519
+ censoring : str
1520
+ Specifies whether the observed particle size distribution (PSD) is
1521
+ treated as censored at the edges of the diameter range due to
1522
+ instrumental sensitivity limits:
1523
+
1524
+ - ``"none"`` : No censoring is applied. All diameter bins are used.
1525
+ - ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
1526
+ the spectrum where the observed number concentration is zero are
1527
+ removed prior to cost-function evaluation.
1528
+ - ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
1529
+ the spectrum where the observed number concentration is zero are
1530
+ removed prior to cost-function evaluation.
1531
+ - ``"both"`` : Both left- and right-censored PSD. Only the contiguous
1532
+ range of diameter bins with non-zero observed concentrations is
1533
+ retained.
1534
+
1535
+ loss : str, optional
1536
+ Loss function.
1537
+ If target is ``"N(D)"``, valid options are:
1538
+
1539
+ - ``SSE``: Sum of Squared Errors
1540
+ - ``SAE``: Sum of Absolute Errors
1541
+ - ``MAE``: Mean Absolute Error
1542
+ - ``MSE``: Mean Squared Error
1543
+ - ``RMSE``: Root Mean Squared Error
1544
+ - ``relMAE``: Relative Mean Absolute Error
1545
+ - ``KLDiv``: Kullback-Leibler Divergence
1546
+ - ``WD``: Wasserstein Distance
1547
+ - ``JSD``: Jensen-Shannon Distance
1548
+ - ``KS``: Kolmogorov-Smirnov Statistic
1549
+
1550
+ If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
1551
+
1552
+ - ``AE``: Absolute Error
1553
+ - ``SE``: Squared Error
1554
+
1555
+ loss_weight: int, optional
1556
+ Weight of this objective when multiple objectives are used.
1557
+ Must be specified if more than one objective is specified.
1558
+ return_loss : bool, optional
1559
+ If True, return both the loss surface and parameters.
1560
+ Default is False.
1265
1561
 
1266
- return np.array([N0, mu, Lambda])
1562
+ Returns
1563
+ -------
1564
+ parameters : numpy.ndarray
1565
+ Best parameters as [Nt, mu, sigma].
1566
+ An array of NaN values is returned if no valid solution is found.
1567
+ total_loss : numpy.ndarray, optional
1568
+ 2D array of total loss values reshaped to (len(sigma), len(mu)).
1569
+ Only returned if return_loss=True.
1267
1570
 
1571
+ Notes
1572
+ -----
1573
+ When multiple objectives are provided, losses are normalized and weighted.
1574
+ The best parameters correspond to the minimum total weighted loss.
1575
+ """
1576
+ # Ensure input is numpy array
1577
+ Nt = np.asarray(Nt)
1578
+ ND_obs = np.asarray(ND_obs)
1579
+ V = np.asarray(V)
1268
1580
 
1269
- def _apply_lognormal_gs(mu_values, sigma_values, Nt, ND_obs, D, dD, V, target, transformation, error_order):
1270
- """Routine for LognormalPSD parameters grid search."""
1271
1581
  # Define combinations of parameters for grid search
1272
- combo = np.meshgrid(mu_values, sigma_values, indexing="xy")
1273
- mu_arr = combo[0].ravel()
1274
- sigma_arr = combo[1].ravel()
1582
+ mu_grid, sigma_grid = np.meshgrid(
1583
+ mu,
1584
+ sigma,
1585
+ indexing="xy",
1586
+ )
1587
+ mu_arr = mu_grid.ravel()
1588
+ sigma_arr = sigma_grid.ravel()
1275
1589
 
1276
1590
  # Perform grid search
1277
1591
  with suppress_warnings():
1278
- # Compute ND
1592
+ # Compute N(D)
1279
1593
  ND_preds = LognormalPSD.formula(D=D[None, :], Nt=Nt, mu=mu_arr[:, None], sigma=sigma_arr[:, None])
1280
1594
 
1281
- # Compute errors
1282
- errors = _compute_cost_function(
1595
+ # Compute loss
1596
+ total_loss = compute_weighted_loss(
1283
1597
  ND_obs=ND_obs,
1284
1598
  ND_preds=ND_preds,
1285
1599
  D=D,
1286
1600
  dD=dD,
1287
1601
  V=V,
1288
- target=target,
1289
- transformation=transformation,
1290
- error_order=error_order,
1602
+ objectives=objectives,
1291
1603
  )
1292
1604
 
1293
- # Replace inf with NaN
1294
- errors[~np.isfinite(errors)] = np.nan
1605
+ # Define best parameters
1606
+ if not np.all(np.isnan(total_loss)):
1607
+ best_index = np.nanargmin(total_loss)
1608
+ mu_best = mu_arr[best_index].item()
1609
+ sigma_best = sigma_arr[best_index].item()
1610
+ parameters = np.array([Nt, mu_best, sigma_best])
1611
+ else:
1612
+ parameters = np.array([np.nan, np.nan, np.nan])
1295
1613
 
1296
- # If all invalid, return NaN parameters
1297
- if np.all(np.isnan(errors)):
1298
- return np.array([np.nan, np.nan, np.nan])
1614
+ # If asked, return cost function
1615
+ if return_loss:
1616
+ total_loss = total_loss.reshape(mu_grid.shape)
1617
+ return parameters, total_loss
1299
1618
 
1300
- # Otherwise, choose the best index
1301
- best_index = np.nanargmin(errors)
1302
- return Nt, mu_arr[best_index].item(), sigma_arr[best_index].item()
1619
+ return parameters
1303
1620
 
1304
1621
 
1305
- def apply_lognormal_gs(
1306
- Nt,
1622
+ def apply_normalized_gamma_gs(
1623
+ Nw,
1624
+ D50,
1307
1625
  ND_obs,
1308
1626
  V,
1309
1627
  # Coords
1310
1628
  D,
1311
1629
  dD,
1312
- # Error options
1313
- target,
1314
- transformation,
1315
- error_order,
1630
+ # PSD parameters
1631
+ mu,
1632
+ # Optimization options
1633
+ objectives,
1634
+ # Output options
1635
+ return_loss=False,
1316
1636
  ):
1317
- """Estimate LognormalPSD model parameters using Grid Search."""
1318
- # Define parameters bounds
1319
- sigma_bounds = (0, np.inf) # > 0
1320
- scale_bounds = (0, np.inf) # > 0
1321
- # mu_bounds = (- np.inf, np.inf) # mu = np.log(scale)
1322
-
1323
- # Define initial set of parameters
1324
- # --> Typically sigma between 0 and 3
1325
- # --> Typically mu between -2 and 2
1326
- scale_step = 0.2
1327
- sigma_step = 0.2
1328
- scale_values = np.arange(scale_step, 20, step=scale_step)
1329
- mu_values = np.log(scale_values)
1330
- sigma_values = np.arange(0, 3, step=sigma_step)
1331
-
1332
- # First round of GS
1333
- Nt, mu, sigma = _apply_lognormal_gs(
1334
- mu_values=mu_values,
1335
- sigma_values=sigma_values,
1336
- Nt=Nt,
1337
- ND_obs=ND_obs,
1338
- D=D,
1339
- dD=dD,
1340
- V=V,
1341
- target=target,
1342
- transformation=transformation,
1343
- error_order=error_order,
1344
- )
1345
- if np.isnan(mu): # if np.nan, return immediately
1346
- return np.array([Nt, mu, sigma])
1637
+ """Estimate NormalizedGammaPSD model parameters using Grid Search.
1638
+
1639
+ This function performs a grid search optimization to find the best parameter
1640
+ (mu) for the NormalizedGammaPSD model by minimizing a weighted
1641
+ cost function across one or more objectives.
1642
+
1643
+ Parameters
1644
+ ----------
1645
+ Nw : float
1646
+ Normalized intercept parameter.
1647
+ D50 : float
1648
+ Median volume diameter parameter.
1649
+ ND_obs : numpy.ndarray
1650
+ Observed PSD data [#/mm/m3].
1651
+ V : numpy.ndarray
1652
+ Fall velocity [m/s].
1653
+ D : numpy.ndarray
1654
+ Diameter bins [mm].
1655
+ dD : numpy.ndarray
1656
+ Diameter bin widths [mm].
1657
+ mu : int, float or numpy.ndarray
1658
+ mu parameter values to search.
1659
+ objectives: list of dict
1660
+ target : str, optional
1661
+ Target quantity to optimize. Valid options:
1662
+
1663
+ - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
1664
+ - ``"H(x)"`` : Normalized drop number concentration [-]
1665
+ - ``"R"`` : Rain rate [mm h⁻¹]
1666
+ - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
1667
+ - ``"LWC"`` : Liquid water content [g m⁻³]
1668
+ - ``"M<p>"`` : Moment of order p
1669
+
1670
+ transformation : str, optional
1671
+ Transformation applied to the target quantity before computing the loss.
1672
+ Valid options:
1673
+
1674
+ - ``"identity"`` : No transformation
1675
+ - ``"log"`` : Logarithmic transformation
1676
+ - ``"sqrt"`` : Square root transformation
1677
+
1678
+ censoring : str
1679
+ Specifies whether the observed particle size distribution (PSD) is
1680
+ treated as censored at the edges of the diameter range due to
1681
+ instrumental sensitivity limits:
1682
+
1683
+ - ``"none"`` : No censoring is applied. All diameter bins are used.
1684
+ - ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
1685
+ the spectrum where the observed number concentration is zero are
1686
+ removed prior to cost-function evaluation.
1687
+ - ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
1688
+ the spectrum where the observed number concentration is zero are
1689
+ removed prior to cost-function evaluation.
1690
+ - ``"both"`` : Both left- and right-censored PSD. Only the contiguous
1691
+ range of diameter bins with non-zero observed concentrations is
1692
+ retained.
1693
+
1694
+ loss : str, optional
1695
+ Loss function.
1696
+ If target is ``"N(D)"`` or ``"H(x)"``, valid options are:
1697
+
1698
+ - ``SSE``: Sum of Squared Errors
1699
+ - ``SAE``: Sum of Absolute Errors
1700
+ - ``MAE``: Mean Absolute Error
1701
+ - ``MSE``: Mean Squared Error
1702
+ - ``RMSE``: Root Mean Squared Error
1703
+ - ``relMAE``: Relative Mean Absolute Error
1704
+ - ``KLDiv``: Kullback-Leibler Divergence
1705
+ - ``WD``: Wasserstein Distance
1706
+ - ``JSD``: Jensen-Shannon Distance
1707
+ - ``KS``: Kolmogorov-Smirnov Statistic
1708
+
1709
+ If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
1710
+
1711
+ - ``AE``: Absolute Error
1712
+ - ``SE``: Squared Error
1713
+
1714
+ loss_weight: int, optional
1715
+ Weight of this objective when multiple objectives are used.
1716
+ Must be specified if more than one objective is specified.
1717
+ return_loss : bool, optional
1718
+ If True, return both the loss surface and parameters.
1719
+ Default is False.
1347
1720
 
1348
- # Second round of GS
1349
- sigma_values = define_param_range(sigma, sigma_step, bounds=sigma_bounds)
1350
- scale_values = define_param_range(np.exp(mu), scale_step, bounds=scale_bounds)
1721
+ Returns
1722
+ -------
1723
+ parameters : numpy.ndarray
1724
+ Best parameters as [Nw, D50, mu].
1725
+ An array of NaN values is returned if no valid solution is found.
1726
+ total_loss : numpy.ndarray, optional
1727
+ 1D array of total loss values.
1728
+ Only returned if return_loss=True.
1729
+
1730
+ Notes
1731
+ -----
1732
+ When multiple objectives are provided, losses are normalized and weighted.
1733
+ The best parameters correspond to the minimum total weighted loss.
1734
+ """
1735
+ # Ensure input is numpy array
1736
+ Nw = np.asarray(Nw)
1737
+ D50 = np.asarray(D50)
1738
+ ND_obs = np.asarray(ND_obs)
1739
+ V = np.asarray(V)
1740
+
1741
+ # Convert mu to array if needed
1742
+ mu_arr = np.atleast_1d(mu) if not isinstance(mu, np.ndarray) else mu
1743
+
1744
+ # Perform grid search
1351
1745
  with suppress_warnings():
1352
- mu_values = np.log(scale_values)
1353
- Nt, mu, sigma = _apply_lognormal_gs(
1354
- mu_values=mu_values,
1355
- sigma_values=sigma_values,
1356
- Nt=Nt,
1357
- ND_obs=ND_obs,
1358
- D=D,
1359
- dD=dD,
1360
- V=V,
1361
- target=target,
1362
- transformation=transformation,
1363
- error_order=error_order,
1364
- )
1746
+ # Compute N(D)
1747
+ ND_preds = NormalizedGammaPSD.formula(D=D[None, :], D50=D50, Nw=Nw, mu=mu_arr[:, None])
1748
+
1749
+ # Compute loss
1750
+ total_loss = compute_weighted_loss(
1751
+ ND_obs=ND_obs,
1752
+ ND_preds=ND_preds,
1753
+ D=D,
1754
+ dD=dD,
1755
+ V=V,
1756
+ objectives=objectives,
1757
+ Nc=Nw,
1758
+ )
1365
1759
 
1366
- return np.array([Nt, mu, sigma])
1760
+ # Define best parameters
1761
+ if not np.all(np.isnan(total_loss)):
1762
+ best_index = np.nanargmin(total_loss)
1763
+ mu_best = mu_arr[best_index].item()
1764
+ parameters = np.array([Nw, D50, mu_best])
1765
+ else:
1766
+ parameters = np.array([np.nan, np.nan, np.nan])
1367
1767
 
1768
+ # If asked, return cost function
1769
+ if return_loss:
1770
+ return parameters, total_loss
1368
1771
 
1369
- def apply_normalized_gamma_gs(
1370
- Nw,
1371
- D50,
1772
+ return parameters
1773
+
1774
+
1775
+ def apply_normalized_generalized_gamma_gs(
1776
+ Nc,
1777
+ Dc,
1372
1778
  ND_obs,
1373
1779
  V,
1374
1780
  # Coords
1375
1781
  D,
1376
1782
  dD,
1377
- # Error options
1378
- target,
1379
- transformation,
1380
- error_order,
1783
+ # PSD parameters
1784
+ i,
1785
+ j,
1786
+ mu,
1787
+ c,
1788
+ # Optimization options
1789
+ objectives,
1790
+ # Output options
1791
+ return_loss=False,
1381
1792
  ):
1382
- """Estimate NormalizedGammaPSD model parameters using Grid Search."""
1383
- # Define set of mu values
1384
- mu_arr = np.arange(-4, 30, step=0.01)
1793
+ """Estimate NormalizedGeneralizedGammaPSD model parameters using Grid Search.
1794
+
1795
+ This function performs a grid search optimization to find the best parameters
1796
+ (mu, c) for the NormalizedGeneralizedGammaPSD model by minimizing a weighted
1797
+ cost function across one or more objectives.
1798
+
1799
+ Parameters
1800
+ ----------
1801
+ Nc : float
1802
+ Normalized intercept parameter.
1803
+ Dc : float
1804
+ Normalized characteristic diameter parameter.
1805
+ ND_obs : numpy.ndarray
1806
+ Observed PSD data [#/mm/m3].
1807
+ V : numpy.ndarray
1808
+ Fall velocity [m/s].
1809
+ D : numpy.ndarray
1810
+ Diameter bins [mm].
1811
+ dD : numpy.ndarray
1812
+ Diameter bin widths [mm].
1813
+ i : int
1814
+ Moment order i of the NormalizedGeneralizedGammaPSD.
1815
+ j : int
1816
+ Moment order j of the NormalizedGeneralizedGammaPSD.
1817
+ mu : int, float or numpy.ndarray
1818
+ mu parameter values to search.
1819
+ c : int, float or numpy.ndarray
1820
+ c parameter values to search.
1821
+ objectives: list of dict
1822
+ target : str, optional
1823
+ Target quantity to optimize. Valid options:
1824
+
1825
+ - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
1826
+ - ``"H(x)"`` : Normalized drop number concentration [-]
1827
+ - ``"R"`` : Rain rate [mm h⁻¹]
1828
+ - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
1829
+ - ``"LWC"`` : Liquid water content [g m⁻³]
1830
+ - ``"M<p>"`` : Moment of order p
1831
+
1832
+ transformation : str, optional
1833
+ Transformation applied to the target quantity before computing the loss.
1834
+ Valid options:
1835
+
1836
+ - ``"identity"`` : No transformation
1837
+ - ``"log"`` : Logarithmic transformation
1838
+ - ``"sqrt"`` : Square root transformation
1839
+
1840
+ censoring : str
1841
+ Specifies whether the observed particle size distribution (PSD) is
1842
+ treated as censored at the edges of the diameter range due to
1843
+ instrumental sensitivity limits:
1844
+
1845
+ - ``"none"`` : No censoring is applied. All diameter bins are used.
1846
+ - ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
1847
+ the spectrum where the observed number concentration is zero are
1848
+ removed prior to cost-function evaluation.
1849
+ - ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
1850
+ the spectrum where the observed number concentration is zero are
1851
+ removed prior to cost-function evaluation.
1852
+ - ``"both"`` : Both left- and right-censored PSD. Only the contiguous
1853
+ range of diameter bins with non-zero observed concentrations is
1854
+ retained.
1855
+
1856
+ loss : str, optional
1857
+ Loss function.
1858
+ If target is ``"N(D)"`` or ``"H(x)"``, valid options are:
1859
+
1860
+ - ``SSE``: Sum of Squared Errors
1861
+ - ``SAE``: Sum of Absolute Errors
1862
+ - ``MAE``: Mean Absolute Error
1863
+ - ``MSE``: Mean Squared Error
1864
+ - ``RMSE``: Root Mean Squared Error
1865
+ - ``relMAE``: Relative Mean Absolute Error
1866
+ - ``KLDiv``: Kullback-Leibler Divergence
1867
+ - ``WD``: Wasserstein Distance
1868
+ - ``JSD``: Jensen-Shannon Distance
1869
+ - ``KS``: Kolmogorov-Smirnov Statistic
1870
+
1871
+ If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
1872
+
1873
+ - ``AE``: Absolute Error
1874
+ - ``SE``: Squared Error
1875
+
1876
+ loss_weight: int, optional
1877
+ Weight of this objective when multiple objectives are used.
1878
+ Must be specified if more than one objective is specified.
1879
+ return_loss : bool, optional
1880
+ If True, return both the loss surface and parameters.
1881
+ Default is False.
1882
+
1883
+ Returns
1884
+ -------
1885
+ parameters : numpy.ndarray
1886
+ Best parameters as [Nc, Dc, mu, c].
1887
+ An array of NaN values is returned if no valid solution is found.
1888
+ total_loss : numpy.ndarray, optional
1889
+ 2D array of total loss values reshaped to (len(c), len(mu)).
1890
+ Only returned if return_loss=True.
1891
+
1892
+ Notes
1893
+ -----
1894
+ When multiple objectives are provided, losses are normalized and weighted.
1895
+ The best parameters correspond to the minimum total weighted loss.
1896
+ """
1897
+ # Thurai 2018: mu [-3, 1], c [0-6]
1898
+
1899
+ # Ensure input is numpy array
1900
+ Nc = np.asarray(Nc)
1901
+ Dc = np.asarray(Dc)
1902
+ ND_obs = np.asarray(ND_obs)
1903
+ V = np.asarray(V)
1904
+
1905
+ # Define combinations of parameters for grid search
1906
+ mu_grid, c_grid = np.meshgrid(
1907
+ mu,
1908
+ c,
1909
+ indexing="xy",
1910
+ )
1911
+ mu_arr = mu_grid.ravel()
1912
+ c_arr = c_grid.ravel()
1385
1913
 
1386
1914
  # Perform grid search
1387
1915
  with suppress_warnings():
1388
- # Compute ND
1389
- ND_preds = NormalizedGammaPSD.formula(D=D[None, :], D50=D50, Nw=Nw, mu=mu_arr[:, None])
1390
1916
 
1391
- # Compute errors
1392
- errors = _compute_cost_function(
1917
+ # Compute N(D)
1918
+ ND_preds = NormalizedGeneralizedGammaPSD.formula(
1919
+ D=D[None, :],
1920
+ i=i,
1921
+ j=j,
1922
+ Nc=Nc,
1923
+ Dc=Dc,
1924
+ mu=mu_arr[:, None],
1925
+ c=c_arr[:, None],
1926
+ )
1927
+
1928
+ # Compute loss
1929
+ total_loss = compute_weighted_loss(
1393
1930
  ND_obs=ND_obs,
1394
1931
  ND_preds=ND_preds,
1395
1932
  D=D,
1396
1933
  dD=dD,
1397
1934
  V=V,
1398
- target=target,
1399
- transformation=transformation,
1400
- error_order=error_order,
1935
+ objectives=objectives,
1936
+ Nc=Nc,
1401
1937
  )
1402
1938
 
1403
- # Replace inf with NaN
1404
- errors[~np.isfinite(errors)] = np.nan
1939
+ # Define best parameters
1940
+ if not np.all(np.isnan(total_loss)):
1941
+ best_index = np.nanargmin(total_loss)
1942
+ mu, c = mu_arr[best_index].item(), c_arr[best_index].item()
1943
+ parameters = np.array([Nc, Dc, mu, c])
1944
+ else:
1945
+ parameters = np.array([np.nan, np.nan, np.nan, np.nan])
1946
+
1947
+ # If asked, return cost function
1948
+ if return_loss:
1949
+ total_loss = total_loss.reshape(mu_grid.shape)
1950
+ return parameters, total_loss
1951
+ return parameters
1952
+
1953
+
1954
+ def get_exponential_parameters_gs(
1955
+ ds,
1956
+ Lambda=None,
1957
+ objectives=None,
1958
+ return_loss=False,
1959
+ ):
1960
+ """Estimate Exponential PSD parameters using Grid Search optimization.
1405
1961
 
1406
- # If all invalid, return NaN parameters
1407
- if np.all(np.isnan(errors)):
1408
- return np.array([np.nan, np.nan, np.nan])
1962
+ The parameter ``N_t`` is computed empirically from the observed DSD,
1963
+ while the shape parameter ``Lambda`` is estimated through
1964
+ grid search by minimizing the error between observed and modeled quantities.
1409
1965
 
1410
- # Otherwise, choose the best index
1411
- best_index = np.nanargmin(errors)
1412
- mu = mu_arr[best_index]
1413
- return np.array([Nw, mu, D50])
1966
+ Parameters
1967
+ ----------
1968
+ ds : xarray.Dataset
1969
+ Input dataset containing PSD observations. Must include:
1970
+
1971
+ - ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
1972
+ - ``diameter_bin_center`` : Diameter bin centers [mm]
1973
+ - ``diameter_bin_width`` : Diameter bin widths [mm]
1974
+ - ``fall_velocity`` : Drop fall velocity [m s⁻¹] (required if target='R')
1975
+
1976
+ Lambda : int, float or numpy.ndarray
1977
+ Lambda parameter values to search.
1978
+ objectives: list of dict
1979
+ target : str, optional
1980
+ Target quantity to optimize. Valid options:
1981
+
1982
+ - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
1983
+ - ``"R"`` : Rain rate [mm h⁻¹]
1984
+ - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
1985
+ - ``"LWC"`` : Liquid water content [g m⁻³]
1986
+ - ``"M<p>"`` : Moment of order p
1987
+
1988
+ transformation : str, optional
1989
+ Transformation applied to the target quantity before computing the loss.
1990
+ Valid options:
1991
+
1992
+ - ``"identity"`` : No transformation
1993
+ - ``"log"`` : Logarithmic transformation
1994
+ - ``"sqrt"`` : Square root transformation
1995
+
1996
+ censoring : str
1997
+ Specifies whether the observed particle size distribution (PSD) is
1998
+ treated as censored at the edges of the diameter range due to
1999
+ instrumental sensitivity limits:
2000
+
2001
+ - ``"none"`` : No censoring is applied. All diameter bins are used.
2002
+ - ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
2003
+ the spectrum where the observed number concentration is zero are
2004
+ removed prior to cost-function evaluation.
2005
+ - ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
2006
+ the spectrum where the observed number concentration is zero are
2007
+ removed prior to cost-function evaluation.
2008
+ - ``"both"`` : Both left- and right-censored PSD. Only the contiguous
2009
+ range of diameter bins with non-zero observed concentrations is
2010
+ retained.
2011
+
2012
+ loss : str, optional
2013
+ Loss function.
2014
+ If target is ``"N(D)"``, valid options are:
2015
+
2016
+ - ``SSE``: Sum of Squared Errors
2017
+ - ``SAE``: Sum of Absolute Errors
2018
+ - ``MAE``: Mean Absolute Error
2019
+ - ``MSE``: Mean Squared Error
2020
+ - ``RMSE``: Root Mean Squared Error
2021
+ - ``relMAE``: Relative Mean Absolute Error
2022
+ - ``KLDiv``: Kullback-Leibler Divergence
2023
+ - ``WD``: Wasserstein Distance
2024
+ - ``JSD``: Jensen-Shannon Distance
2025
+ - ``KS``: Kolmogorov-Smirnov Statistic
2026
+
2027
+ If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
2028
+
2029
+ - ``AE``: Absolute Error
2030
+ - ``SE``: Squared Error
2031
+
2032
+ loss_weight: int, optional
2033
+ Weight of this objective when multiple objectives are used.
2034
+ Must be specified if more than one objective is specified.
2035
+ return_loss : bool, optional
2036
+ If True, return both the loss surface and parameters.
2037
+ Default is False.
1414
2038
 
2039
+ Returns
2040
+ -------
2041
+ ds_params : xarray.Dataset
2042
+ Dataset containing the estimated Exponential distribution parameters.
2043
+ """
2044
+ # Use default objectives if not specified
2045
+ if objectives is None:
2046
+ objectives = copy.deepcopy(DEFAULT_OBJECTIVES)
1415
2047
 
1416
- def get_exponential_parameters_gs(ds, target="ND", transformation="log", error_order=1):
1417
- """Estimate the parameters of an Exponential distribution using Grid Search."""
1418
- # "target": ["ND", "LWC", "Z", "R"]
1419
- # "transformation": "log", "identity", "sqrt", # only for drop_number_concentration
1420
- # "error_order": 1, # MAE/MSE ... only for drop_number_concentration
2048
+ # Check objectives
2049
+ objectives = check_objectives(objectives=objectives)
1421
2050
 
1422
2051
  # Compute required variables
1423
- ds["Nt"] = get_total_number_concentration(
2052
+ Nt = get_total_number_concentration(
1424
2053
  drop_number_concentration=ds["drop_number_concentration"],
1425
2054
  diameter_bin_width=ds["diameter_bin_width"],
1426
2055
  )
1427
2056
 
2057
+ # Define search space
2058
+ if Lambda is None:
2059
+ Lambda = np.arange(0.01, 10, step=0.01)
2060
+
1428
2061
  # Define kwargs
1429
2062
  kwargs = {
1430
- "D": ds["diameter_bin_center"].data,
1431
- "dD": ds["diameter_bin_width"].data,
1432
- "target": target,
1433
- "transformation": transformation,
1434
- "error_order": error_order,
2063
+ "D": ds["diameter_bin_center"].to_numpy(),
2064
+ "dD": ds["diameter_bin_width"].to_numpy(),
2065
+ "objectives": objectives,
2066
+ "return_loss": return_loss,
2067
+ "Lambda": Lambda,
1435
2068
  }
1436
2069
 
1437
- # Fit distribution in parallel
1438
- da_params = xr.apply_ufunc(
2070
+ # Define function to create parameters dataset
2071
+ def _create_parameters_dataset(da_parameters):
2072
+ # Add parameters coordinates
2073
+ da_parameters = da_parameters.assign_coords({"parameters": ["N0", "Lambda"]})
2074
+
2075
+ # Create parameters dataset
2076
+ ds_parameters = da_parameters.to_dataset(dim="parameters")
2077
+
2078
+ # Add DSD model name to the attribute
2079
+ ds_parameters.attrs["disdrodb_psd_model"] = "ExponentialPSD"
2080
+ return ds_parameters
2081
+
2082
+ # Return cost function if asked
2083
+ if return_loss:
2084
+ da_parameters, da_cost_function = xr.apply_ufunc(
2085
+ apply_exponential_gs,
2086
+ # Variables varying over time
2087
+ Nt,
2088
+ ds["drop_number_concentration"],
2089
+ ds["fall_velocity"],
2090
+ # Other options
2091
+ kwargs=kwargs,
2092
+ # Settings
2093
+ input_core_dims=[[], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
2094
+ output_core_dims=[["parameters"], ["Lambda_values"]],
2095
+ vectorize=True,
2096
+ dask="parallelized",
2097
+ # Lengths of the new output_core_dims dimensions.
2098
+ dask_gufunc_kwargs={"output_sizes": {"Lambda_values": len(Lambda), "parameters": 2}},
2099
+ output_dtypes=["float64", "float64"],
2100
+ )
2101
+ ds_parameters = _create_parameters_dataset(da_parameters)
2102
+ ds_parameters["cost_function"] = da_cost_function
2103
+ ds_parameters = ds_parameters.assign_coords({"Lambda_values": Lambda})
2104
+ return ds_parameters
2105
+
2106
+ # Otherwise return just best parameters
2107
+ da_parameters = xr.apply_ufunc(
1439
2108
  apply_exponential_gs,
1440
2109
  # Variables varying over time
1441
- ds["Nt"],
2110
+ Nt,
1442
2111
  ds["drop_number_concentration"],
1443
2112
  ds["fall_velocity"],
1444
2113
  # Other options
@@ -1451,44 +2120,180 @@ def get_exponential_parameters_gs(ds, target="ND", transformation="log", error_o
1451
2120
  dask_gufunc_kwargs={"output_sizes": {"parameters": 2}}, # lengths of the new output_core_dims dimensions.
1452
2121
  output_dtypes=["float64"],
1453
2122
  )
2123
+ ds_parameters = _create_parameters_dataset(da_parameters)
2124
+ return ds_parameters
1454
2125
 
1455
- # Add parameters coordinates
1456
- da_params = da_params.assign_coords({"parameters": ["N0", "Lambda"]})
1457
2126
 
1458
- # Create parameters dataset
1459
- ds_params = da_params.to_dataset(dim="parameters")
2127
+ def get_gamma_parameters_gs(
2128
+ ds,
2129
+ mu=None,
2130
+ Lambda=None,
2131
+ objectives=None,
2132
+ return_loss=False,
2133
+ ):
2134
+ """Estimate Gamma PSD parameters using Grid Search optimization.
1460
2135
 
1461
- # Add DSD model name to the attribute
1462
- ds_params.attrs["disdrodb_psd_model"] = "ExponentialPSD"
1463
- return ds_params
2136
+ The parameter ``N_t`` is computed empirically from the observed DSD,
2137
+ while the shape parameters ``mu`` and ``Lambda`` are estimated through
2138
+ grid search by minimizing the error between observed and modeled quantities.
2139
+
2140
+ Parameters
2141
+ ----------
2142
+ ds : xarray.Dataset
2143
+ Input dataset containing PSD observations. Must include:
2144
+
2145
+ - ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
2146
+ - ``diameter_bin_center`` : Diameter bin centers [mm]
2147
+ - ``diameter_bin_width`` : Diameter bin widths [mm]
2148
+ - ``fall_velocity`` : Drop fall velocity [m s⁻¹] (always read from ``ds``; needed when the target is ``"R"``)
2149
+
2150
+ mu : int, float or numpy.ndarray
2151
+ mu parameter values to search.
2152
+ Lambda : int, float or numpy.ndarray
2153
+ Lambda parameter values to search.
2154
+ objectives: list of dict
2155
+ target : str, optional
2156
+ Target quantity to optimize. Valid options:
2157
+
2158
+ - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
2159
+ - ``"R"`` : Rain rate [mm h⁻¹]
2160
+ - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
2161
+ - ``"LWC"`` : Liquid water content [g m⁻³]
2162
+ - ``"M<p>"`` : Moment of order p
2163
+
2164
+ transformation : str, optional
2165
+ Transformation applied to the target quantity before computing the loss.
2166
+ Valid options:
2167
+
2168
+ - ``"identity"`` : No transformation
2169
+ - ``"log"`` : Logarithmic transformation
2170
+ - ``"sqrt"`` : Square root transformation
2171
+
2172
+ censoring : str
2173
+ Specifies whether the observed particle size distribution (PSD) is
2174
+ treated as censored at the edges of the diameter range due to
2175
+ instrumental sensitivity limits:
2176
+
2177
+ - ``"none"`` : No censoring is applied. All diameter bins are used.
2178
+ - ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
2179
+ the spectrum where the observed number concentration is zero are
2180
+ removed prior to cost-function evaluation.
2181
+ - ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
2182
+ the spectrum where the observed number concentration is zero are
2183
+ removed prior to cost-function evaluation.
2184
+ - ``"both"`` : Both left- and right-censored PSD. Only the contiguous
2185
+ range of diameter bins with non-zero observed concentrations is
2186
+ retained.
2187
+
2188
+ loss : str, optional
2189
+ Loss function.
2190
+ If target is ``"N(D)"``, valid options are:
2191
+
2192
+ - ``SSE``: Sum of Squared Errors
2193
+ - ``SAE``: Sum of Absolute Errors
2194
+ - ``MAE``: Mean Absolute Error
2195
+ - ``MSE``: Mean Squared Error
2196
+ - ``RMSE``: Root Mean Squared Error
2197
+ - ``relMAE``: Relative Mean Absolute Error
2198
+ - ``KLDiv``: Kullback-Leibler Divergence
2199
+ - ``WD``: Wasserstein Distance
2200
+ - ``JSD``: Jensen-Shannon Distance
2201
+ - ``KS``: Kolmogorov-Smirnov Statistic
2202
+
2203
+ If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
2204
+
2205
+ - ``AE``: Absolute Error
2206
+ - ``SE``: Squared Error
2207
+
2208
+ loss_weight: int, optional
2209
+ Weight of this objective when multiple objectives are used.
2210
+ Must be specified if more than one objective is specified.
2211
+ return_loss : bool, optional
2212
+ If True, return both the loss surface and parameters.
2213
+ Default is False.
1464
2214
 
2215
+ Returns
2216
+ -------
2217
+ ds_params : xarray.Dataset
2218
+ Dataset containing the estimated Gamma distribution parameters.
2219
+ """
2220
+ # Use default objectives if not specified
2221
+ if objectives is None:
2222
+ objectives = copy.deepcopy(DEFAULT_OBJECTIVES)
1465
2223
 
1466
- def get_gamma_parameters_gs(ds, target="ND", transformation="log", error_order=1):
1467
- """Compute Grid Search to identify mu and Lambda Gamma distribution parameters."""
1468
- # "target": ["ND", "LWC", "Z", "R"]
1469
- # "transformation": "log", "identity", "sqrt", # only for drop_number_concentration
1470
- # "error_order": 1, # MAE/MSE ... only for drop_number_concentration
2224
+ # Check objectives
2225
+ objectives = check_objectives(objectives=objectives)
1471
2226
 
1472
2227
  # Compute required variables
1473
- ds["Nt"] = get_total_number_concentration(
2228
+ Nt = get_total_number_concentration(
1474
2229
  drop_number_concentration=ds["drop_number_concentration"],
1475
2230
  diameter_bin_width=ds["diameter_bin_width"],
1476
2231
  )
1477
2232
 
2233
+ # Define search space
2234
+ if mu is None:
2235
+ mu = np.arange(0, 15, step=0.1)
2236
+ if Lambda is None:
2237
+ Lambda = np.arange(0, 30, step=0.1)
2238
+
1478
2239
  # Define kwargs
1479
2240
  kwargs = {
1480
- "D": ds["diameter_bin_center"].data,
1481
- "dD": ds["diameter_bin_width"].data,
1482
- "target": target,
1483
- "transformation": transformation,
1484
- "error_order": error_order,
2241
+ "D": ds["diameter_bin_center"].to_numpy(),
2242
+ "dD": ds["diameter_bin_width"].to_numpy(),
2243
+ "objectives": objectives,
2244
+ "return_loss": return_loss,
2245
+ "mu": mu,
2246
+ "Lambda": Lambda,
1485
2247
  }
1486
2248
 
1487
- # Fit distribution in parallel
1488
- da_params = xr.apply_ufunc(
2249
+ # Define function to create parameters dataset
2250
+ def _create_parameters_dataset(da_parameters):
2251
+ # Add parameters coordinates
2252
+ da_parameters = da_parameters.assign_coords({"parameters": ["N0", "Lambda", "mu"]})
2253
+
2254
+ # Create parameters dataset
2255
+ ds_parameters = da_parameters.to_dataset(dim="parameters")
2256
+
2257
+ # Add DSD model name to the attribute
2258
+ ds_parameters.attrs["disdrodb_psd_model"] = "GammaPSD"
2259
+ return ds_parameters
2260
+
2261
+ # Return cost function if asked
2262
+ if return_loss:
2263
+ # Define lengths of the new output_core_dims dimensions.
2264
+ output_dict_size = {
2265
+ "mu_values": len(mu),
2266
+ "Lambda_values": len(Lambda),
2267
+ "parameters": 3,
2268
+ }
2269
+ # Compute cost function and parameters
2270
+ da_parameters, da_cost_function = xr.apply_ufunc(
2271
+ apply_gamma_gs,
2272
+ # Variables varying over time
2273
+ Nt,
2274
+ ds["drop_number_concentration"],
2275
+ ds["fall_velocity"],
2276
+ # Other options
2277
+ kwargs=kwargs,
2278
+ # Settings
2279
+ input_core_dims=[[], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
2280
+ output_core_dims=[["parameters"], ["Lambda_values", "mu_values"]],
2281
+ vectorize=True,
2282
+ dask="parallelized",
2283
+ # Lengths of the new output_core_dims dimensions.
2284
+ dask_gufunc_kwargs={"output_sizes": output_dict_size},
2285
+ output_dtypes=["float64", "float64"],
2286
+ )
2287
+ ds_parameters = _create_parameters_dataset(da_parameters)
2288
+ ds_parameters["cost_function"] = da_cost_function
2289
+ ds_parameters = ds_parameters.assign_coords({"mu_values": mu, "Lambda_values": Lambda})
2290
+ return ds_parameters
2291
+
2292
+ # Otherwise return just best parameters
2293
+ da_parameters = xr.apply_ufunc(
1489
2294
  apply_gamma_gs,
1490
2295
  # Variables varying over time
1491
- ds["Nt"],
2296
+ Nt,
1492
2297
  ds["drop_number_concentration"],
1493
2298
  ds["fall_velocity"],
1494
2299
  # Other options
@@ -1501,44 +2306,186 @@ def get_gamma_parameters_gs(ds, target="ND", transformation="log", error_order=1
1501
2306
  dask_gufunc_kwargs={"output_sizes": {"parameters": 3}}, # lengths of the new output_core_dims dimensions.
1502
2307
  output_dtypes=["float64"],
1503
2308
  )
2309
+ ds_parameters = _create_parameters_dataset(da_parameters)
2310
+ return ds_parameters
1504
2311
 
1505
- # Add parameters coordinates
1506
- da_params = da_params.assign_coords({"parameters": ["N0", "mu", "Lambda"]})
1507
-
1508
- # Create parameters dataset
1509
- ds_params = da_params.to_dataset(dim="parameters")
1510
2312
 
1511
- # Add DSD model name to the attribute
1512
- ds_params.attrs["disdrodb_psd_model"] = "GammaPSD"
1513
- return ds_params
2313
+ def get_generalized_gamma_parameters_gs(
2314
+ ds,
2315
+ mu=None,
2316
+ c=None,
2317
+ Lambda=None,
2318
+ objectives=None,
2319
+ return_loss=False,
2320
+ ):
2321
+ """Estimate Generalized Gamma PSD parameters using Grid Search optimization.
1514
2322
 
2323
+ The parameter ``N_t`` is computed empirically from the observed DSD,
2324
+ while the shape parameters ``mu``, ``c``, and ``Lambda`` are estimated through
2325
+ grid search by minimizing the error between observed and modeled quantities.
1515
2326
 
1516
- def get_lognormal_parameters_gs(ds, target="ND", transformation="log", error_order=1):
1517
- """Compute Grid Search to identify mu and sigma lognormal distribution parameters."""
1518
- # "target": ["ND", "LWC", "Z", "R"]
1519
- # "transformation": "log", "identity", "sqrt", # only for drop_number_concentration
1520
- # "error_order": 1, # MAE/MSE ... only for drop_number_concentration
2327
+ Parameters
2328
+ ----------
2329
+ ds : xarray.Dataset
2330
+ Input dataset containing PSD observations. Must include:
2331
+
2332
+ - ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
2333
+ - ``diameter_bin_center`` : Diameter bin centers [mm]
2334
+ - ``diameter_bin_width`` : Diameter bin widths [mm]
2335
+ - ``fall_velocity`` : Drop fall velocity [m s⁻¹] (always read from ``ds``; needed when the target is ``"R"``)
2336
+
2337
+ mu : int, float or numpy.ndarray
2338
+ mu parameter values to search.
2339
+ c : int, float or numpy.ndarray
2340
+ c parameter values to search.
2341
+ Lambda : int, float or numpy.ndarray
2342
+ Lambda parameter values to search.
2343
+ objectives: list of dict
2344
+ target : str, optional
2345
+ Target quantity to optimize. Valid options:
2346
+
2347
+ - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
2348
+ - ``"R"`` : Rain rate [mm h⁻¹]
2349
+ - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
2350
+ - ``"LWC"`` : Liquid water content [g m⁻³]
2351
+ - ``"M<p>"`` : Moment of order p
2352
+
2353
+ transformation : str, optional
2354
+ Transformation applied to the target quantity before computing the loss.
2355
+ Valid options:
2356
+
2357
+ - ``"identity"`` : No transformation
2358
+ - ``"log"`` : Logarithmic transformation
2359
+ - ``"sqrt"`` : Square root transformation
2360
+
2361
+ censoring : str
2362
+ Specifies whether the observed particle size distribution (PSD) is
2363
+ treated as censored at the edges of the diameter range due to
2364
+ instrumental sensitivity limits:
2365
+
2366
+ - ``"none"`` : No censoring is applied. All diameter bins are used.
2367
+ - ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
2368
+ the spectrum where the observed number concentration is zero are
2369
+ removed prior to cost-function evaluation.
2370
+ - ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
2371
+ the spectrum where the observed number concentration is zero are
2372
+ removed prior to cost-function evaluation.
2373
+ - ``"both"`` : Both left- and right-censored PSD. Only the contiguous
2374
+ range of diameter bins with non-zero observed concentrations is
2375
+ retained.
2376
+
2377
+ loss : str, optional
2378
+ Loss function.
2379
+ If target is ``"N(D)"``, valid options are:
2380
+
2381
+ - ``SSE``: Sum of Squared Errors
2382
+ - ``SAE``: Sum of Absolute Errors
2383
+ - ``MAE``: Mean Absolute Error
2384
+ - ``MSE``: Mean Squared Error
2385
+ - ``RMSE``: Root Mean Squared Error
2386
+ - ``relMAE``: Relative Mean Absolute Error
2387
+ - ``KLDiv``: Kullback-Leibler Divergence
2388
+ - ``WD``: Wasserstein Distance
2389
+ - ``JSD``: Jensen-Shannon Distance
2390
+ - ``KS``: Kolmogorov-Smirnov Statistic
2391
+
2392
+ If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
2393
+
2394
+ - ``AE``: Absolute Error
2395
+ - ``SE``: Squared Error
2396
+
2397
+ loss_weight: int, optional
2398
+ Weight of this objective when multiple objectives are used.
2399
+ Must be specified if more than one objective is specified.
2400
+ return_loss : bool, optional
2401
+ If True, return both the loss surface and parameters.
2402
+ Default is False.
2403
+
2404
+ Returns
2405
+ -------
2406
+ ds_params : xarray.Dataset
2407
+ Dataset containing the estimated Generalized Gamma distribution parameters.
2408
+ """
2409
+ # Use default objectives if not specified
2410
+ if objectives is None:
2411
+ objectives = copy.deepcopy(DEFAULT_OBJECTIVES)
2412
+
2413
+ # Check objectives
2414
+ objectives = check_objectives(objectives=objectives)
1521
2415
 
1522
2416
  # Compute required variables
1523
- ds["Nt"] = get_total_number_concentration(
2417
+ Nt = get_total_number_concentration(
1524
2418
  drop_number_concentration=ds["drop_number_concentration"],
1525
2419
  diameter_bin_width=ds["diameter_bin_width"],
1526
2420
  )
1527
2421
 
2422
+ # Define search space
2423
+ if mu is None:
2424
+ mu = np.arange(0, 10, step=0.2)
2425
+ if c is None:
2426
+ c = np.arange(0, 10, step=0.2)
2427
+ if Lambda is None:
2428
+ Lambda = np.arange(0, 20, step=0.2)
2429
+
1528
2430
  # Define kwargs
1529
2431
  kwargs = {
1530
- "D": ds["diameter_bin_center"].data,
1531
- "dD": ds["diameter_bin_width"].data,
1532
- "target": target,
1533
- "transformation": transformation,
1534
- "error_order": error_order,
2432
+ "D": ds["diameter_bin_center"].to_numpy(),
2433
+ "dD": ds["diameter_bin_width"].to_numpy(),
2434
+ "objectives": objectives,
2435
+ "return_loss": return_loss,
2436
+ "mu": mu,
2437
+ "c": c,
2438
+ "Lambda": Lambda,
1535
2439
  }
1536
2440
 
1537
- # Fit distribution in parallel
1538
- da_params = xr.apply_ufunc(
1539
- apply_lognormal_gs,
2441
+ # Define function to create parameters dataset
2442
+ def _create_parameters_dataset(da_parameters):
2443
+ # Add parameters coordinates
2444
+ da_parameters = da_parameters.assign_coords({"parameters": ["Nt", "Lambda", "mu", "c"]})
2445
+
2446
+ # Create parameters dataset
2447
+ ds_parameters = da_parameters.to_dataset(dim="parameters")
2448
+
2449
+ # Add DSD model name to the attribute
2450
+ ds_parameters.attrs["disdrodb_psd_model"] = "GeneralizedGammaPSD"
2451
+ return ds_parameters
2452
+
2453
+ # Return cost function if asked
2454
+ if return_loss:
2455
+ # Define lengths of the new output_core_dims dimensions.
2456
+ output_dict_size = {
2457
+ "mu_values": len(mu),
2458
+ "Lambda_values": len(Lambda),
2459
+ "c_values": len(c),
2460
+ "parameters": 4,
2461
+ }
2462
+ # Compute
2463
+ da_parameters, da_cost_function = xr.apply_ufunc(
2464
+ apply_generalized_gamma_gs,
2465
+ # Variables varying over time
2466
+ Nt,
2467
+ ds["drop_number_concentration"],
2468
+ ds["fall_velocity"],
2469
+ # Other options
2470
+ kwargs=kwargs,
2471
+ # Settings
2472
+ input_core_dims=[[], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
2473
+ output_core_dims=[["parameters"], ["Lambda_values", "mu_values", "c_values"]],
2474
+ vectorize=True,
2475
+ dask="parallelized",
2476
+ dask_gufunc_kwargs={"output_sizes": output_dict_size},
2477
+ output_dtypes=["float64", "float64"],
2478
+ )
2479
+ ds_parameters = _create_parameters_dataset(da_parameters)
2480
+ ds_parameters["cost_function"] = da_cost_function
2481
+ ds_parameters = ds_parameters.assign_coords({"mu_values": mu, "Lambda_values": Lambda, "c_values": c})
2482
+ return ds_parameters
2483
+
2484
+ # Otherwise return just best parameters
2485
+ da_parameters = xr.apply_ufunc(
2486
+ apply_generalized_gamma_gs,
1540
2487
  # Variables varying over time
1541
- ds["Nt"],
2488
+ Nt,
1542
2489
  ds["drop_number_concentration"],
1543
2490
  ds["fall_velocity"],
1544
2491
  # Other options
@@ -1548,49 +2495,289 @@ def get_lognormal_parameters_gs(ds, target="ND", transformation="log", error_ord
1548
2495
  output_core_dims=[["parameters"]],
1549
2496
  vectorize=True,
1550
2497
  dask="parallelized",
1551
- dask_gufunc_kwargs={"output_sizes": {"parameters": 3}}, # lengths of the new output_core_dims dimensions.
2498
+ dask_gufunc_kwargs={"output_sizes": {"parameters": 4}}, # lengths of the new output_core_dims dimensions.
1552
2499
  output_dtypes=["float64"],
1553
2500
  )
2501
+ ds_parameters = _create_parameters_dataset(da_parameters)
2502
+ return ds_parameters
1554
2503
 
1555
- # Add parameters coordinates
1556
- da_params = da_params.assign_coords({"parameters": ["Nt", "mu", "sigma"]})
1557
2504
 
1558
- # Create parameters dataset
1559
- ds_params = da_params.to_dataset(dim="parameters")
2505
+ def get_lognormal_parameters_gs(
2506
+ ds,
2507
+ mu=None,
2508
+ sigma=None,
2509
+ objectives=None,
2510
+ return_loss=False,
2511
+ ):
2512
+ """Estimate Lognormal PSD parameters using Grid Search optimization.
1560
2513
 
1561
- # Add DSD model name to the attribute
1562
- ds_params.attrs["disdrodb_psd_model"] = "LognormalPSD"
1563
- return ds_params
2514
+ The parameter ``N_t`` is computed empirically from the observed DSD,
2515
+ while the shape parameters ``mu`` and ``sigma`` are estimated through
2516
+ grid search by minimizing the error between observed and modeled quantities.
2517
+
2518
+ Parameters
2519
+ ----------
2520
+ ds : xarray.Dataset
2521
+ Input dataset containing PSD observations. Must include:
2522
+
2523
+ - ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
2524
+ - ``diameter_bin_center`` : Diameter bin centers [mm]
2525
+ - ``diameter_bin_width`` : Diameter bin widths [mm]
2526
+ - ``fall_velocity`` : Drop fall velocity [m s⁻¹] (always read from ``ds``; needed when the target is ``"R"``)
2527
+
2528
+ mu : int, float or numpy.ndarray
2529
+ mu parameter values to search.
2530
+ sigma : int, float or numpy.ndarray
2531
+ sigma parameter values to search.
2532
+ objectives: list of dict
2533
+ target : str, optional
2534
+ Target quantity to optimize. Valid options:
2535
+
2536
+ - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
2537
+ - ``"R"`` : Rain rate [mm h⁻¹]
2538
+ - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
2539
+ - ``"LWC"`` : Liquid water content [g m⁻³]
2540
+ - ``"M<p>"`` : Moment of order p
2541
+
2542
+ transformation : str, optional
2543
+ Transformation applied to the target quantity before computing the loss.
2544
+ Valid options:
2545
+
2546
+ - ``"identity"`` : No transformation
2547
+ - ``"log"`` : Logarithmic transformation
2548
+ - ``"sqrt"`` : Square root transformation
2549
+
2550
+ censoring : str
2551
+ Specifies whether the observed particle size distribution (PSD) is
2552
+ treated as censored at the edges of the diameter range due to
2553
+ instrumental sensitivity limits:
2554
+
2555
+ - ``"none"`` : No censoring is applied. All diameter bins are used.
2556
+ - ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
2557
+ the spectrum where the observed number concentration is zero are
2558
+ removed prior to cost-function evaluation.
2559
+ - ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
2560
+ the spectrum where the observed number concentration is zero are
2561
+ removed prior to cost-function evaluation.
2562
+ - ``"both"`` : Both left- and right-censored PSD. Only the contiguous
2563
+ range of diameter bins with non-zero observed concentrations is
2564
+ retained.
2565
+
2566
+ loss : str, optional
2567
+ Loss function.
2568
+ If target is ``"N(D)"``, valid options are:
2569
+
2570
+ - ``SSE``: Sum of Squared Errors
2571
+ - ``SAE``: Sum of Absolute Errors
2572
+ - ``MAE``: Mean Absolute Error
2573
+ - ``MSE``: Mean Squared Error
2574
+ - ``RMSE``: Root Mean Squared Error
2575
+ - ``relMAE``: Relative Mean Absolute Error
2576
+ - ``KLDiv``: Kullback-Leibler Divergence
2577
+ - ``WD``: Wasserstein Distance
2578
+ - ``JSD``: Jensen-Shannon Distance
2579
+ - ``KS``: Kolmogorov-Smirnov Statistic
2580
+
2581
+ If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
2582
+
2583
+ - ``AE``: Absolute Error
2584
+ - ``SE``: Squared Error
2585
+
2586
+ loss_weight: int, optional
2587
+ Weight of this objective when multiple objectives are used.
2588
+ Must be specified if more than one objective is specified.
2589
+ return_loss : bool, optional
2590
+ If True, return both the loss surface and parameters.
2591
+ Default is False.
2592
+
2593
+ Returns
2594
+ -------
2595
+ ds_params : xarray.Dataset
2596
+ Dataset containing the estimated Lognormal distribution parameters.
2597
+ """
2598
+ # Use default objectives if not specified
2599
+ if objectives is None:
2600
+ objectives = copy.deepcopy(DEFAULT_OBJECTIVES)
2601
+
2602
+ # Check objectives
2603
+ objectives = check_objectives(objectives=objectives)
2604
+
2605
+ # Compute required variables
2606
+ Nt = get_total_number_concentration(
2607
+ drop_number_concentration=ds["drop_number_concentration"],
2608
+ diameter_bin_width=ds["diameter_bin_width"],
2609
+ )
2610
+
2611
+ # Define search space
2612
+ if mu is None:
2613
+ mu = np.arange(-4, 1, step=0.1)
2614
+ if sigma is None:
2615
+ sigma = np.arange(0, 3, step=0.2)
2616
+
2617
+ # Define kwargs
2618
+ kwargs = {
2619
+ "D": ds["diameter_bin_center"].to_numpy(),
2620
+ "dD": ds["diameter_bin_width"].to_numpy(),
2621
+ "objectives": objectives,
2622
+ "return_loss": return_loss,
2623
+ "mu": mu,
2624
+ "sigma": sigma,
2625
+ }
1564
2626
 
2627
+ # Define function to create parameters dataset
2628
+ def _create_parameters_dataset(da_parameters):
2629
+ # Add parameters coordinates
2630
+ da_parameters = da_parameters.assign_coords({"parameters": ["Nt", "mu", "sigma"]})
2631
+
2632
+ # Create parameters dataset
2633
+ ds_parameters = da_parameters.to_dataset(dim="parameters")
2634
+
2635
+ # Add DSD model name to the attribute
2636
+ ds_parameters.attrs["disdrodb_psd_model"] = "LognormalPSD"
2637
+ return ds_parameters
2638
+
2639
+ # Return cost function if asked
2640
+ if return_loss:
2641
+ da_parameters, da_cost_function = xr.apply_ufunc(
2642
+ apply_lognormal_gs,
2643
+ # Variables varying over time
2644
+ Nt,
2645
+ ds["drop_number_concentration"],
2646
+ ds["fall_velocity"],
2647
+ # Other options
2648
+ kwargs=kwargs,
2649
+ # Settings
2650
+ input_core_dims=[[], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
2651
+ output_core_dims=[["parameters"], ["sigma_values", "mu_values"]],
2652
+ vectorize=True,
2653
+ dask="parallelized",
2654
+ # Lengths of the new output_core_dims dimensions.
2655
+ dask_gufunc_kwargs={"output_sizes": {"mu_values": len(mu), "sigma_values": len(sigma), "parameters": 3}},
2656
+ output_dtypes=["float64", "float64"],
2657
+ )
2658
+ ds_parameters = _create_parameters_dataset(da_parameters)
2659
+ ds_parameters["cost_function"] = da_cost_function
2660
+ ds_parameters = ds_parameters.assign_coords({"mu_values": mu, "sigma_values": sigma})
2661
+ return ds_parameters
2662
+
2663
+ # Otherwise return just best parameters
2664
+ da_parameters = xr.apply_ufunc(
2665
+ apply_lognormal_gs,
2666
+ # Variables varying over time
2667
+ Nt,
2668
+ ds["drop_number_concentration"],
2669
+ ds["fall_velocity"],
2670
+ # Other options
2671
+ kwargs=kwargs,
2672
+ # Settings
2673
+ input_core_dims=[[], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
2674
+ output_core_dims=[["parameters"]],
2675
+ vectorize=True,
2676
+ dask="parallelized",
2677
+ dask_gufunc_kwargs={"output_sizes": {"parameters": 3}}, # lengths of the new output_core_dims dimensions.
2678
+ output_dtypes=["float64"],
2679
+ )
2680
+ ds_parameters = _create_parameters_dataset(da_parameters)
2681
+ return ds_parameters
1565
2682
 
1566
- def get_normalized_gamma_parameters_gs(ds, target="ND", transformation="log", error_order=1):
1567
- r"""Estimate $\mu$ of a Normalized Gamma distribution using Grid Search.
1568
2683
 
1569
- The D50 and Nw parameters of the Normalized Gamma distribution are derived empirically from the obs DSD.
1570
- $\mu$ is derived by minimizing the errors between the obs DSD and modelled Normalized Gamma distribution.
2684
+ def get_normalized_gamma_parameters_gs(
2685
+ ds,
2686
+ mu=None,
2687
+ objectives=None,
2688
+ return_loss=False,
2689
+ ):
2690
+ """Estimate Normalized Gamma PSD parameters using Grid Search optimization.
2691
+
2692
+ The parameters ``N_w`` and ``D50`` are computed empirically from the observed DSD
2693
+ moments, while the shape parameter ``mu`` is estimated through
2694
+ grid search by minimizing the error between observed and modeled quantities.
1571
2695
 
1572
2696
  Parameters
1573
2697
  ----------
1574
- Nd : array_like
1575
- A drop size distribution
1576
- D50: optional, float
1577
- Median drop diameter in mm. If none is given, it will be estimated.
1578
- Nw: optional, float
1579
- Normalized Intercept Parameter. If none is given, it will be estimated.
1580
- order: optional, float
1581
- Order to which square the error when computing the sum of errors.
1582
- Order = 2 is equivalent to minimize the mean squared error (MSE) (L2 norm). The default is 2.
1583
- Order = 1 is equivalent to minimize the mean absolute error (MAE) (L1 norm).
1584
- Higher orders typically stretch higher the gamma distribution.
2698
+ ds : xarray.Dataset
2699
+ Input dataset containing PSD observations. Must include:
2700
+
2701
+ - ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
2702
+ - ``diameter_bin_center`` : Diameter bin centers [mm]
2703
+ - ``diameter_bin_width`` : Diameter bin widths [mm]
2704
+ - ``fall_velocity`` : Drop fall velocity [m s⁻¹] (always read from ``ds``; needed when the target is ``"R"``)
2705
+
2706
+ mu : int, float or numpy.ndarray
2707
+ mu parameter values to search.
2708
+ objectives: list of dict
2709
+ target : str, optional
2710
+ Target quantity to optimize. Valid options:
2711
+
2712
+ - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
2713
+ - ``"H(x)"`` : Normalized drop number concentration [-]
2714
+ - ``"R"`` : Rain rate [mm h⁻¹]
2715
+ - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
2716
+ - ``"LWC"`` : Liquid water content [g m⁻³]
2717
+ - ``"M<p>"`` : Moment of order p
2718
+
2719
+ transformation : str, optional
2720
+ Transformation applied to the target quantity before computing the loss.
2721
+ Valid options:
2722
+
2723
+ - ``"identity"`` : No transformation
2724
+ - ``"log"`` : Logarithmic transformation
2725
+ - ``"sqrt"`` : Square root transformation
2726
+
2727
+ censoring : str
2728
+ Specifies whether the observed particle size distribution (PSD) is
2729
+ treated as censored at the edges of the diameter range due to
2730
+ instrumental sensitivity limits:
2731
+
2732
+ - ``"none"`` : No censoring is applied. All diameter bins are used.
2733
+ - ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
2734
+ the spectrum where the observed number concentration is zero are
2735
+ removed prior to cost-function evaluation.
2736
+ - ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
2737
+ the spectrum where the observed number concentration is zero are
2738
+ removed prior to cost-function evaluation.
2739
+ - ``"both"`` : Both left- and right-censored PSD. Only the contiguous
2740
+ range of diameter bins with non-zero observed concentrations is
2741
+ retained.
2742
+
2743
+ loss : str, optional
2744
+ Loss function.
2745
+ If target is ``"N(D)"`` or ``"H(x)"``, valid options are:
2746
+
2747
+ - ``SSE``: Sum of Squared Errors
2748
+ - ``SAE``: Sum of Absolute Errors
2749
+ - ``MAE``: Mean Absolute Error
2750
+ - ``MSE``: Mean Squared Error
2751
+ - ``RMSE``: Root Mean Squared Error
2752
+ - ``relMAE``: Relative Mean Absolute Error
2753
+ - ``KLDiv``: Kullback-Leibler Divergence
2754
+ - ``WD``: Wasserstein Distance
2755
+ - ``JSD``: Jensen-Shannon Distance
2756
+ - ``KS``: Kolmogorov-Smirnov Statistic
2757
+
2758
+ If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:
2759
+
2760
+ - ``AE``: Absolute Error
2761
+ - ``SE``: Squared Error
2762
+
2763
+ loss_weight: int, optional
2764
+ Weight of this objective when multiple objectives are used.
2765
+ Must be specified if more than one objective is specified.
2766
+ return_loss : bool, optional
2767
+ If True, return both the loss surface and parameters.
2768
+ Default is False.
1585
2769
 
1586
2770
  Returns
1587
2771
  -------
1588
2772
  ds_params : xarray.Dataset
1589
2773
  Dataset containing the estimated Normalized Gamma distribution parameters.
1590
2774
  """
1591
- # "target": ["ND", "LWC", "Z", "R"]
1592
- # "transformation": "log", "identity", "sqrt", # only for drop_number_concentration
1593
- # "error_order": 1, # MAE/MSE ... only for drop_number_concentration
2775
+ # Use default objectives if not specified
2776
+ if objectives is None:
2777
+ objectives = copy.deepcopy(DEFAULT_OBJECTIVES)
2778
+
2779
+ # Check objectives
2780
+ objectives = check_objectives(objectives=objectives)
1594
2781
 
1595
2782
  # Compute required variables
1596
2783
  drop_number_concentration = ds["drop_number_concentration"]
@@ -1608,28 +2795,69 @@ def get_normalized_gamma_parameters_gs(ds, target="ND", transformation="log", er
1608
2795
  diameter_bin_width=diameter_bin_width, # mm
1609
2796
  moment=4,
1610
2797
  )
1611
- ds["Nw"] = get_normalized_intercept_parameter_from_moments(moment_3=m3, moment_4=m4)
1612
- ds["D50"] = get_median_volume_drop_diameter(
2798
+ Nw = get_normalized_intercept_parameter_from_moments(moment_3=m3, moment_4=m4)
2799
+ D50 = get_median_volume_drop_diameter(
1613
2800
  drop_number_concentration=drop_number_concentration,
1614
2801
  diameter=diameter, # m
1615
2802
  diameter_bin_width=diameter_bin_width, # mm
1616
2803
  )
1617
2804
 
2805
+ # Define search space
2806
+ if mu is None:
2807
+ mu = np.arange(-4, 30, step=0.01)
2808
+
1618
2809
  # Define kwargs
1619
2810
  kwargs = {
1620
- "D": ds["diameter_bin_center"].data,
1621
- "dD": ds["diameter_bin_width"].data,
1622
- "target": target,
1623
- "transformation": transformation,
1624
- "error_order": error_order,
2811
+ "D": ds["diameter_bin_center"].to_numpy(),
2812
+ "dD": ds["diameter_bin_width"].to_numpy(),
2813
+ "objectives": objectives,
2814
+ "return_loss": return_loss,
2815
+ "mu": mu,
1625
2816
  }
1626
2817
 
1627
- # Fit distribution in parallel
1628
- da_params = xr.apply_ufunc(
2818
+ # Define function to create parameters dataset
2819
+ def _create_parameters_dataset(da_parameters):
2820
+ # Add parameters coordinates
2821
+ da_parameters = da_parameters.assign_coords({"parameters": ["Nw", "D50", "mu"]})
2822
+
2823
+ # Create parameters dataset
2824
+ ds_parameters = da_parameters.to_dataset(dim="parameters")
2825
+
2826
+ # Add DSD model name to the attribute
2827
+ ds_parameters.attrs["disdrodb_psd_model"] = "NormalizedGammaPSD"
2828
+ return ds_parameters
2829
+
2830
+ # Return cost function if asked
2831
+ if return_loss:
2832
+ da_parameters, da_cost_function = xr.apply_ufunc(
2833
+ apply_normalized_gamma_gs,
2834
+ # Variables varying over time
2835
+ Nw,
2836
+ D50,
2837
+ ds["drop_number_concentration"],
2838
+ ds["fall_velocity"],
2839
+ # Other options
2840
+ kwargs=kwargs,
2841
+ # Settings
2842
+ input_core_dims=[[], [], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
2843
+ output_core_dims=[["parameters"], ["mu_values"]],
2844
+ vectorize=True,
2845
+ dask="parallelized",
2846
+ # Lengths of the new output_core_dims dimensions.
2847
+ dask_gufunc_kwargs={"output_sizes": {"mu_values": len(mu), "parameters": 3}},
2848
+ output_dtypes=["float64", "float64"],
2849
+ )
2850
+ ds_parameters = _create_parameters_dataset(da_parameters)
2851
+ ds_parameters["cost_function"] = da_cost_function
2852
+ ds_parameters = ds_parameters.assign_coords({"mu_values": mu})
2853
+ return ds_parameters
2854
+
2855
+ # Otherwise return just best parameters
2856
+ da_parameters = xr.apply_ufunc(
1629
2857
  apply_normalized_gamma_gs,
1630
2858
  # Variables varying over time
1631
- ds["Nw"],
1632
- ds["D50"],
2859
+ Nw,
2860
+ D50,
1633
2861
  ds["drop_number_concentration"],
1634
2862
  ds["fall_velocity"],
1635
2863
  # Other options
@@ -1642,16 +2870,357 @@ def get_normalized_gamma_parameters_gs(ds, target="ND", transformation="log", er
1642
2870
  dask_gufunc_kwargs={"output_sizes": {"parameters": 3}}, # lengths of the new output_core_dims dimensions.
1643
2871
  output_dtypes=["float64"],
1644
2872
  )
2873
+ ds_parameters = _create_parameters_dataset(da_parameters)
2874
+ return ds_parameters
1645
2875
 
1646
- # Add parameters coordinates
1647
- da_params = da_params.assign_coords({"parameters": ["Nw", "mu", "D50"]})
1648
2876
 
1649
- # Create parameters dataset
1650
- ds_params = da_params.to_dataset(dim="parameters")
2877
def get_normalized_generalized_gamma_parameters_gs(
    ds,
    i,
    j,
    mu=None,
    c=None,
    objectives=None,
    return_loss=False,
):
    """Estimate Normalized Generalized Gamma PSD parameters using Grid Search optimization.

    The parameters ``Nc`` and ``Dc`` are computed empirically from the observed DSD
    moments of order ``i`` and ``j``, while the shape parameters ``mu`` and ``c`` are
    estimated through grid search by minimizing the error between observed and
    modeled quantities.

    Parameters
    ----------
    ds : xarray.Dataset
        Input dataset containing PSD observations. The following variables are read:

        - ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
        - ``diameter_bin_center`` : Diameter bin centers [mm]
        - ``diameter_bin_width`` : Diameter bin widths [mm]
        - ``fall_velocity`` : Drop fall velocity [m s⁻¹]
          (always passed to the fitting routine, whatever the objectives are)

    i : int
        Moment order i of the NormalizedGeneralizedGammaPSD.
    j : int
        Moment order j of the NormalizedGeneralizedGammaPSD.
    mu : int, float or numpy.ndarray, optional
        mu parameter values to search.
        If None, ``np.arange(-6, 10, step=0.1)`` is used.
        When ``return_loss=True`` the value must be sized (``len(mu)`` is taken).
    c : int, float or numpy.ndarray, optional
        c parameter values to search.
        If None, ``np.arange(0.01, 10, step=0.1)`` is used.
        When ``return_loss=True`` the value must be sized (``len(c)`` is taken).
    objectives : list of dict, optional
        Objectives to minimize. If None, a deep copy of ``DEFAULT_OBJECTIVES`` is used.
        The objectives are validated with ``check_objectives``.
        Each objective dictionary is described by the following keys:

        target : str
            Target quantity to optimize. Valid options:

            - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
            - ``"H(x)"`` : Normalized drop number concentration [-]
            - ``"R"`` : Rain rate [mm h⁻¹]
            - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
            - ``"LWC"`` : Liquid water content [g m⁻³]
            - ``"M<p>"`` : Moment of order p

        transformation : str
            Transformation applied to the target quantity before computing the loss.
            Valid options:

            - ``"identity"`` : No transformation
            - ``"log"`` : Logarithmic transformation
            - ``"sqrt"`` : Square root transformation

        censoring : str
            Specifies whether the observed particle size distribution (PSD) is
            treated as censored at the edges of the diameter range due to
            instrumental sensitivity limits:

            - ``"none"`` : No censoring is applied. All diameter bins are used.
            - ``"left"`` : Left-censored PSD. Diameter bins at the lower end of
              the spectrum where the observed number concentration is zero are
              removed prior to cost-function evaluation.
            - ``"right"`` : Right-censored PSD. Diameter bins at the upper end of
              the spectrum where the observed number concentration is zero are
              removed prior to cost-function evaluation.
            - ``"both"`` : Both left- and right-censored PSD. Only the contiguous
              range of diameter bins with non-zero observed concentrations is
              retained.

        loss : str
            Loss function.
            If target is ``"N(D)"`` or ``"H(x)"``, valid options are:

            - ``SSE``: Sum of Squared Errors
            - ``SAE``: Sum of Absolute Errors
            - ``MAE``: Mean Absolute Error
            - ``MSE``: Mean Squared Error
            - ``RMSE``: Root Mean Squared Error
            - ``relMAE``: Relative Mean Absolute Error
            - ``KLDiv``: Kullback-Leibler Divergence
            - ``WD``: Wasserstein Distance
            - ``JSD``: Jensen-Shannon Distance
            - ``KS``: Kolmogorov-Smirnov Statistic

            If target is one of ``"R"``, ``"Z"``, ``"LWC"``, or ``"M<p>"``, valid options are:

            - ``AE``: Absolute Error
            - ``SE``: Squared Error

        loss_weight : int or float
            Weight of this objective when multiple objectives are used.
            Must be specified if more than one objective is specified.

    return_loss : bool, optional
        If True, return both the loss surface and parameters.
        Default is False.

    Returns
    -------
    ds_params : xarray.Dataset
        Dataset containing the estimated Normalized Generalized Gamma distribution
        parameters ``Nc``, ``Dc``, ``mu`` and ``c``.
        If ``return_loss=True``, the dataset also includes the ``cost_function``
        variable with dimensions ``("c_values", "mu_values")`` and the
        corresponding ``mu_values`` and ``c_values`` coordinates.
    """
    # Use default objectives if not specified
    if objectives is None:
        objectives = copy.deepcopy(DEFAULT_OBJECTIVES)

    # Check objectives
    objectives = check_objectives(objectives=objectives)

    # Compute required variables
    drop_number_concentration = ds["drop_number_concentration"]
    diameter_bin_width = ds["diameter_bin_width"]
    diameter = ds["diameter_bin_center"] / 1000  # conversion from mm to m
    Mi = get_moment(
        drop_number_concentration=drop_number_concentration,
        diameter=diameter,  # m
        diameter_bin_width=diameter_bin_width,  # mm
        moment=i,
    )
    Mj = get_moment(
        drop_number_concentration=drop_number_concentration,
        diameter=diameter,  # m
        diameter_bin_width=diameter_bin_width,  # mm
        moment=j,
    )
    Dc = NormalizedGeneralizedGammaPSD.compute_Dc(i=i, j=j, Mi=Mi, Mj=Mj)
    Nc = NormalizedGeneralizedGammaPSD.compute_Nc(i=i, j=j, Mi=Mi, Mj=Mj)

    # Define search space
    if mu is None:
        mu = np.arange(-6, 10, step=0.1)
    if c is None:
        c = np.arange(0.01, 10, step=0.1)

    # Define kwargs
    kwargs = {
        "i": i,
        "j": j,
        "D": ds["diameter_bin_center"].to_numpy(),
        "dD": ds["diameter_bin_width"].to_numpy(),
        "objectives": objectives,
        "return_loss": return_loss,
        "mu": mu,
        "c": c,
    }

    # Define function to create parameters dataset
    def _create_parameters_dataset(da_parameters, i, j):
        # Add parameters coordinates
        da_parameters = da_parameters.assign_coords({"parameters": ["Nc", "Dc", "mu", "c"]})

        # Create parameters dataset
        ds_parameters = da_parameters.to_dataset(dim="parameters")

        # Record the moment orders used to derive Nc and Dc
        ds_parameters["Dc"].attrs["moment_orders"] = f"{i}, {j}"
        ds_parameters["Nc"].attrs["moment_orders"] = f"{i}, {j}"

        # Add DSD model name to the attribute
        ds_parameters.attrs["disdrodb_psd_model"] = "NormalizedGeneralizedGammaPSD"
        ds_parameters.attrs["disdrodb_psd_model_kwargs"] = f"{{'i': {i}, 'j': {j}}}"
        return ds_parameters

    # Return cost function if asked
    if return_loss:
        da_parameters, da_cost_function = xr.apply_ufunc(
            apply_normalized_generalized_gamma_gs,
            # Variables varying over time
            Nc,
            Dc,
            ds["drop_number_concentration"],
            ds["fall_velocity"],
            # Other options
            kwargs=kwargs,
            # Settings
            input_core_dims=[[], [], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
            output_core_dims=[["parameters"], ["c_values", "mu_values"]],
            vectorize=True,
            dask="parallelized",
            # Lengths of the new output_core_dims dimensions.
            dask_gufunc_kwargs={"output_sizes": {"mu_values": len(mu), "c_values": len(c), "parameters": 4}},
            # One dtype per output (parameters, cost function)
            output_dtypes=["float64", "float64"],
        )
        ds_parameters = _create_parameters_dataset(da_parameters, i=i, j=j)
        ds_parameters["cost_function"] = da_cost_function
        ds_parameters = ds_parameters.assign_coords({"mu_values": mu, "c_values": c})
        return ds_parameters

    # Otherwise return just best parameters
    da_parameters = xr.apply_ufunc(
        apply_normalized_generalized_gamma_gs,
        # Variables varying over time
        Nc,
        Dc,
        ds["drop_number_concentration"],
        ds["fall_velocity"],
        # Other options
        kwargs=kwargs,
        # Settings
        input_core_dims=[[], [], [DIAMETER_DIMENSION], [DIAMETER_DIMENSION]],
        output_core_dims=[["parameters"]],
        vectorize=True,
        dask="parallelized",
        dask_gufunc_kwargs={"output_sizes": {"parameters": 4}},  # lengths of the new output_core_dims dimensions.
        output_dtypes=["float64"],
    )
    ds_parameters = _create_parameters_dataset(da_parameters, i=i, j=j)
    return ds_parameters
3084
+
3085
+
3086
def fit_ngg_on_normalized_space(
    x,
    ND_norm,
    # PSD parameters
    i,
    j,
    mu=None,
    c=None,
    # Optimization options
    transformation="log",
    loss="SSE",
    # Output options
    return_loss=False,
):
    """Fit a NormalizedGeneralizedGammaPSD model in normalized space.

    This function performs a grid search optimization to find the best parameters
    (mu, c) for the NormalizedGeneralizedGammaPSD model by minimizing a cost function.

    Parameters
    ----------
    x : numpy.ndarray
        Normalized diameter parameter (D/Dc) [-].
    ND_norm : numpy.ndarray
        Observed normalized PSD data (N(D)/Nc) [-].
    i : int
        Moment order i of the NormalizedGeneralizedGammaPSD.
    j : int
        Moment order j of the NormalizedGeneralizedGammaPSD.
    mu : int, float or numpy.ndarray, optional
        mu parameter values to search.
        If None, ``np.arange(-6, 20, step=0.1)`` is used.
    c : int, float or numpy.ndarray, optional
        c parameter values to search.
        If None, ``np.arange(0.01, 20, step=0.1)`` is used.
    transformation : str, optional
        Transformation applied to the target quantity before computing the loss.
        The default is ``"log"``.
        Valid options:

        - ``"identity"`` : No transformation
        - ``"log"`` : Logarithmic transformation
        - ``"sqrt"`` : Square root transformation

    loss : str, optional
        Loss function. The default is ``SSE``.
        Valid options are:

        - ``SSE``: Sum of Squared Errors
        - ``SAE``: Sum of Absolute Errors
        - ``MAE``: Mean Absolute Error
        - ``MSE``: Mean Squared Error
        - ``RMSE``: Root Mean Squared Error
        - ``relMAE``: Relative Mean Absolute Error

    return_loss : bool, optional
        If True, return both the loss surface and parameters.
        Default is False.

    Returns
    -------
    parameters : numpy.ndarray
        Best parameters [mu, c].
        An array of NaN values is returned if no valid solution is found.
    total_loss : xarray.DataArray, optional
        2D loss surface with dimensions ``("c_values", "mu_values")``,
        i.e. shape ``(len(c), len(mu))``, with the searched values as coordinates.
        Only returned if return_loss=True.

    Raises
    ------
    ValueError
        If ``loss`` is not one of the valid options.

    """
    # Ensure input is numpy array
    x = np.asarray(x)
    ND_norm = np.asarray(ND_norm)

    # Define search space
    if mu is None:
        mu = np.arange(-6, 20, step=0.1)
    if c is None:
        c = np.arange(0.01, 20, step=0.1)
    # Scalars are accepted: make them 1D so they can label the loss surface dimensions
    mu = np.atleast_1d(mu)
    c = np.atleast_1d(c)

    # Define combinations of parameters for grid search
    # - With indexing="xy" the grids have shape (len(c), len(mu))
    mu_grid, c_grid = np.meshgrid(
        mu,
        c,
        indexing="xy",
    )
    mu_arr = mu_grid.ravel()
    c_arr = c_grid.ravel()

    # Define objectives
    transformation = check_transformation(transformation)
    valid_loss = ["SSE", "SAE", "MAE", "MSE", "RMSE", "relMAE"]
    if loss not in valid_loss:
        raise ValueError(f"Invalid loss {loss}. Valid loss are {valid_loss}")

    objectives = [
        {
            "target": "N(D)",  # dummy. Do not change
            "censoring": "none",  # dummy. Do not change
            "transformation": transformation,
            "loss": loss,
        },
    ]

    # Perform grid search
    with suppress_warnings():

        # Compute N(D)/Nc for every (mu, c) combination (rows) and every x (columns)
        ND_norm_preds = NormalizedGeneralizedGammaPSD.normalized_formula(
            x=x[None, :],
            i=i,
            j=j,
            mu=mu_arr[:, None],
            c=c_arr[:, None],
        )

        # Compute loss
        total_loss = compute_weighted_loss(
            ND_obs=ND_norm,
            ND_preds=ND_norm_preds,
            D=x,
            dD=None,
            V=None,
            objectives=objectives,
        )

    # Define best parameters
    if not np.all(np.isnan(total_loss)):
        best_index = np.nanargmin(total_loss)
        mu_best, c_best = mu_arr[best_index].item(), c_arr[best_index].item()
        parameters = np.array([mu_best, c_best])
    else:
        parameters = np.array([np.nan, np.nan])

    # If asked, return cost function
    if return_loss:
        total_loss = total_loss.reshape(mu_grid.shape)
        total_loss = xr.DataArray(total_loss, dims=["c_values", "mu_values"])
        total_loss = total_loss.assign_coords({"mu_values": mu, "c_values": c})
        return parameters, total_loss
    return parameters
1655
3224
 
1656
3225
 
1657
3226
  ####-----------------------------------------------------------------.
@@ -1930,10 +3499,40 @@ def _get_exponential_parameters_mom(ds: xr.Dataset, mom_method: str) -> xr.Datas
1930
3499
 
1931
3500
 
1932
3501
  ####--------------------------------------------------------------------------------------.
1933
- #### Routines dictionary
3502
+ #### GLOBAL DICTIONARIES
1934
3503
 
1935
- ####--------------------------------------------------------------------------------------.
1936
3504
  ATTRS_PARAMS_DICT = {
3505
+ "LognormalPSD": {
3506
+ "Nt": {
3507
+ "standard_name": "number_concentration_of_particles",
3508
+ "units": "m-3",
3509
+ "long_name": "Total Number Concentration",
3510
+ },
3511
+ "mu": {
3512
+ "description": "Mean of the Lognormal PSD",
3513
+ "units": "log(mm)",
3514
+ "long_name": "Mean of the Lognormal PSD",
3515
+ },
3516
+ "sigma": {
3517
+ "standard_name": "Standard Deviation of the Lognormal PSD",
3518
+ "units": "",
3519
+ "long_name": "Standard Deviation of the Lognormal PSD",
3520
+ },
3521
+ },
3522
+ "ExponentialPSD": {
3523
+ "N0": {
3524
+ "description": "Intercept parameter of the Exponential PSD",
3525
+ "standard_name": "particle_size_distribution_intercept",
3526
+ "units": "mm-1 m-3",
3527
+ "long_name": "ExponentialPSD intercept parameter",
3528
+ },
3529
+ "Lambda": {
3530
+ "description": "Slope (rate) parameter of the Exponential PSD",
3531
+ "standard_name": "particle_size_distribution_slope",
3532
+ "units": "mm-1",
3533
+ "long_name": "ExponentialPSD slope parameter",
3534
+ },
3535
+ },
1937
3536
  "GammaPSD": {
1938
3537
  "N0": {
1939
3538
  "description": "Intercept parameter of the Gamma PSD",
@@ -1972,35 +3571,53 @@ ATTRS_PARAMS_DICT = {
1972
3571
  "long_name": "NormalizedGammaPSD Median Volume Drop Diameter",
1973
3572
  },
1974
3573
  },
1975
- "LognormalPSD": {
3574
+ "GeneralizedGammaPSD": {
1976
3575
  "Nt": {
1977
- "standard_name": "number_concentration_of_rain_drops_in_air",
3576
+ "standard_name": "number_concentration_of_particles",
1978
3577
  "units": "m-3",
1979
3578
  "long_name": "Total Number Concentration",
1980
3579
  },
3580
+ "Lambda": {
3581
+ "description": "Slope (rate) parameter of the Generalized Gamma PSD",
3582
+ "standard_name": "particle_size_distribution_slope",
3583
+ "units": "mm-1",
3584
+ "long_name": "GeneralizedGammaPSD slope parameter",
3585
+ },
1981
3586
  "mu": {
1982
- "description": "Mean of the Lognormal PSD",
1983
- "units": "log(mm)",
1984
- "long_name": "Mean of the Lognormal PSD",
3587
+ "description": "Shape parameter of the Generalized Gamma PSD",
3588
+ "standard_name": "particle_size_distribution_shape",
3589
+ "units": "",
3590
+ "long_name": "GeneralizedGammaPSD shape parameter",
1985
3591
  },
1986
- "sigma": {
1987
- "standard_name": "Standard Deviation of the Lognormal PSD",
3592
+ "c": {
3593
+ "description": "Shape parameter of the Generalized Gamma PSD",
3594
+ "standard_name": "particle_size_distribution_shape",
1988
3595
  "units": "",
1989
- "long_name": "Standard Deviation of the Lognormal PSD",
3596
+ "long_name": "GeneralizedGammaPSD shape parameter c",
1990
3597
  },
1991
3598
  },
1992
- "ExponentialPSD": {
1993
- "N0": {
1994
- "description": "Intercept parameter of the Exponential PSD",
1995
- "standard_name": "particle_size_distribution_intercept",
3599
+ "NormalizedGeneralizedGammaPSD": {
3600
+ "Nc": {
3601
+ "standard_name": "characteristic intercept",
1996
3602
  "units": "mm-1 m-3",
1997
- "long_name": "ExponentialPSD intercept parameter",
3603
+ "long_name": "NormalizedGeneralizedGammaPSD Characteristic Intercept Parameter",
1998
3604
  },
1999
- "Lambda": {
2000
- "description": "Slope (rate) parameter of the Exponential PSD",
2001
- "standard_name": "particle_size_distribution_slope",
2002
- "units": "mm-1",
2003
- "long_name": "ExponentialPSD slope parameter",
3605
+ "Dc": {
3606
+ "standard_name": "characteristic_diameter",
3607
+ "units": "mm",
3608
+ "long_name": "NormalizedGeneralizedGammaPSD Characteristic Diameter",
3609
+ },
3610
+ "mu": {
3611
+ "description": "Shape parameter of the Normalized Generalized Gamma PSD",
3612
+ "standard_name": "particle_size_distribution_shape",
3613
+ "units": "",
3614
+ "long_name": "NormalizedGeneralizedGammaPSD Shape Parameter",
3615
+ },
3616
+ "c": {
3617
+ "description": "Shape parameter of the Normalized Generalized Gamma PSD",
3618
+ "standard_name": "particle_size_distribution_shape",
3619
+ "units": "",
3620
+ "long_name": "NormalizedGeneralizedGammaPSD Shape Parameter c",
2004
3621
  },
2005
3622
  },
2006
3623
  }
@@ -2035,6 +3652,8 @@ OPTIMIZATION_ROUTINES_DICT = {
2035
3652
  "NormalizedGammaPSD": get_normalized_gamma_parameters_gs,
2036
3653
  "LognormalPSD": get_lognormal_parameters_gs,
2037
3654
  "ExponentialPSD": get_exponential_parameters_gs,
3655
+ "GeneralizedGammaPSD": get_generalized_gamma_parameters_gs,
3656
+ "NormalizedGeneralizedGammaPSD": get_normalized_generalized_gamma_parameters_gs,
2038
3657
  },
2039
3658
  "ML": {
2040
3659
  "GammaPSD": get_gamma_parameters,
@@ -2056,39 +3675,181 @@ def available_optimization(psd_model):
2056
3675
  return [opt for opt in list(OPTIMIZATION_ROUTINES_DICT) if psd_model in OPTIMIZATION_ROUTINES_DICT[opt]]
2057
3676
 
2058
3677
 
3678
def get_psd_model_parameter_names(psd_model):
    """Return the list of parameter names registered for ``psd_model`` in ``ATTRS_PARAMS_DICT``."""
    model_parameters_attrs = ATTRS_PARAMS_DICT[psd_model]
    return list(model_parameters_attrs)
3681
+
3682
+
3683
def check_psd_parameters(psd_model, parameters):
    """Ensure every name in ``parameters`` is a parameter of ``psd_model``.

    A ValueError is raised for the first name that is not listed by
    ``get_psd_model_parameter_names(psd_model)``; otherwise ``parameters``
    is returned unchanged.
    """
    valid_params = get_psd_model_parameter_names(psd_model)
    unknown_params = [name for name in parameters if name not in valid_params]
    if unknown_params:
        param = unknown_params[0]
        raise ValueError(
            f"Invalid parameter '{param}' for PSD model '{psd_model}'. Valid parameters are {valid_params}.",
        )
    return parameters
3692
+
3693
+
2059
3694
  ####--------------------------------------------------------------------------------------.
2060
- #### Argument checkers
3695
+ #### CONFIGURATION CHECKERS
3696
+ #### - GS
2061
3697
 
2062
3698
 
2063
- def check_psd_model(psd_model, optimization):
2064
- """Check valid psd_model argument."""
2065
- valid_psd_models = list(OPTIMIZATION_ROUTINES_DICT[optimization])
2066
- if psd_model not in valid_psd_models:
2067
- msg = (
2068
- f"{optimization} optimization is not available for 'psd_model' {psd_model}. "
2069
- f"Accepted PSD models are {valid_psd_models}."
2070
- )
2071
- raise NotImplementedError(msg)
3699
def check_fixed_parameters(psd_model, fixed_parameters):
    """Check valid fixed_parameters argument.

    Parameters
    ----------
    psd_model : str
        Name of the PSD model the parameters refer to.
    fixed_parameters : dict or None
        Dictionary mapping parameter names to scalar values.
        For ``NormalizedGeneralizedGammaPSD`` the moment orders ``i`` and ``j``
        are mandatory keys.

    Returns
    -------
    dict or None
        None if ``fixed_parameters`` is None, otherwise the same dictionary
        object with every value converted to ``float`` (updated in place).

    Raises
    ------
    ValueError
        If ``fixed_parameters`` is not a dictionary, if ``i`` or ``j`` are missing
        for ``NormalizedGeneralizedGammaPSD``, if a key is not a parameter of
        ``psd_model``, or if a value is a string or is not a scalar.
    """
    if fixed_parameters is None:
        if psd_model == "NormalizedGeneralizedGammaPSD":
            raise ValueError(
                "For NormalizedGeneralizedGammaPSD fixed_parameters must include 'i' and 'j' moment orders.",
            )
        return None
    if not isinstance(fixed_parameters, dict):
        raise ValueError("fixed_parameters must be a dictionary.")

    # Extract the set of parameter names
    parameters = set(fixed_parameters)

    # Check for NormalizedGeneralizedGammaPSD i and j are provided
    if psd_model == "NormalizedGeneralizedGammaPSD":
        if not {"i", "j"} <= parameters:
            raise ValueError(
                "fixed_parameters for NormalizedGeneralizedGammaPSD must include 'i' and 'j' moment orders.",
            )
        # i and j are moment orders, not PSD parameters: exclude them from the name check
        parameters = parameters - {"i", "j"}

    # Check validity of fixed_parameters keys (only PSD parameters are allowed)
    _ = check_psd_parameters(psd_model=psd_model, parameters=parameters)

    # Check value validity
    for param_name, param_value in fixed_parameters.items():
        if isinstance(param_value, str):
            raise ValueError(
                f"Invalid value for '{param_name}': strings are not allowed.",
            )
        if not np.isscalar(param_value):
            raise ValueError(
                f"Invalid value for '{param_name}': expected scalar, got {type(param_value).__name__}.",
            )
        # Only existing keys are reassigned, so updating while iterating is safe
        fixed_parameters[param_name] = float(param_value)
    return fixed_parameters
3737
+
3738
+
3739
def check_search_space_parameters(search_space, psd_model):
    """Validate that the keys of ``search_space`` are parameters of ``psd_model``.

    ``search_space`` is returned unchanged (None stays None); the name check is
    delegated to ``check_psd_parameters``.
    """
    if search_space is not None:
        searched_names = list(search_space.keys())
        check_psd_parameters(psd_model=psd_model, parameters=searched_names)
    return search_space
3746
+
3747
+
3748
def check_search_space(search_space):
    """Validate a grid-search ``search_space`` specification.

    ``None`` and an empty dictionary both yield ``None``. Otherwise every entry
    must be a dictionary with ``min``, ``max`` and ``step`` keys, with
    ``min < max`` and a positive, non-None ``step``. The validated dictionary is
    returned unchanged; any violation raises a ValueError.
    """
    if search_space is None:
        return None
    if not isinstance(search_space, dict):
        raise ValueError("search_space must be a dictionary.")
    if not search_space:
        return None

    required_keys = ("min", "max", "step")
    for param_name, space in search_space.items():
        # Each specification must be a dict providing all the required keys
        is_well_formed = isinstance(space, dict) and all(key in space for key in required_keys)
        if not is_well_formed:
            raise ValueError(
                f"Search space for '{param_name}' must be a dict with 'min', 'max', and 'step' keys. Got: {space}",
            )

        lower_bound, upper_bound, step = space["min"], space["max"], space["step"]

        # Bounds are checked before the step
        if lower_bound >= upper_bound:
            raise ValueError(
                f"Invalid search bounds for '{param_name}': min ({lower_bound}) >= max ({upper_bound}). "
                "Require min < max.",
            )
        # The 'step' key may be present but explicitly set to None
        if step is None:
            raise ValueError(
                f"Search space for '{param_name}' must include 'step' key. Got: {space}",
            )
        if step <= 0:
            raise ValueError(
                f"Invalid step size for '{param_name}': step ({step}) must be positive.",
            )
    return search_space
2080
3779
 
2081
3780
 
2082
- def check_transformation(transformation):
2083
- """Check valid transformation argument."""
2084
- valid_transformation = ["identity", "log", "sqrt"]
2085
- if transformation not in valid_transformation:
2086
- raise ValueError(
2087
- f"Invalid 'transformation' {transformation}. Valid transformations are {transformation}.",
2088
- )
2089
- return transformation
3781
def define_gs_parameters(psd_model, fixed_parameters=None, search_space=None):
    """Define PSD model parameters for Grid Search optimization routines.

    This function constructs a dictionary of parameter values ready for grid search,
    converting search space ranges into numpy arrays.

    Parameters
    ----------
    psd_model : str
        Name of the PSD model. It selects the set of parameter names
        included in the output dictionary.
    fixed_parameters : dict, optional
        Dictionary with parameter names as keys and scalar values as values.
        Example: {"mu": 2.0}
    search_space : dict, optional
        Dictionary defining search ranges for parameters.
        Each parameter must have:

        - 'min' : float, Minimum value
        - 'max' : float, Maximum value
        - 'step' : float, Step size of the grid

        The grid is built with ``np.arange(min, max + step, step)``.
        Example: {"Lambda": {"min": 0, "max": 10, "step": 0.2}}

    Returns
    -------
    dict
        Dictionary with PSD parameter names as keys and values as:

        - scalar for fixed parameters
        - numpy.ndarray for parameters to grid search
        - None for parameters specified neither as fixed nor in the search space

        Empty dict if both inputs are None or empty.
        If a parameter is specified in both inputs, the search space takes precedence.

    """
    # Check validity of inputs
    search_space = check_search_space(search_space=search_space)
    search_space = check_search_space_parameters(search_space=search_space, psd_model=psd_model)
    fixed_parameters = check_fixed_parameters(psd_model=psd_model, fixed_parameters=fixed_parameters)

    # Return empty dict if both inputs are empty
    if not fixed_parameters and not search_space:
        return {}

    # Define parameters dictionary (initialize with None values)
    required_parameters_dict = {
        "NormalizedGeneralizedGammaPSD": ["mu", "c", "i", "j"],
        "NormalizedGammaPSD": ["mu"],
        "GeneralizedGammaPSD": ["Lambda", "mu", "c"],
        "LognormalPSD": ["mu", "sigma"],
        "GammaPSD": ["Lambda", "mu"],
        "ExponentialPSD": ["Lambda"],
    }
    parameters = dict.fromkeys(required_parameters_dict[psd_model])

    # Set fixed parameters (scalar values)
    if fixed_parameters is not None:
        parameters.update(fixed_parameters)

    # Replace parameters having a search space with the array of values to search
    if search_space is not None:
        for param_name, space in search_space.items():
            step = space["step"]
            # max + step is used as stop so that 'max' can be part of the grid
            parameters[param_name] = np.arange(space["min"], space["max"] + step, step)

    return parameters
2091
3849
 
3850
+
3851
+ # -----------------------------------------------------------------
3852
+ #### - ML
2092
3853
  def check_likelihood(likelihood):
2093
3854
  """Check valid likelihood argument."""
2094
3855
  valid_likelihood = ["multinomial", "poisson"]
@@ -2126,6 +3887,8 @@ def check_optimizer(optimizer):
2126
3887
  return optimizer
2127
3888
 
2128
3889
 
3890
+ # -----------------------------------------------------------------
3891
+ #### - MOM
2129
3892
  def check_mom_methods(mom_methods, psd_model, allow_none=False):
2130
3893
  """Check valid mom_methods arguments."""
2131
3894
  if isinstance(mom_methods, (str, type(None))):
@@ -2142,6 +3905,21 @@ def check_mom_methods(mom_methods, psd_model, allow_none=False):
2142
3905
  return mom_methods
2143
3906
 
2144
3907
 
3908
+ # -----------------------------------------------------------------
3909
+ #### - WRAPPERS
3910
+
3911
+
3912
def check_psd_model(psd_model, optimization):
    """Raise NotImplementedError if ``psd_model`` has no routine for ``optimization``.

    The supported models are the keys of ``OPTIMIZATION_ROUTINES_DICT[optimization]``.
    Nothing is returned when the model is supported.
    """
    supported_models = list(OPTIMIZATION_ROUTINES_DICT[optimization])
    if psd_model in supported_models:
        return
    raise NotImplementedError(
        f"{optimization} optimization is not available for 'psd_model' {psd_model}. "
        f"Accepted PSD models are {supported_models}.",
    )
3921
+
3922
+
2145
3923
  def check_optimization(optimization):
2146
3924
  """Check valid optimization argument."""
2147
3925
  valid_optimization = list(OPTIMIZATION_ROUTINES_DICT)
@@ -2152,8 +3930,8 @@ def check_optimization(optimization):
2152
3930
  return optimization
2153
3931
 
2154
3932
 
2155
- def check_optimization_kwargs(optimization_kwargs, optimization, psd_model):
2156
- """Check valid optimization_kwargs."""
3933
+ def check_optimization_settings(optimization_settings, optimization, psd_model):
3934
+ """Check valid optimization_settings."""
2157
3935
  dict_arguments = {
2158
3936
  "ML": {
2159
3937
  "init_method": None,
@@ -2163,9 +3941,8 @@ def check_optimization_kwargs(optimization_kwargs, optimization, psd_model):
2163
3941
  "optimizer": check_optimizer,
2164
3942
  },
2165
3943
  "GS": {
2166
- "target": check_target,
2167
- "transformation": check_transformation,
2168
- "error_order": None,
3944
+ "objectives": check_objectives,
3945
+ "search_space": check_search_space,
2169
3946
  },
2170
3947
  "MOM": {
2171
3948
  "mom_methods": None,
@@ -2174,38 +3951,55 @@ def check_optimization_kwargs(optimization_kwargs, optimization, psd_model):
2174
3951
  optimization = check_optimization(optimization)
2175
3952
  check_psd_model(psd_model=psd_model, optimization=optimization)
2176
3953
 
3954
+ # Check fixed parameters validity for optimization="GS"
3955
+ if optimization == "GS":
3956
+ optimization_settings["fixed_parameters"] = check_fixed_parameters(
3957
+ psd_model=psd_model,
3958
+ fixed_parameters=optimization_settings.get("fixed_parameters", None),
3959
+ )
3960
+
2177
3961
  # Retrieve the expected arguments for the given optimization method
2178
3962
  expected_arguments = dict_arguments.get(optimization, {})
2179
3963
 
2180
- # Check for missing arguments in optimization_kwargs
2181
- # missing_args = [arg for arg in expected_arguments if arg not in optimization_kwargs]
3964
+ # Check for missing arguments in optimization_settings
3965
+ # missing_args = [arg for arg in expected_arguments if arg not in optimization_settings]
2182
3966
  # if missing_args:
2183
3967
  # raise ValueError(f"Missing required arguments for {optimization} optimization: {missing_args}")
2184
3968
 
2185
3969
  # Validate arguments values
2186
3970
  _ = [
2187
- check(optimization_kwargs[arg])
3971
+ check(optimization_settings[arg])
2188
3972
  for arg, check in expected_arguments.items()
2189
- if callable(check) and arg in optimization_kwargs
3973
+ if callable(check) and arg in optimization_settings
2190
3974
  ]
2191
3975
 
2192
3976
  # Further special checks
2193
- if optimization == "MOM" and "mom_methods" in optimization_kwargs:
2194
- _ = check_mom_methods(mom_methods=optimization_kwargs["mom_methods"], psd_model=psd_model)
2195
- if optimization == "ML" and optimization_kwargs.get("init_method", None) is not None:
2196
- _ = check_mom_methods(mom_methods=optimization_kwargs["init_method"], psd_model=psd_model, allow_none=True)
3977
+ if optimization == "MOM" and "mom_methods" in optimization_settings:
3978
+ _ = check_mom_methods(mom_methods=optimization_settings["mom_methods"], psd_model=psd_model)
3979
+ if optimization == "ML" and optimization_settings.get("init_method", None) is not None:
3980
+ _ = check_mom_methods(mom_methods=optimization_settings["init_method"], psd_model=psd_model, allow_none=True)
2197
3981
 
2198
3982
 
2199
3983
  ####--------------------------------------------------------------------------------------.
2200
3984
  #### Wrappers for fitting
2201
3985
 
2202
3986
 
2203
- def _finalize_attributes(ds_params, psd_model, optimization, optimization_kwargs):
3987
def _format_optimization_settings(settings):
    """Render optimization settings as a single string.

    A dictionary becomes ``"key: value"`` pairs joined by ``", "``. A list is
    rendered element by element (recursively) and joined by ``" | "``.
    Any other type raises a TypeError.
    """
    if isinstance(settings, list):
        return " | ".join([_format_optimization_settings(item) for item in settings])
    if not isinstance(settings, dict):
        raise TypeError("optimization_settings must be dict or list of dict")
    pairs = [f"{key}: {value}" for key, value in settings.items()]
    return ", ".join(pairs)
3997
+
3998
+
3999
def _finalize_attributes(ds_params, psd_model, optimization, optimization_settings):
    """Record the PSD model, the optimization method and its settings in ``ds_params.attrs``.

    The settings are stored as the string produced by ``_format_optimization_settings``.
    The same ``ds_params`` object is returned.
    """
    ds_params.attrs.update(
        {
            "disdrodb_psd_model": psd_model,
            "disdrodb_psd_optimization": optimization,
        },
    )
    settings_str = _format_optimization_settings(optimization_settings)
    ds_params.attrs["disdrodb_psd_optimization_settings"] = settings_str
    return ds_params
2210
4004
 
2211
4005
 
@@ -2248,12 +4042,12 @@ def get_mom_parameters(ds: xr.Dataset, psd_model: str, mom_methods=None) -> xr.D
2248
4042
  ds_params = ds_params.assign_coords({"mom_method": mom_methods})
2249
4043
 
2250
4044
  # Add model attributes
2251
- optimization_kwargs = {"mom_methods": mom_methods}
4045
+ optimization_settings = {"mom_methods": mom_methods}
2252
4046
  ds_params = _finalize_attributes(
2253
4047
  ds_params=ds_params,
2254
4048
  psd_model=psd_model,
2255
4049
  optimization="MOM",
2256
- optimization_kwargs=optimization_kwargs,
4050
+ optimization_settings=optimization_settings,
2257
4051
  )
2258
4052
  return ds_params
2259
4053
 
@@ -2272,14 +4066,16 @@ def get_ml_parameters(
2272
4066
 
2273
4067
  Parameters
2274
4068
  ----------
2275
- ds : xarray.Dataset
4069
+ ds : xarray.Dataset
2276
4070
  Input dataset containing drop number concentration data and diameter information.
2277
4071
  It must include the following variables:
4072
+
2278
4073
  - ``drop_number_concentration``: The number concentration of drops.
2279
4074
  - ``diameter_bin_width``: The width of each diameter bin.
2280
4075
  - ``diameter_bin_lower``: The lower bounds of the diameter bins.
2281
4076
  - ``diameter_bin_upper``: The upper bounds of the diameter bins.
2282
4077
  - ``diameter_bin_center``: The center values of the diameter bins.
4078
+
2283
4079
  psd_model : str
2284
4080
  The PSD model to fit. See ``available_psd_models()``.
2285
4081
  init_method: str or list
@@ -2339,7 +4135,7 @@ def get_ml_parameters(
2339
4135
  ds_params = ds_params.assign_coords({"init_method": init_method})
2340
4136
 
2341
4137
  # Add model attributes
2342
- optimization_kwargs = {
4138
+ optimization_settings = {
2343
4139
  "init_method": init_method,
2344
4140
  "probability_method": "probability_method",
2345
4141
  "likelihood": likelihood,
@@ -2350,47 +4146,191 @@ def get_ml_parameters(
2350
4146
  ds_params=ds_params,
2351
4147
  psd_model=psd_model,
2352
4148
  optimization="ML",
2353
- optimization_kwargs=optimization_kwargs,
4149
+ optimization_settings=optimization_settings,
2354
4150
  )
2355
4151
 
2356
4152
  # Return dataset with parameters
2357
4153
  return ds_params
2358
4154
 
2359
4155
 
2360
- def get_gs_parameters(ds, psd_model, target="ND", transformation="log", error_order=1):
2361
- """Retrieve PSD model parameters using Grid Search."""
2362
- # Check valid psd_model
2363
- check_psd_model(psd_model, optimization="GS")
4156
+ def get_gs_parameters(ds, psd_model, fixed_parameters=None, objectives=None, search_space=None, return_loss=False):
4157
+ """Estimate PSD model parameters using Grid Search optimization with multiple objectives.
2364
4158
 
2365
- # Check valid target
2366
- target = check_target(target)
4159
+ This function estimates particle size distribution (PSD) model parameters
4160
+ by minimizing a weighted combination of errors across multiple objectives through
4161
+ grid search over the parameter space.
2367
4162
 
2368
- # Check valid transformation
2369
- transformation = check_transformation(transformation)
4163
+ Parameters
4164
+ ----------
4165
+ ds : xarray.Dataset
4166
+ Input dataset containing PSD observations. Must include:
4167
+
4168
+ - ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
4169
+ - ``diameter_bin_center`` : Diameter bin centers [mm]
4170
+ - ``diameter_bin_width`` : Diameter bin widths [mm]
4171
+ - ``fall_velocity`` : Drop fall velocity [m s⁻¹] (required if any objective targets 'R')
4172
+
4173
+ psd_model : str
4174
+ Name of the PSD model to fit. Valid options are:
4175
+
4176
+ - ``"GammaPSD"`` : Gamma distribution
4177
+ - ``"NormalizedGammaPSD"`` : Normalized gamma distribution
4178
+ - ``"LognormalPSD"`` : Lognormal distribution
4179
+ - ``"ExponentialPSD"`` : Exponential distribution
4180
+ - ``"NormalizedGeneralizedGammaPSD"`` : Normalized generalized gamma distribution
4181
+
4182
+ objectives : list of dict
4183
+ List of optimization objectives. Each objective dict must contain:
4184
+
4185
+ - ``"target"`` : str
4186
+ Target quantity to optimize. Valid options:
4187
+
4188
+ - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
4189
+ - ``"H(x)"`` : Normalized drop number concentration [-]. Only for Normalized PSD models.
4190
+ - ``"R"`` : Rain rate [mm h⁻¹]
4191
+ - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
4192
+ - ``"LWC"`` : Liquid water content [g m⁻³]
4193
+ - ``"M<p>"`` : Moment of order p
4194
+
4195
+ - ``"transformation"`` : str
4196
+ Transformation applied before computing the error. Valid options:
4197
+
4198
+ - ``"identity"`` : No transformation
4199
+ - ``"log"`` : Logarithmic transformation
4200
+ - ``"sqrt"`` : Square root transformation
4201
+
4202
+ - ``"censoring"`` : str
4203
+ Censoring applied to observed PSD. Valid options:
4204
+
4205
+ - ``"none"`` : No censoring applied
4206
+ - ``"left"`` : Left-censored (remove leading zero bins)
4207
+ - ``"right"`` : Right-censored (remove trailing zero bins)
4208
+ - ``"both"`` : Both sides censored
4209
+
4210
+ - ``"loss"`` : str
4211
+ Error metric.
4212
+ For ``"N(D)"`` and ``"H(x)"`` valid options are ``"SSE"``, ``"SAE"``,
4213
+ ``"MAE"``, ``"MSE"``, ``"RMSE"``, ``"relMAE"``
4214
+ ``"KLDiv"``, ``"JSD"``, ``"WD"``, ``"KS"``.
4215
+
4216
+ For ``"R"``, ``"Z"``, ``"LWC"``, and ``"M<p>"`` valid options are
4217
+ ``"AE"``, ``"SE"``.
4218
+
4219
+ - ``"loss_weight"`` : float, optional
4220
+ Weight for this objective in the combined loss (default: 1.0 for single objective).
4221
+ When multiple objectives are provided, weights are normalized to sum to 1.0.
4222
+
4223
+ fixed_parameters : dict, optional
4224
+ Initial parameter values for the PSD model. Keys are parameter names,
4225
+ values are scalars. Example: {"mu": 2.0, "Lambda": 1.5}
4226
+ search_space : dict, optional
4227
+ Search space configuration for parameters. Each parameter can define:
4228
+
4229
+ - ``"min"`` : float, Minimum value
4230
+ - ``"max"`` : float, Maximum value
4231
+ - ``"step"`` : float, Step size for parameter grid
2370
4232
 
2371
- # Check fall velocity is available if target R
4233
+ Example: {"mu": {"min": 0, "max": 10, "step": 0.2}}
4234
+
4235
+ Returns
4236
+ -------
4237
+ ds_params : xarray.Dataset
4238
+ Dataset containing the estimated PSD model parameters.
4239
+ Variables depend on the selected ``psd_model``:
4240
+
4241
+ - ``GammaPSD`` : ``N0``, ``mu``, ``Lambda``
4242
+ - ``NormalizedGammaPSD`` : ``Nw``, ``mu``, ``D50``
4243
+ - ``LognormalPSD`` : ``Nt``, ``mu``, ``sigma``
4244
+ - ``ExponentialPSD`` : ``N0``, ``Lambda``
4245
+ - ``NormalizedGeneralizedGammaPSD`` : ``Nc``, ``Dc``, ``mu``, ``c``
4246
+
4247
+ Each parameter variable includes attributes with name, units, and description.
4248
+ Dataset attributes contain optimization metadata.
4249
+
4250
+ Raises
4251
+ ------
4252
+ ValueError
4253
+ If objectives structure is invalid or fixed_parameters/search_space bounds are invalid
4254
+ NotImplementedError
4255
+ If psd_model is not supported for GS optimization
4256
+
4257
+ Notes
4258
+ -----
4259
+ Grid search optimization explores a predefined parameter space to find
4260
+ the combination that minimizes the specified loss across all objectives.
4261
+ When multiple objectives are provided, losses are combined using normalized weights.
4262
+
4263
+ If ``drop_number_concentration`` values are all zeros or contain
4264
+ non-finite values, the output PSD parameters are set to NaN.
4265
+
4266
+ Examples
4267
+ --------
4268
+ Single objective optimization:
4269
+
4270
+ >>> objectives = [{
4271
+ ... "target": "N(D)",
4272
+ ... "transformation": "log",
4273
+ ... "censoring": "none",
4274
+ ... "loss": "MAE"
4275
+ ... }]
4276
+ >>> ds_params = get_gs_parameters(ds, psd_model="GammaPSD", objectives=objectives)
4277
+
4278
+ Multi-objective optimization:
4279
+
4280
+ >>> objectives = [
4281
+ ... {
4282
+ ... "target": "N(D)",
4283
+ ... "transformation": "identity",
4284
+ ... "censoring": "left",
4285
+ ... "loss": "MAE",
4286
+ ... "loss_weight": 0.6
4287
+ ... },
4288
+ ... {
4289
+ ... "target": "LWC",
4290
+ ... "transformation": "log",
4291
+ ... "censoring": "both",
4292
+ ... "loss": "AE",
4293
+ ... "loss_weight": 0.4
4294
+ ... }
4295
+ ... ]
4296
+ >>> search_space = {
4297
+ ... "mu": {"min": 0, "max": 10, "step": 0.2},
4298
+ ... "Lambda": {"min": 0.1, "max": 5, "step": 0.1}
4299
+ ... }
4300
+ >>> ds_params = get_gs_parameters(
4301
+ ... ds, psd_model="GammaPSD", objectives=objectives, search_space=search_space
4302
+ ... )
4303
+ """
4304
+ # Validate inputs
4305
+ check_psd_model(psd_model, optimization="GS")
4306
+ objectives = check_objectives(objectives)
4307
+ if objectives is None:
4308
+ objectives = DEFAULT_OBJECTIVES
4309
+
4310
+ # Define PSD model parameters (scalars or arrays for grid search)
4311
+ parameters = define_gs_parameters(
4312
+ psd_model=psd_model,
4313
+ fixed_parameters=fixed_parameters,
4314
+ search_space=search_space,
4315
+ )
4316
+
4317
+ # Ensure fall velocity is available if any objective needs it (e.g., R target)
2372
4318
  if "fall_velocity" not in ds:
2373
4319
  ds["fall_velocity"] = get_rain_fall_velocity_from_ds(ds)
2374
4320
 
2375
- # Retrieve estimation function
4321
+ # Retrieve model-specific grid search function
2376
4322
  func = OPTIMIZATION_ROUTINES_DICT["GS"][psd_model]
2377
4323
 
2378
- # Estimate parameters
2379
- ds_params = func(ds, target=target, transformation=transformation, error_order=error_order)
4324
+ # Call model-specific function with unpacked parameters, objectives, and the caller's return_loss flag
4325
+ ds_params = func(ds, **parameters, objectives=objectives, return_loss=return_loss)
2380
4326
 
2381
- # Add model attributes
2382
- optimization_kwargs = {
2383
- "target": target,
2384
- "transformation": transformation,
2385
- "error_order": error_order,
2386
- }
4327
+ # Finalize dataset attributes with optimization metadata
2387
4328
  ds_params = _finalize_attributes(
2388
4329
  ds_params=ds_params,
2389
4330
  psd_model=psd_model,
2390
4331
  optimization="GS",
2391
- optimization_kwargs=optimization_kwargs,
4332
+ optimization_settings=objectives,
2392
4333
  )
2393
- # Return dataset with parameters
2394
4334
  return ds_params
2395
4335
 
2396
4336
 
@@ -2417,14 +4357,142 @@ def estimate_model_parameters(
2417
4357
  ds,
2418
4358
  psd_model,
2419
4359
  optimization,
2420
- optimization_kwargs=None,
4360
+ optimization_settings=None,
2421
4361
  ):
2422
- """Routine to estimate PSD model parameters."""
4362
+ """Estimate particle size distribution model parameters.
4363
+
4364
+ This is the main interface function for fitting PSD models to observed data.
4365
+ It supports three optimization methods: Maximum Likelihood (ML), Method of
4366
+ Moments (MOM), and Grid Search (GS).
4367
+
4368
+ Parameters
4369
+ ----------
4370
+ ds : xarray.Dataset
4371
+ Input dataset containing PSD observations. Must include:
4372
+
4373
+ - ``drop_number_concentration`` : Drop number concentration [m⁻³ mm⁻¹]
4374
+ - ``diameter_bin_center`` : Diameter bin centers [mm]
4375
+ - ``diameter_bin_width`` : Diameter bin widths [mm]
4376
+
4377
+ Additional variables required for specific optimization methods:
4378
+
4379
+ - For ML: ``diameter_bin_lower``, ``diameter_bin_upper``
4380
+ - For GS with target='R': ``fall_velocity`` (auto-computed if missing)
4381
+ - For MOM: Moment variables ``M0``, ``M1``, ..., ``M6`` (depending on method)
4382
+ psd_model : str
4383
+ Name of the PSD model to fit. Valid options:
4384
+
4385
+ - ``"GammaPSD"`` : Gamma distribution
4386
+ - ``"NormalizedGammaPSD"`` : Normalized gamma distribution
4387
+ - ``"LognormalPSD"`` : Lognormal distribution
4388
+ - ``"ExponentialPSD"`` : Exponential distribution
4389
+
4390
+ Use ``available_optimization(psd_model)`` to check which optimization
4391
+ methods are available for a given model.
4392
+ optimization : str
4393
+ Optimization method to use. Valid options:
4394
+
4395
+ - ``"ML"`` : Maximum Likelihood estimation
4396
+ - ``"MOM"`` : Method of Moments
4397
+ - ``"GS"`` : Grid Search
4398
+ optimization_settings : dict, optional
4399
+ Dictionary of keyword arguments specific to the chosen optimization method.
4400
+
4401
+ For ``optimization="ML"``:
4402
+
4403
+ - ``init_method`` : str or list, Method(s) of moments for parameter initialization
4404
+ - ``probability_method`` : str, Method to compute probabilities (default: 'cdf')
4405
+ - ``likelihood`` : str, Likelihood function ('multinomial' or 'poisson', default: 'multinomial')
4406
+ - ``truncated_likelihood`` : bool, Use truncated likelihood (default: True)
4407
+ - ``optimizer`` : str, Optimization algorithm (default: 'Nelder-Mead')
4408
+
4409
+ For ``optimization="GS"``:
4410
+
4411
+ - ``fixed_parameters`` : dict, optional
4412
+ Allows to specify PSD model parameters to fixed value(s).
4413
+ For example for psd_model=GammaPSD one can use fixed_parameters={"mu": 3}
4414
+ For psd_model=NormalizedGeneralizedGammaPSD, it's mandatory to
4415
+ specify i and j moment order with: fixed_parameters={"i": 3, "j": 4}
4416
+ - ``objectives`` : list of dict, optional
4417
+ List of optimization objectives. If None (default), use DEFAULT_OBJECTIVES.
4418
+ Each objective dict must contain:
4419
+
4420
+ - ``"target"`` : str
4421
+ Target quantity to optimize. Valid options:
4422
+ - ``"N(D)"`` : Drop number concentration [m⁻³ mm⁻¹]
4423
+ - ``"H(x)"`` : Normalized drop number concentration [-]. Only for Normalized PSD models.
4424
+ - ``"R"`` : Rain rate [mm h⁻¹]
4425
+ - ``"Z"`` : Radar reflectivity [mm⁶ m⁻³]
4426
+ - ``"LWC"`` : Liquid water content [g m⁻³]
4427
+ - ``"M<p>"`` : Moment of order p
4428
+ - ``"transformation"`` : str
4429
+ Transformation applied before computing the error. Valid options:
4430
+ - ``"identity"`` : No transformation
4431
+ - ``"log"`` : Logarithmic transformation
4432
+ - ``"sqrt"`` : Square root transformation
4433
+ - ``"censoring"`` : str
4434
+ Censoring applied to observed PSD. Valid options:
4435
+ - ``"none"`` : No censoring applied
4436
+ - ``"left"`` : Left-censored (remove leading zero bins)
4437
+ - ``"right"`` : Right-censored (remove trailing zero bins)
4438
+ - ``"both"`` : Both sides censored
4439
+ - ``"loss"`` : str
4440
+ Error metric.
4441
+ For ``"N(D)"`` and ``"H(x)"`` valid options are
4442
+ ``"SSE"``, ``"SAE"``, ``"MAE"``, ``"MSE"``, ``"RMSE"``, ``"relMAE"``
4443
+ ``"KLDiv"``, ``"JSD"``, ``"WD"``, ``"KS"``.
4444
+ For ``"R"``, ``"Z"``, ``"LWC"``, and ``"M<p>"`` valid options are
4445
+ ``"AE"``, ``"SE"``.
4446
+ - ``"loss_weight"`` : float, optional
4447
+ Weight for this objective in the combined loss (default: 1.0 for single objective).
4448
+ When multiple objectives are provided, weights are normalized to sum to 1.0.
4449
+
4450
+
4451
+ - ``search_space`` : dict, optional
4452
+ Search space configuration for parameters. If None (default), use reasonable defaults.
4453
+
4454
+ Each parameter can define:
4455
+
4456
+ - ``"min"`` : float, Minimum value
4457
+ - ``"max"`` : float, Maximum value
4458
+ - ``"step"`` : float, Step size for parameter grid
4459
+
4460
+ Example:
4461
+ {"mu": {"min": 0, "max": 10, "step": 0.2},
4462
+ "Lambda": {"min": 0.1, "max": 5, "step": 0.1}}
4463
+
4464
+ For ``optimization="MOM"``:
4465
+
4466
+ - ``mom_methods`` : str or list, Method(s) of moments to use (e.g., 'M234')
4467
+
4468
+ Returns
4469
+ -------
4470
+ ds_params : xarray.Dataset
4471
+ Dataset containing the estimated PSD model parameters with attributes.
4472
+ Variables depend on the selected ``psd_model``:
4473
+
4474
+ - ``GammaPSD`` : ``N0``, ``mu``, ``Lambda``
4475
+ - ``NormalizedGammaPSD`` : ``Nw``, ``mu``, ``Dm``
4476
+ - ``LognormalPSD`` : ``Nt``, ``mu``, ``sigma``
4477
+ - ``ExponentialPSD`` : ``N0``, ``Lambda``
4478
+
4479
+ Each parameter variable includes attributes with parameter name, units,
4480
+ and optimization metadata.
4481
+
4482
+ Dataset attributes include:
4483
+
4484
+ - ``disdrodb_psd_model`` : The fitted PSD model name
4485
+ - ``disdrodb_psd_optimization`` : The optimization method used
4486
+ - ``disdrodb_psd_optimization_settings`` : String representation of the optimization settings
4487
+ """
2423
4488
  # Check inputs arguments
2424
- optimization_kwargs = {} if optimization_kwargs is None else optimization_kwargs
4489
+ optimization_settings = {} if optimization_settings is None else optimization_settings
2425
4490
  optimization = check_optimization(optimization)
2426
- check_optimization_kwargs(optimization_kwargs=optimization_kwargs, optimization=optimization, psd_model=psd_model)
2427
-
4491
+ check_optimization_settings(
4492
+ optimization_settings=optimization_settings,
4493
+ optimization=optimization,
4494
+ psd_model=psd_model,
4495
+ )
2428
4496
  # Check N(D)
2429
4497
  # --> If all 0, set to np.nan
2430
4498
  # --> If any is not finite --> set to np.nan
@@ -2441,7 +4509,7 @@ def estimate_model_parameters(
2441
4509
  func = dict_func[optimization]
2442
4510
 
2443
4511
  # Retrieve parameters
2444
- ds_params = func(ds, psd_model=psd_model, **optimization_kwargs)
4512
+ ds_params = func(ds, psd_model=psd_model, **optimization_settings)
2445
4513
 
2446
4514
  # Add parameters attributes (and units)
2447
4515
  for var, attrs in ATTRS_PARAMS_DICT[psd_model].items():