disdrodb 0.0.21__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +132 -15
- disdrodb/_config.py +4 -2
- disdrodb/_version.py +9 -4
- disdrodb/api/checks.py +264 -237
- disdrodb/api/configs.py +4 -8
- disdrodb/api/create_directories.py +235 -290
- disdrodb/api/info.py +217 -26
- disdrodb/api/io.py +295 -269
- disdrodb/api/path.py +597 -173
- disdrodb/api/search.py +486 -0
- disdrodb/{metadata/scripts → cli}/disdrodb_check_metadata_archive.py +12 -7
- disdrodb/{utils/pandas.py → cli/disdrodb_data_archive_directory.py} +9 -18
- disdrodb/cli/disdrodb_download_archive.py +86 -0
- disdrodb/cli/disdrodb_download_metadata_archive.py +53 -0
- disdrodb/cli/disdrodb_download_station.py +84 -0
- disdrodb/{api/scripts → cli}/disdrodb_initialize_station.py +22 -10
- disdrodb/cli/disdrodb_metadata_archive_directory.py +32 -0
- disdrodb/{data_transfer/scripts/disdrodb_download_station.py → cli/disdrodb_open_data_archive.py} +22 -22
- disdrodb/cli/disdrodb_open_logs_directory.py +69 -0
- disdrodb/{data_transfer/scripts/disdrodb_upload_station.py → cli/disdrodb_open_metadata_archive.py} +22 -24
- disdrodb/cli/disdrodb_open_metadata_directory.py +71 -0
- disdrodb/cli/disdrodb_open_product_directory.py +74 -0
- disdrodb/cli/disdrodb_open_readers_directory.py +32 -0
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0.py +38 -31
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0_station.py +32 -30
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0a.py +30 -21
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0a_station.py +24 -33
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0b.py +30 -21
- disdrodb/{l0/scripts → cli}/disdrodb_run_l0b_station.py +25 -34
- disdrodb/cli/disdrodb_run_l0c.py +130 -0
- disdrodb/cli/disdrodb_run_l0c_station.py +129 -0
- disdrodb/cli/disdrodb_run_l1.py +122 -0
- disdrodb/cli/disdrodb_run_l1_station.py +121 -0
- disdrodb/cli/disdrodb_run_l2e.py +122 -0
- disdrodb/cli/disdrodb_run_l2e_station.py +122 -0
- disdrodb/cli/disdrodb_run_l2m.py +122 -0
- disdrodb/cli/disdrodb_run_l2m_station.py +122 -0
- disdrodb/cli/disdrodb_upload_archive.py +105 -0
- disdrodb/cli/disdrodb_upload_station.py +98 -0
- disdrodb/configs.py +90 -25
- disdrodb/data_transfer/__init__.py +22 -0
- disdrodb/data_transfer/download_data.py +87 -90
- disdrodb/data_transfer/upload_data.py +64 -37
- disdrodb/data_transfer/zenodo.py +15 -18
- disdrodb/docs.py +1 -1
- disdrodb/issue/__init__.py +17 -4
- disdrodb/issue/checks.py +10 -23
- disdrodb/issue/reader.py +9 -12
- disdrodb/issue/writer.py +14 -17
- disdrodb/l0/__init__.py +17 -26
- disdrodb/l0/check_configs.py +35 -23
- disdrodb/l0/check_standards.py +32 -42
- disdrodb/l0/configs/{Thies_LPM → LPM}/bins_diameter.yml +44 -44
- disdrodb/l0/configs/{Thies_LPM → LPM}/bins_velocity.yml +40 -40
- disdrodb/l0/configs/LPM/l0a_encodings.yml +80 -0
- disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_cf_attrs.yml +62 -59
- disdrodb/l0/configs/{Thies_LPM → LPM}/l0b_encodings.yml +9 -9
- disdrodb/l0/configs/{Thies_LPM → LPM}/raw_data_format.yml +245 -245
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_diameter.yml +66 -66
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/bins_velocity.yml +64 -64
- disdrodb/l0/configs/PARSIVEL/l0a_encodings.yml +32 -0
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_cf_attrs.yml +22 -20
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/l0b_encodings.yml +17 -17
- disdrodb/l0/configs/{OTT_Parsivel → PARSIVEL}/raw_data_format.yml +77 -77
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_diameter.yml +64 -64
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/bins_velocity.yml +64 -64
- disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +39 -0
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_cf_attrs.yml +24 -22
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/l0b_encodings.yml +20 -20
- disdrodb/l0/configs/{OTT_Parsivel2 → PARSIVEL2}/raw_data_format.yml +98 -98
- disdrodb/l0/configs/{RD_80 → RD80}/bins_diameter.yml +40 -40
- disdrodb/l0/configs/RD80/l0a_encodings.yml +16 -0
- disdrodb/l0/configs/{RD_80 → RD80}/l0b_cf_attrs.yml +3 -3
- disdrodb/l0/configs/RD80/l0b_encodings.yml +135 -0
- disdrodb/l0/configs/{RD_80 → RD80}/raw_data_format.yml +48 -48
- disdrodb/l0/l0_reader.py +216 -340
- disdrodb/l0/l0a_processing.py +237 -208
- disdrodb/l0/l0b_nc_processing.py +227 -80
- disdrodb/l0/l0b_processing.py +93 -173
- disdrodb/l0/l0c_processing.py +627 -0
- disdrodb/l0/readers/{ARM → LPM/ARM}/ARM_LPM.py +36 -58
- disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +226 -0
- disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +185 -0
- disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +183 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +179 -0
- disdrodb/l0/readers/{UK → LPM/UK}/DIVEN.py +14 -35
- disdrodb/l0/readers/PARSIVEL/AUSTRALIA/MELBOURNE_2007_PARSIVEL.py +157 -0
- disdrodb/l0/readers/PARSIVEL/CHINA/CHONGQING.py +113 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/ARCTIC_2021.py +40 -57
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/COMMON_2011.py +37 -54
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/DAVOS_2009_2011.py +34 -51
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_2009.py +34 -51
- disdrodb/l0/readers/{EPFL/PARADISO_2014.py → PARSIVEL/EPFL/EPFL_ROOF_2008.py} +38 -50
- disdrodb/l0/readers/PARSIVEL/EPFL/EPFL_ROOF_2010.py +105 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2011.py +34 -51
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/EPFL_ROOF_2012.py +33 -51
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GENEPI_2007.py +25 -44
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007.py +25 -44
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/GRAND_ST_BERNARD_2007_2.py +25 -44
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HPICONET_2010.py +34 -51
- disdrodb/l0/readers/{EPFL/EPFL_ROOF_2010.py → PARSIVEL/EPFL/HYMEX_LTE_SOP2.py} +37 -50
- disdrodb/l0/readers/PARSIVEL/EPFL/HYMEX_LTE_SOP3.py +111 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/HYMEX_LTE_SOP4.py +36 -54
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2018.py +34 -52
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/LOCARNO_2019.py +38 -56
- disdrodb/l0/readers/PARSIVEL/EPFL/PARADISO_2014.py +105 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PARSIVEL_2007.py +27 -45
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/PLATO_2019.py +24 -44
- disdrodb/l0/readers/PARSIVEL/EPFL/RACLETS_2019.py +140 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RACLETS_2019_WJF.py +41 -59
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/RIETHOLZBACH_2011.py +34 -51
- disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2017.py +117 -0
- disdrodb/l0/readers/PARSIVEL/EPFL/SAMOYLOV_2019.py +137 -0
- disdrodb/l0/readers/{EPFL → PARSIVEL/EPFL}/UNIL_2022.py +42 -55
- disdrodb/l0/readers/PARSIVEL/GPM/IFLOODS.py +104 -0
- disdrodb/l0/readers/{GPM → PARSIVEL/GPM}/LPVEX.py +29 -48
- disdrodb/l0/readers/PARSIVEL/GPM/MC3E.py +184 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/CCOPE_2015.py +113 -0
- disdrodb/l0/readers/{NCAR/VORTEX_SE_2016_P1.py → PARSIVEL/NCAR/OWLES_MIPS.py} +46 -72
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +125 -0
- disdrodb/l0/readers/{NCAR/OWLES_MIPS.py → PARSIVEL/NCAR/PLOWS_MIPS.py} +45 -64
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +114 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +176 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +183 -0
- disdrodb/l0/readers/{ARM/ARM_LD.py → PARSIVEL2/ARM/ARM_PARSIVEL2.py} +27 -50
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +163 -0
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +163 -0
- disdrodb/l0/readers/{DENMARK → PARSIVEL2/DENMARK}/EROSION_nc.py +14 -35
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +119 -0
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +104 -0
- disdrodb/l0/readers/PARSIVEL2/GPM/NSSTC.py +176 -0
- disdrodb/l0/readers/PARSIVEL2/ITALY/GID_PARSIVEL2.py +32 -0
- disdrodb/l0/readers/PARSIVEL2/MEXICO/OH_IIUNAM_nc.py +56 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +120 -0
- disdrodb/l0/readers/{NCAR → PARSIVEL2/NCAR}/PECAN_MIPS.py +45 -64
- disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +181 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +160 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +160 -0
- disdrodb/l0/readers/{NCAR/PLOWS_MIPS.py → PARSIVEL2/NCAR/VORTEX_SE_2016_P1.py} +49 -66
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +118 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +152 -0
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT.py +166 -0
- disdrodb/l0/readers/{NCAR/RELAMPAGO_RD80.py → RD80/BRAZIL/CHUVA_RD80.py} +36 -60
- disdrodb/l0/readers/{BRAZIL → RD80/BRAZIL}/GOAMAZON_RD80.py +36 -55
- disdrodb/l0/readers/{NCAR → RD80/NCAR}/CINDY_2011_RD80.py +35 -54
- disdrodb/l0/readers/{BRAZIL/CHUVA_RD80.py → RD80/NCAR/RELAMPAGO_RD80.py} +40 -54
- disdrodb/l0/readers/template_reader_raw_netcdf_data.py +62 -0
- disdrodb/l0/readers/{reader_template.py → template_reader_raw_text_data.py} +20 -44
- disdrodb/l0/routines.py +885 -581
- disdrodb/l0/standards.py +72 -236
- disdrodb/l0/template_tools.py +104 -109
- disdrodb/l1/__init__.py +17 -0
- disdrodb/l1/beard_model.py +716 -0
- disdrodb/l1/encoding_attrs.py +620 -0
- disdrodb/l1/fall_velocity.py +260 -0
- disdrodb/l1/filters.py +192 -0
- disdrodb/l1/processing.py +200 -0
- disdrodb/l1/resampling.py +236 -0
- disdrodb/l1/routines.py +357 -0
- disdrodb/l1_env/__init__.py +17 -0
- disdrodb/l1_env/routines.py +38 -0
- disdrodb/l2/__init__.py +17 -0
- disdrodb/l2/empirical_dsd.py +1735 -0
- disdrodb/l2/event.py +388 -0
- disdrodb/l2/processing.py +519 -0
- disdrodb/l2/processing_options.py +213 -0
- disdrodb/l2/routines.py +868 -0
- disdrodb/metadata/__init__.py +9 -2
- disdrodb/metadata/checks.py +165 -118
- disdrodb/metadata/download.py +81 -0
- disdrodb/metadata/geolocation.py +146 -0
- disdrodb/metadata/info.py +20 -13
- disdrodb/metadata/manipulation.py +1 -1
- disdrodb/metadata/reader.py +59 -8
- disdrodb/metadata/search.py +77 -144
- disdrodb/metadata/standards.py +7 -8
- disdrodb/metadata/writer.py +8 -14
- disdrodb/psd/__init__.py +38 -0
- disdrodb/psd/fitting.py +2146 -0
- disdrodb/psd/models.py +774 -0
- disdrodb/routines.py +1176 -0
- disdrodb/scattering/__init__.py +28 -0
- disdrodb/scattering/axis_ratio.py +344 -0
- disdrodb/scattering/routines.py +456 -0
- disdrodb/utils/__init__.py +17 -0
- disdrodb/utils/attrs.py +208 -0
- disdrodb/utils/cli.py +269 -0
- disdrodb/utils/compression.py +60 -42
- disdrodb/utils/dask.py +62 -0
- disdrodb/utils/decorators.py +110 -0
- disdrodb/utils/directories.py +107 -46
- disdrodb/utils/encoding.py +127 -0
- disdrodb/utils/list.py +29 -0
- disdrodb/utils/logger.py +168 -46
- disdrodb/utils/time.py +657 -0
- disdrodb/utils/warnings.py +30 -0
- disdrodb/utils/writer.py +57 -0
- disdrodb/utils/xarray.py +138 -47
- disdrodb/utils/yaml.py +0 -1
- disdrodb/viz/__init__.py +17 -0
- disdrodb/viz/plots.py +17 -0
- disdrodb-0.1.0.dist-info/METADATA +321 -0
- disdrodb-0.1.0.dist-info/RECORD +216 -0
- {disdrodb-0.0.21.dist-info → disdrodb-0.1.0.dist-info}/WHEEL +1 -1
- disdrodb-0.1.0.dist-info/entry_points.txt +30 -0
- disdrodb/data_transfer/scripts/disdrodb_download_archive.py +0 -53
- disdrodb/data_transfer/scripts/disdrodb_upload_archive.py +0 -57
- disdrodb/l0/configs/OTT_Parsivel/l0a_encodings.yml +0 -32
- disdrodb/l0/configs/OTT_Parsivel2/l0a_encodings.yml +0 -39
- disdrodb/l0/configs/RD_80/l0a_encodings.yml +0 -16
- disdrodb/l0/configs/RD_80/l0b_encodings.yml +0 -135
- disdrodb/l0/configs/Thies_LPM/l0a_encodings.yml +0 -80
- disdrodb/l0/io.py +0 -257
- disdrodb/l0/l0_processing.py +0 -1091
- disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_OTT.py +0 -178
- disdrodb/l0/readers/AUSTRALIA/MELBOURNE_2007_THIES.py +0 -247
- disdrodb/l0/readers/BRAZIL/CHUVA_LPM.py +0 -204
- disdrodb/l0/readers/BRAZIL/CHUVA_OTT.py +0 -183
- disdrodb/l0/readers/BRAZIL/GOAMAZON_LPM.py +0 -204
- disdrodb/l0/readers/BRAZIL/GOAMAZON_OTT.py +0 -183
- disdrodb/l0/readers/CHINA/CHONGQING.py +0 -131
- disdrodb/l0/readers/EPFL/EPFL_ROOF_2008.py +0 -128
- disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP2.py +0 -127
- disdrodb/l0/readers/EPFL/HYMEX_LTE_SOP3.py +0 -129
- disdrodb/l0/readers/EPFL/RACLETS_2019.py +0 -158
- disdrodb/l0/readers/EPFL/SAMOYLOV_2017.py +0 -136
- disdrodb/l0/readers/EPFL/SAMOYLOV_2019.py +0 -158
- disdrodb/l0/readers/FRANCE/SIRTA_OTT2.py +0 -138
- disdrodb/l0/readers/GPM/GCPEX.py +0 -123
- disdrodb/l0/readers/GPM/IFLOODS.py +0 -123
- disdrodb/l0/readers/GPM/MC3E.py +0 -123
- disdrodb/l0/readers/GPM/NSSTC.py +0 -164
- disdrodb/l0/readers/ITALY/GID.py +0 -199
- disdrodb/l0/readers/MEXICO/OH_IIUNAM_nc.py +0 -92
- disdrodb/l0/readers/NCAR/CCOPE_2015.py +0 -133
- disdrodb/l0/readers/NCAR/PECAN_FP3.py +0 -137
- disdrodb/l0/readers/NCAR/PECAN_MOBILE.py +0 -144
- disdrodb/l0/readers/NCAR/RELAMPAGO_OTT.py +0 -195
- disdrodb/l0/readers/NCAR/SNOWIE_PJ.py +0 -172
- disdrodb/l0/readers/NCAR/SNOWIE_SB.py +0 -179
- disdrodb/l0/readers/NCAR/VORTEX2_2009.py +0 -133
- disdrodb/l0/readers/NCAR/VORTEX2_2010.py +0 -188
- disdrodb/l0/readers/NCAR/VORTEX2_2010_UF.py +0 -191
- disdrodb/l0/readers/NCAR/VORTEX_SE_2016_P2.py +0 -135
- disdrodb/l0/readers/NCAR/VORTEX_SE_2016_PIPS.py +0 -170
- disdrodb/l0/readers/NETHERLANDS/DELFT.py +0 -187
- disdrodb/l0/readers/SPAIN/SBEGUERIA.py +0 -179
- disdrodb/l0/scripts/disdrodb_run_l0b_concat.py +0 -93
- disdrodb/l0/scripts/disdrodb_run_l0b_concat_station.py +0 -85
- disdrodb/utils/netcdf.py +0 -452
- disdrodb/utils/scripts.py +0 -102
- disdrodb-0.0.21.dist-info/AUTHORS.md +0 -18
- disdrodb-0.0.21.dist-info/METADATA +0 -186
- disdrodb-0.0.21.dist-info/RECORD +0 -168
- disdrodb-0.0.21.dist-info/entry_points.txt +0 -15
- /disdrodb/l0/configs/{RD_80 → RD80}/bins_velocity.yml +0 -0
- /disdrodb/l0/manuals/{Thies_LPM.pdf → LPM.pdf} +0 -0
- /disdrodb/l0/manuals/{ODM_470.pdf → ODM470.pdf} +0 -0
- /disdrodb/l0/manuals/{OTT_Parsivel.pdf → PARSIVEL.pdf} +0 -0
- /disdrodb/l0/manuals/{OTT_Parsivel2.pdf → PARSIVEL2.pdf} +0 -0
- /disdrodb/l0/manuals/{PWS_100.pdf → PWS100.pdf} +0 -0
- /disdrodb/l0/manuals/{RD_80.pdf → RD80.pdf} +0 -0
- {disdrodb-0.0.21.dist-info → disdrodb-0.1.0.dist-info/licenses}/LICENSE +0 -0
- {disdrodb-0.0.21.dist-info → disdrodb-0.1.0.dist-info}/top_level.txt +0 -0
disdrodb/psd/fitting.py
ADDED
|
@@ -0,0 +1,2146 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------.
|
|
2
|
+
# Copyright (c) 2021-2023 DISDRODB developers
|
|
3
|
+
#
|
|
4
|
+
# This program is free software: you can redistribute it and/or modify
|
|
5
|
+
# it under the terms of the GNU General Public License as published by
|
|
6
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
7
|
+
# (at your option) any later version.
|
|
8
|
+
#
|
|
9
|
+
# This program is distributed in the hope that it will be useful,
|
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
+
# GNU General Public License for more details.
|
|
13
|
+
#
|
|
14
|
+
# You should have received a copy of the GNU General Public License
|
|
15
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
|
+
# -----------------------------------------------------------------------------.
|
|
17
|
+
"""Routines for PSD fitting."""
|
|
18
|
+
import numpy as np
|
|
19
|
+
import scipy.stats as ss
|
|
20
|
+
import xarray as xr
|
|
21
|
+
from scipy.integrate import quad
|
|
22
|
+
from scipy.optimize import minimize
|
|
23
|
+
from scipy.special import gamma, gammainc, gammaln # Regularized lower incomplete gamma function
|
|
24
|
+
|
|
25
|
+
from disdrodb.psd.models import ExponentialPSD, GammaPSD, LognormalPSD, NormalizedGammaPSD
|
|
26
|
+
from disdrodb.utils.warnings import suppress_warnings
|
|
27
|
+
|
|
28
|
+
# gamma(>171) return inf !
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
####--------------------------------------------------------------------------------------.
|
|
32
|
+
#### Goodness of fit (GOF)
|
|
33
|
+
def compute_gof_stats(drop_number_concentration, psd):
|
|
34
|
+
"""
|
|
35
|
+
Compute various goodness-of-fit (GoF) statistics between observed and predicted values.
|
|
36
|
+
|
|
37
|
+
Parameters
|
|
38
|
+
----------
|
|
39
|
+
- drop_number_concentration: xarray.DataArray with dimensions ('time', 'diameter_bin_center')
|
|
40
|
+
- psd: instance of PSD class
|
|
41
|
+
|
|
42
|
+
Returns
|
|
43
|
+
-------
|
|
44
|
+
- ds: xarray.Dataset containing the computed GoF statistics
|
|
45
|
+
"""
|
|
46
|
+
from disdrodb.l2.empirical_dsd import get_mode_diameter
|
|
47
|
+
|
|
48
|
+
# Retrieve diameter bin width
|
|
49
|
+
diameter = drop_number_concentration["diameter_bin_center"]
|
|
50
|
+
diameter_bin_width = drop_number_concentration["diameter_bin_width"]
|
|
51
|
+
|
|
52
|
+
# Define observed and predicted values and compute errors
|
|
53
|
+
observed_values = drop_number_concentration
|
|
54
|
+
fitted_values = psd(diameter) # .transpose(*observed_values.dims)
|
|
55
|
+
error = observed_values - fitted_values
|
|
56
|
+
|
|
57
|
+
# Compute GOF statistics
|
|
58
|
+
with suppress_warnings():
|
|
59
|
+
# Compute Pearson correlation
|
|
60
|
+
pearson_r = xr.corr(observed_values, fitted_values, dim="diameter_bin_center")
|
|
61
|
+
|
|
62
|
+
# Compute MSE
|
|
63
|
+
mse = (error**2).mean(dim="diameter_bin_center")
|
|
64
|
+
|
|
65
|
+
# Compute maximum error
|
|
66
|
+
max_error = error.max(dim="diameter_bin_center")
|
|
67
|
+
relative_max_error = error.max(dim="diameter_bin_center") / observed_values.max(dim="diameter_bin_center")
|
|
68
|
+
|
|
69
|
+
# Compute difference in total number concentration
|
|
70
|
+
total_number_concentration_obs = (observed_values * diameter_bin_width).sum(dim="diameter_bin_center")
|
|
71
|
+
total_number_concentration_pred = (fitted_values * diameter_bin_width).sum(dim="diameter_bin_center")
|
|
72
|
+
total_number_concentration_difference = total_number_concentration_pred - total_number_concentration_obs
|
|
73
|
+
|
|
74
|
+
# Compute Kullback-Leibler divergence
|
|
75
|
+
# - Compute pdf per bin
|
|
76
|
+
pk_pdf = observed_values / total_number_concentration_obs
|
|
77
|
+
qk_pdf = fitted_values / total_number_concentration_pred
|
|
78
|
+
|
|
79
|
+
# - Compute probabilities per bin
|
|
80
|
+
pk = pk_pdf * diameter_bin_width
|
|
81
|
+
pk = pk / pk.sum(dim="diameter_bin_center") # this might not be necessary
|
|
82
|
+
qk = qk_pdf * diameter_bin_width
|
|
83
|
+
qk = qk / qk.sum(dim="diameter_bin_center") # this might not be necessary
|
|
84
|
+
|
|
85
|
+
# - Compute divergence
|
|
86
|
+
log_prob_ratio = np.log(pk / qk)
|
|
87
|
+
log_prob_ratio = log_prob_ratio.where(np.isfinite(log_prob_ratio))
|
|
88
|
+
kl_divergence = (pk * log_prob_ratio).sum(dim="diameter_bin_center")
|
|
89
|
+
|
|
90
|
+
# Other statistics that can be computed also from different diameter discretization
|
|
91
|
+
# - Compute max deviation at distribution mode
|
|
92
|
+
max_deviation = observed_values.max(dim="diameter_bin_center") - fitted_values.max(dim="diameter_bin_center")
|
|
93
|
+
max_relative_deviation = max_deviation / fitted_values.max(dim="diameter_bin_center")
|
|
94
|
+
|
|
95
|
+
# - Compute diameter difference of the distribution mode
|
|
96
|
+
diameter_mode_deviation = get_mode_diameter(observed_values, diameter) - get_mode_diameter(
|
|
97
|
+
fitted_values,
|
|
98
|
+
diameter,
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
# Create an xarray.Dataset to hold the computed statistics
|
|
102
|
+
ds = xr.Dataset(
|
|
103
|
+
{
|
|
104
|
+
"r2": pearson_r**2, # Squared Pearson correlation coefficient
|
|
105
|
+
"mse": mse, # Mean Squared Error
|
|
106
|
+
"max_error": max_error, # Maximum Absolute Error
|
|
107
|
+
"relative_max_error": relative_max_error, # Relative Maximum Error
|
|
108
|
+
"total_number_concentration_difference": total_number_concentration_difference,
|
|
109
|
+
"kl_divergence": kl_divergence, # Kullback-Leibler divergence
|
|
110
|
+
"max_deviation": max_deviation, # Deviation at distribution mode
|
|
111
|
+
"max_relative_deviation": max_relative_deviation, # Relative deviation at mode
|
|
112
|
+
"diameter_mode_deviation": diameter_mode_deviation, # Difference in mode diameters
|
|
113
|
+
},
|
|
114
|
+
)
|
|
115
|
+
return ds
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
####--------------------------------------------------------------------------------------.
|
|
119
|
+
#### Maximum Likelihood (ML)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def get_expected_probabilities(params, cdf_func, pdf_func, bin_edges, probability_method, normalized=False):
|
|
123
|
+
"""
|
|
124
|
+
Compute the expected probabilities for each bin given the distribution parameters.
|
|
125
|
+
|
|
126
|
+
Parameters
|
|
127
|
+
----------
|
|
128
|
+
params : array-like
|
|
129
|
+
Parameters for the CDF or PDF function.
|
|
130
|
+
cdf_func : callable
|
|
131
|
+
Cumulative distribution function (CDF) that takes bin edges and parameters as inputs.
|
|
132
|
+
pdf_func : callable
|
|
133
|
+
Probability density function (PDF) that takes a value and parameters as inputs.
|
|
134
|
+
bin_edges : array-like
|
|
135
|
+
Edges of the bins for which to compute the probabilities.
|
|
136
|
+
probability_method : {'cdf', 'pdf'}
|
|
137
|
+
Method to compute the probabilities. If 'cdf', use the CDF to compute probabilities.
|
|
138
|
+
If 'pdf', integrate the PDF over each bin range.
|
|
139
|
+
normalized : bool, optional
|
|
140
|
+
If True, normalize the probabilities to sum to 1. Default is False.
|
|
141
|
+
|
|
142
|
+
Returns
|
|
143
|
+
-------
|
|
144
|
+
expected_probabilities : numpy.ndarray
|
|
145
|
+
Array of expected probabilities for each bin.
|
|
146
|
+
|
|
147
|
+
Notes
|
|
148
|
+
-----
|
|
149
|
+
- If the 'cdf' method is used, the probabilities are computed as the difference in CDF values at the bin edges.
|
|
150
|
+
- If the 'pdf' method is used, the probabilities are computed by integrating the PDF over each bin range.
|
|
151
|
+
- Any zero or negative probabilities are replaced with a very small positive number (1e-10) to ensure optimization.
|
|
152
|
+
- If `normalized` is True, the probabilities are normalized to sum to 1.
|
|
153
|
+
|
|
154
|
+
"""
|
|
155
|
+
if probability_method == "cdf":
|
|
156
|
+
# Compute the CDF at bin edges
|
|
157
|
+
cdf_vals = cdf_func(bin_edges, params)
|
|
158
|
+
# Compute probabilities for each bin
|
|
159
|
+
expected_probabilities = np.diff(cdf_vals)
|
|
160
|
+
# Replace any zero or negative probabilities with a very small positive number
|
|
161
|
+
# --> Otherwise do not optimize ...
|
|
162
|
+
expected_probabilities = np.maximum(expected_probabilities, 1e-10)
|
|
163
|
+
# Or integrate PDF over the bin range
|
|
164
|
+
else: # probability_method == "pdf":
|
|
165
|
+
# For each bin, integrate the PDF over the bin range
|
|
166
|
+
expected_probabilities = np.array(
|
|
167
|
+
[quad(lambda x: pdf_func(x, params), bin_edges[i], bin_edges[i + 1])[0] for i in range(len(bin_edges) - 1)],
|
|
168
|
+
)
|
|
169
|
+
if normalized:
|
|
170
|
+
# Normalize probabilities to sum to 1
|
|
171
|
+
total_probability = np.sum(expected_probabilities)
|
|
172
|
+
expected_probabilities /= total_probability
|
|
173
|
+
return expected_probabilities
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def get_adjusted_nt(cdf, params, Nt, bin_edges):
|
|
177
|
+
"""Adjust Nt for the proportion of missing drops. See Johnson's et al., 2013 Eqs. 3 and 4."""
|
|
178
|
+
# Estimate proportion of missing drops (Johnson's 2011 Eqs. 3)
|
|
179
|
+
# --> Alternative: p = 1 - np.sum(pdf(diameter, params)* diameter_bin_width) # [-]
|
|
180
|
+
p = 1 - np.diff(cdf([bin_edges[0], bin_edges[-1]], params)).item() # [-]
|
|
181
|
+
# Adjusts Nt for the proportion of drops not observed
|
|
182
|
+
# p = np.clip(p, 0, 1 - 1e-12)
|
|
183
|
+
if np.isclose(p, 1, atol=1e-12):
|
|
184
|
+
return np.nan
|
|
185
|
+
return Nt / (1 - p) # [m-3]
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def compute_negative_log_likelihood(
|
|
189
|
+
params,
|
|
190
|
+
bin_edges,
|
|
191
|
+
counts,
|
|
192
|
+
cdf_func,
|
|
193
|
+
pdf_func,
|
|
194
|
+
param_constraints=None,
|
|
195
|
+
probability_method="cdf",
|
|
196
|
+
likelihood="multinomial",
|
|
197
|
+
truncated_likelihood=True,
|
|
198
|
+
):
|
|
199
|
+
"""
|
|
200
|
+
General negative log-likelihood function for fitting distributions to binned data.
|
|
201
|
+
|
|
202
|
+
Parameters
|
|
203
|
+
----------
|
|
204
|
+
params : array-like
|
|
205
|
+
Parameters of the distribution.
|
|
206
|
+
bin_edges : array-like
|
|
207
|
+
Edges of the bins (length N+1).
|
|
208
|
+
counts : array-like
|
|
209
|
+
Observed counts in each bin (length N).
|
|
210
|
+
cdf_func : callable
|
|
211
|
+
Cumulative distribution function of the distribution.
|
|
212
|
+
pdf_func : callable
|
|
213
|
+
Probability density function of the distribution.
|
|
214
|
+
param_constraints : callable, optional
|
|
215
|
+
Function that checks if parameters are valid.
|
|
216
|
+
probability_method : str, optional
|
|
217
|
+
Method to compute expected probabilities, either 'cdf' or 'pdf'. Default is 'cdf'.
|
|
218
|
+
likelihood : str, optional
|
|
219
|
+
Type of likelihood to compute, either 'multinomial' or 'poisson'. Default is 'multinomial'.
|
|
220
|
+
truncated_likelihood : bool, optional
|
|
221
|
+
Whether to normalize the expected probabilities. Default is True.
|
|
222
|
+
nll : float
|
|
223
|
+
Negative log-likelihood value.
|
|
224
|
+
|
|
225
|
+
Returns
|
|
226
|
+
-------
|
|
227
|
+
nll: float
|
|
228
|
+
The negative log-likelihood value.
|
|
229
|
+
"""
|
|
230
|
+
# Check if parameters are valid
|
|
231
|
+
if param_constraints is not None and not param_constraints(params):
|
|
232
|
+
return np.inf
|
|
233
|
+
|
|
234
|
+
# Compute (unormalized) expected probabilities using CDF
|
|
235
|
+
expected_probabilities = get_expected_probabilities(
|
|
236
|
+
params=params,
|
|
237
|
+
cdf_func=cdf_func,
|
|
238
|
+
pdf_func=pdf_func,
|
|
239
|
+
bin_edges=bin_edges,
|
|
240
|
+
probability_method=probability_method,
|
|
241
|
+
normalized=truncated_likelihood,
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
# Ensure expected probabilities are valid
|
|
245
|
+
if np.any(expected_probabilities <= 0):
|
|
246
|
+
return np.inf
|
|
247
|
+
|
|
248
|
+
# Compute negative log-likelihood
|
|
249
|
+
if likelihood == "poisson":
|
|
250
|
+
n_total = np.sum(counts)
|
|
251
|
+
expected_counts = expected_probabilities * n_total
|
|
252
|
+
expected_counts = np.maximum(expected_counts, 1e-10) # Avoid zero expected counts
|
|
253
|
+
nll = -np.sum(counts * np.log(expected_counts) - expected_counts)
|
|
254
|
+
else: # likelihood == "multinomial":
|
|
255
|
+
# Compute likelihood
|
|
256
|
+
nll = -np.sum(counts * np.log(expected_probabilities))
|
|
257
|
+
return nll
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def estimate_lognormal_parameters(
|
|
261
|
+
counts,
|
|
262
|
+
bin_edges,
|
|
263
|
+
probability_method="cdf",
|
|
264
|
+
likelihood="multinomial",
|
|
265
|
+
truncated_likelihood=True,
|
|
266
|
+
output_dictionary=True,
|
|
267
|
+
optimizer="Nelder-Mead",
|
|
268
|
+
):
|
|
269
|
+
"""
|
|
270
|
+
Estimate the parameters of a lognormal distribution given histogram data.
|
|
271
|
+
|
|
272
|
+
Parameters
|
|
273
|
+
----------
|
|
274
|
+
counts : array-like
|
|
275
|
+
The counts for each bin in the histogram.
|
|
276
|
+
bin_edges : array-like
|
|
277
|
+
The edges of the bins.
|
|
278
|
+
probability_method : str, optional
|
|
279
|
+
The method to compute probabilities, either ``"cdf"`` or ``"pdf"``. The default value is ``"cdf"``.
|
|
280
|
+
likelihood : str, optional
|
|
281
|
+
The likelihood function to use, either ``"multinomial"`` or ``"poisson"``.
|
|
282
|
+
The default value is ``"multinomial"``.
|
|
283
|
+
truncated_likelihood : bool, optional
|
|
284
|
+
Whether to use truncated likelihood. The default value is ``True``.
|
|
285
|
+
output_dictionary : bool, optional
|
|
286
|
+
Whether to return the output as a dictionary.
|
|
287
|
+
If False, returns a numpy array. The default value is ``True``
|
|
288
|
+
optimizer : str, optional
|
|
289
|
+
The optimization method to use. Default is ``"Nelder-Mead"``.
|
|
290
|
+
|
|
291
|
+
Returns
|
|
292
|
+
-------
|
|
293
|
+
dict or numpy.ndarray
|
|
294
|
+
The estimated parameters of the lognormal distribution.
|
|
295
|
+
If ``output_dictionary`` is ``True``, returns a dictionary with keys ``Nt``, ``mu``, and ``sigma``.
|
|
296
|
+
If ``output_dictionary`` is ``False``,returns a numpy array with values [Nt, mu, sigma].
|
|
297
|
+
|
|
298
|
+
Notes
|
|
299
|
+
-----
|
|
300
|
+
The lognormal distribution is defined as:
|
|
301
|
+
N(D) = Nt / (sqrt(2 * pi) * sigma * D) * exp(-(ln(D) - mu)**2 / (2 * sigma**2))
|
|
302
|
+
where Nt is the total number of counts, mu is the mean of the log of the distribution,
|
|
303
|
+
and sigma is the standard deviation of the log of the distribution.
|
|
304
|
+
|
|
305
|
+
References
|
|
306
|
+
----------
|
|
307
|
+
.. [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.lognorm.html#scipy.stats.lognorm
|
|
308
|
+
"""
|
|
309
|
+
# LogNormal
|
|
310
|
+
# - mu = log(scale)
|
|
311
|
+
# - loc = 0
|
|
312
|
+
|
|
313
|
+
# Initialize bad results
|
|
314
|
+
null_output = (
|
|
315
|
+
{"Nt": np.nan, "mu": np.nan, "sigma": np.nan} if output_dictionary else np.array([np.nan, np.nan, np.nan])
|
|
316
|
+
)
|
|
317
|
+
|
|
318
|
+
# Define the CDF and PDF functions for the lognormal distribution
|
|
319
|
+
def lognorm_cdf(x, params):
|
|
320
|
+
sigma, scale = params
|
|
321
|
+
return ss.lognorm.cdf(x, sigma, loc=0, scale=scale)
|
|
322
|
+
|
|
323
|
+
def lognorm_pdf(x, params):
|
|
324
|
+
sigma, scale = params
|
|
325
|
+
return ss.lognorm.pdf(x, sigma, loc=0, scale=scale)
|
|
326
|
+
|
|
327
|
+
# Define valid parameters for the lognormal distribution
|
|
328
|
+
def param_constraints(params):
|
|
329
|
+
sigma, scale = params
|
|
330
|
+
return sigma > 0 and scale > 0
|
|
331
|
+
|
|
332
|
+
# Definite initial guess for the parameters
|
|
333
|
+
initial_params = [1.0, 1.0] # sigma, scale
|
|
334
|
+
|
|
335
|
+
# Define bounds for sigma and scale
|
|
336
|
+
bounds = [(1e-6, None), (1e-6, None)]
|
|
337
|
+
|
|
338
|
+
# Minimize the negative log-likelihood
|
|
339
|
+
with suppress_warnings():
|
|
340
|
+
result = minimize(
|
|
341
|
+
compute_negative_log_likelihood,
|
|
342
|
+
initial_params,
|
|
343
|
+
args=(
|
|
344
|
+
bin_edges,
|
|
345
|
+
counts,
|
|
346
|
+
lognorm_cdf,
|
|
347
|
+
lognorm_pdf,
|
|
348
|
+
param_constraints,
|
|
349
|
+
probability_method,
|
|
350
|
+
likelihood,
|
|
351
|
+
truncated_likelihood,
|
|
352
|
+
),
|
|
353
|
+
bounds=bounds,
|
|
354
|
+
method=optimizer,
|
|
355
|
+
)
|
|
356
|
+
|
|
357
|
+
# Check if the fit had success
|
|
358
|
+
if not result.success:
|
|
359
|
+
return null_output
|
|
360
|
+
|
|
361
|
+
# Define Nt
|
|
362
|
+
Nt = np.sum(counts).item()
|
|
363
|
+
|
|
364
|
+
# Retrieve parameters
|
|
365
|
+
params = result.x
|
|
366
|
+
if truncated_likelihood:
|
|
367
|
+
Nt = get_adjusted_nt(cdf=lognorm_cdf, params=params, Nt=Nt, bin_edges=bin_edges)
|
|
368
|
+
sigma, scale = params
|
|
369
|
+
mu = np.log(scale)
|
|
370
|
+
|
|
371
|
+
# Define output
|
|
372
|
+
output = {"Nt": Nt, "mu": mu, "sigma": sigma} if output_dictionary else np.array([Nt, mu, sigma])
|
|
373
|
+
return output
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def estimate_exponential_parameters(
    counts,
    bin_edges,
    probability_method="cdf",
    likelihood="multinomial",
    truncated_likelihood=True,
    output_dictionary=True,
    optimizer="Nelder-Mead",
):
    """
    Estimate the parameters of an exponential distribution given histogram data.

    Parameters
    ----------
    counts : array-like
        The counts for each bin in the histogram.
    bin_edges : array-like
        The edges of the bins.
    probability_method : str, optional
        The method to compute probabilities, either ``"cdf"`` or ``"pdf"``. The default value is ``"cdf"``.
    likelihood : str, optional
        The likelihood function to use, either ``"multinomial"`` or ``"poisson"``.
        The default value is ``"multinomial"``.
    truncated_likelihood : bool, optional
        Whether to use truncated likelihood. The default value is ``True``.
    output_dictionary : bool, optional
        Whether to return the output as a dictionary.
        If False, returns a numpy array. The default value is ``True``.
    optimizer : str, optional
        The optimization method to use. Default is ``"Nelder-Mead"``.

    Returns
    -------
    dict or numpy.ndarray
        The estimated parameters of the exponential distribution.
        If ``output_dictionary`` is ``True``, returns a dictionary with keys ``N0`` and ``Lambda``.
        If ``output_dictionary`` is ``False``, returns a numpy array with [N0, Lambda].

    Notes
    -----
    The exponential distribution is defined as:
    N(D) = N0 * exp(-Lambda * D) = Nt * Lambda * exp(-Lambda * D)
    where Lambda = 1 / scale and N0 = Nt * Lambda.

    References
    ----------
    .. [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.expon.html
    """
    # Result returned whenever the fit cannot be performed
    failure_result = {"N0": np.nan, "Lambda": np.nan} if output_dictionary else np.array([np.nan, np.nan])

    # CDF/PDF of the exponential model; the parameter vector holds only the scale
    def model_cdf(x, params):
        return ss.expon.cdf(x, loc=0, scale=params[0])

    def model_pdf(x, params):
        return ss.expon.pdf(x, loc=0, scale=params[0])

    # A candidate parameter vector is valid when the scale is strictly positive
    def is_valid(params):
        return params[0] > 0

    # Starting point and admissible region for the scale parameter
    first_guess = [1.0]  # scale
    scale_bounds = [(1e-6, None)]

    # Fit by minimizing the negative log-likelihood
    with suppress_warnings():
        fit = minimize(
            compute_negative_log_likelihood,
            first_guess,
            args=(
                bin_edges,
                counts,
                model_cdf,
                model_pdf,
                is_valid,
                probability_method,
                likelihood,
                truncated_likelihood,
            ),
            bounds=scale_bounds,
            method=optimizer,
        )

    # Bail out when the optimizer did not converge
    if not fit.success:
        return failure_result

    # Total drop count, corrected for truncation when requested
    total_counts = np.sum(counts).item()
    fitted_params = fit.x
    if truncated_likelihood:
        total_counts = get_adjusted_nt(cdf=model_cdf, params=fitted_params, Nt=total_counts, bin_edges=bin_edges)

    # Map the scipy scale parameterization to (N0, Lambda)
    Lambda = 1 / fitted_params[0]
    N0 = total_counts * Lambda
    return {"N0": N0, "Lambda": Lambda} if output_dictionary else np.array([N0, Lambda])
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def estimate_gamma_parameters(
    counts,
    a,
    scale,
    bin_edges,
    probability_method="cdf",
    likelihood="multinomial",
    truncated_likelihood=True,
    output_dictionary=True,
    optimizer="Nelder-Mead",
):
    """
    Estimate the parameters of a gamma distribution given histogram data.

    Parameters
    ----------
    counts : array-like
        The counts for each bin in the histogram.
    a : float
        The shape parameter of the scipy.stats.gamma distribution.
        A good default value is 1.
    scale : float
        The scale parameter of the scipy.stats.gamma distribution.
        A good default value is 1.
    bin_edges : array-like
        The edges of the bins.
    probability_method : str, optional
        The method to compute probabilities, either ``"cdf"`` or ``"pdf"``. The default value is ``"cdf"``.
    likelihood : str, optional
        The likelihood function to use, either ``"multinomial"`` or ``"poisson"``.
        The default value is ``"multinomial"``.
    truncated_likelihood : bool, optional
        Whether to use truncated likelihood. The default value is ``True``.
    output_dictionary : bool, optional
        Whether to return the output as a dictionary.
        If False, returns a numpy array. The default value is ``True``.
    optimizer : str, optional
        The optimization method to use. Default is ``"Nelder-Mead"``.

    Returns
    -------
    dict or numpy.ndarray
        The estimated parameters of the gamma distribution.
        If ``output_dictionary`` is ``True``, returns a dictionary with keys ``N0``, ``mu`` and ``Lambda``.
        If ``output_dictionary`` is ``False``, returns a numpy array with [N0, mu, Lambda].

    Notes
    -----
    The gamma distribution is defined as:
    N(D) = N0 * D**mu * exp(-Lambda*D)
    where Lambda = 1/scale, and mu = a - 1 with ``a`` being the shape parameter of the gamma distribution.
    N0 is defined as N0 = Nt*Lambda**(mu+1)/gamma(mu+1).

    References
    ----------
    .. [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gamma.html

    """
    # Initialize bad results
    # BUGFIX: the failure dictionary previously used the key "lambda" while the
    # success path returns "Lambda"; both paths now expose the same keys.
    null_output = (
        {"N0": np.nan, "mu": np.nan, "Lambda": np.nan} if output_dictionary else np.array([np.nan, np.nan, np.nan])
    )

    # Define the CDF and PDF functions for the gamma distribution
    def gamma_cdf(x, params):
        a, scale = params
        return ss.gamma.cdf(x, a, loc=0, scale=scale)

    def gamma_pdf(x, params):
        a, scale = params
        return ss.gamma.pdf(x, a, loc=0, scale=scale)

    # Define valid parameters for the gamma distribution
    # mu = -0.99 is a vertical line essentially ...
    def param_constraints(params):
        a, scale = params
        return a > 0.1 and scale > 0  # using a > 0 cause some troubles

    # Define initial guess for the parameters
    initial_params = [a, scale]  # (mu=a-1, a=mu+1)

    # Define bounds for a and scale
    bounds = [(1e-6, None), (1e-6, None)]

    # Minimize the negative log-likelihood
    with suppress_warnings():
        result = minimize(
            compute_negative_log_likelihood,
            initial_params,
            args=(
                bin_edges,
                counts,
                gamma_cdf,
                gamma_pdf,
                param_constraints,
                probability_method,
                likelihood,
                truncated_likelihood,
            ),
            method=optimizer,
            bounds=bounds,
        )

    # Check if the fit had success
    if not result.success:
        return null_output

    # Define Nt
    Nt = np.sum(counts).item()

    # Retrieve parameters (adjusting Nt for the unobserved truncated tails if requested)
    params = result.x
    if truncated_likelihood:
        Nt = get_adjusted_nt(cdf=gamma_cdf, params=params, Nt=Nt, bin_edges=bin_edges)
    a, scale = params
    mu = a - 1
    Lambda = 1 / scale

    # Compute N0
    # - Use logarithmic computations to prevent overflow
    # - N0 = Nt * Lambda ** (mu + 1) / gamma(mu + 1)
    with suppress_warnings():
        log_N0 = np.log(Nt) + (mu + 1) * np.log(Lambda) - gammaln(mu + 1)
        N0 = np.exp(log_N0)

    # Set parameters to np.nan if any of the parameters is not a finite number
    if not np.isfinite(N0) or not np.isfinite(mu) or not np.isfinite(Lambda):
        return null_output

    # Define output
    output = {"N0": N0, "mu": mu, "Lambda": Lambda} if output_dictionary else np.array([N0, mu, Lambda])
    return output
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
def _get_initial_gamma_parameters(ds, mom_method=None):
    """Return a dataset with initial ``a`` and ``scale`` guesses for the gamma fit.

    When ``mom_method`` is None, both ``a`` and ``scale`` default to 1.
    Otherwise a method-of-moments estimate is converted from (mu, Lambda)
    to the scipy (a, scale) parameterization.
    """
    if mom_method is None:
        # Flat defaults with the same dims/coords as the M1 moment
        ds_init = xr.Dataset(
            {
                "a": xr.ones_like(ds["M1"]),
                "scale": xr.ones_like(ds["M1"]),
            },
        )
    else:
        # Method-of-moments initialization, then (mu, Lambda) -> (a, scale)
        ds_init = get_mom_parameters(ds=ds, psd_model="GammaPSD", mom_methods=mom_method)
        ds_init["a"] = ds_init["mu"] + 1
        ds_init["scale"] = 1 / ds_init["Lambda"]

    # Fall back to the M1 moment wherever initialization produced non-finite values
    ds_init["a"] = xr.where(np.isfinite(ds_init["a"]), ds_init["a"], ds["M1"])
    ds_init["scale"] = xr.where(np.isfinite(ds_init["scale"]), ds_init["scale"], ds["M1"])
    return ds_init
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
def get_gamma_parameters(
    ds,
    init_method=None,
    probability_method="cdf",
    likelihood="multinomial",
    truncated_likelihood=True,
    optimizer="Nelder-Mead",
):
    """
    Estimate gamma distribution parameters for drop size distribution (DSD) data.

    Parameters
    ----------
    ds : xarray.Dataset
        Input dataset containing drop size distribution data. It must include the following variables:
        - ``drop_number_concentration``: The number concentration of drops.
        - ``diameter_bin_width``: The width of each diameter bin.
        - ``diameter_bin_lower``: The lower bounds of the diameter bins.
        - ``diameter_bin_upper``: The upper bounds of the diameter bins.
        - ``diameter_bin_center``: The center values of the diameter bins.
        - The moments M0...M6 variables required to compute the initial parameters
          with the specified mom_method.
    init_method : str or list
        The method(s) of moments used to initialize the gamma parameters.
        If None, the scale parameter is set to 1 and mu to 0 (a=1).
    probability_method : str, optional
        Method to compute probabilities. The default value is ``cdf``.
    likelihood : str, optional
        Likelihood function to use for fitting. The default value is ``multinomial``.
    truncated_likelihood : bool, optional
        Whether to use truncated likelihood. The default value is ``True``.
    optimizer : str, optional
        Optimization method to use. The default value is ``Nelder-Mead``.

    Returns
    -------
    xarray.Dataset
        Dataset containing the estimated gamma distribution parameters:
        - ``N0``: Intercept parameter.
        - ``mu``: Shape parameter.
        - ``Lambda``: Scale parameter.
        The dataset will also have an attribute ``disdrodb_psd_model`` set to ``GammaPSD``.

    Notes
    -----
    The function uses `xr.apply_ufunc` to fit the gamma distribution parameters
    in parallel, leveraging Dask for parallel computation.

    """
    # Define inputs
    counts = ds["drop_number_concentration"] * ds["diameter_bin_width"]
    diameter_breaks = np.append(ds["diameter_bin_lower"].data, ds["diameter_bin_upper"].data[-1])

    # Define initial parameters (a, scale)
    ds_init = _get_initial_gamma_parameters(ds, mom_method=init_method)

    # Define kwargs
    kwargs = {
        "output_dictionary": False,
        "bin_edges": diameter_breaks,
        "probability_method": probability_method,
        "likelihood": likelihood,
        "truncated_likelihood": truncated_likelihood,
        "optimizer": optimizer,
    }

    # Fit distribution in parallel
    # BUGFIX: removed a leftover debug statement
    # (`ds_init.isel(velocity_method=0, time=-3)`) whose result was discarded and
    # which raised when those hard-coded dimensions were absent.
    da_params = xr.apply_ufunc(
        estimate_gamma_parameters,
        counts,
        ds_init["a"],
        ds_init["scale"],
        kwargs=kwargs,
        input_core_dims=[["diameter_bin_center"], [], []],
        output_core_dims=[["parameters"]],
        vectorize=True,
        dask="parallelized",
        dask_gufunc_kwargs={"output_sizes": {"parameters": 3}},  # lengths of the new output_core_dims dimensions.
        output_dtypes=["float64"],
    )

    # Add parameters coordinates
    da_params = da_params.assign_coords({"parameters": ["N0", "mu", "Lambda"]})

    # Create parameters dataset
    ds_params = da_params.to_dataset(dim="parameters")

    # Add DSD model name to the attribute
    ds_params.attrs["disdrodb_psd_model"] = "GammaPSD"
    return ds_params
|
|
734
|
+
|
|
735
|
+
|
|
736
|
+
def get_lognormal_parameters(
    ds,
    init_method=None,  # noqa: ARG001
    probability_method="cdf",
    likelihood="multinomial",
    truncated_likelihood=True,
    optimizer="Nelder-Mead",
):
    """
    Estimate lognormal distribution parameters for drop size distribution (DSD) data.

    Parameters
    ----------
    ds : xarray.Dataset
        Input dataset containing drop size distribution data. It must include the following variables:
        - ``drop_number_concentration``: The number concentration of drops.
        - ``diameter_bin_width``: The width of each diameter bin.
        - ``diameter_bin_lower``: The lower bounds of the diameter bins.
        - ``diameter_bin_upper``: The upper bounds of the diameter bins.
        - ``diameter_bin_center``: The center values of the diameter bins.
    init_method : str or list, optional
        Currently unused; accepted for signature compatibility with the other fitting routines.
    probability_method : str, optional
        Method to compute probabilities. The default value is ``cdf``.
    likelihood : str, optional
        Likelihood function to use for fitting. The default value is ``multinomial``.
    truncated_likelihood : bool, optional
        Whether to use truncated likelihood. The default value is ``True``.
    optimizer : str, optional
        Optimization method to use. The default value is ``Nelder-Mead``.

    Returns
    -------
    xarray.Dataset
        Dataset containing the estimated lognormal distribution parameters:
        - ``Nt``: Total number concentration.
        - ``mu``: Mean of the lognormal distribution.
        - ``sigma``: Standard deviation of the lognormal distribution.
        The resulting dataset will have an attribute ``disdrodb_psd_model`` set to ``LognormalPSD``.

    Notes
    -----
    The function uses `xr.apply_ufunc` to fit the lognormal distribution parameters
    in parallel, leveraging Dask for parallel computation.

    """
    # Per-bin drop counts and the complete set of bin boundaries
    bin_counts = ds["drop_number_concentration"] * ds["diameter_bin_width"]
    bin_boundaries = np.append(ds["diameter_bin_lower"].data, ds["diameter_bin_upper"].data[-1])

    # Options forwarded to the per-sample estimator
    estimator_options = {
        "output_dictionary": False,
        "bin_edges": bin_boundaries,
        "probability_method": probability_method,
        "likelihood": likelihood,
        "truncated_likelihood": truncated_likelihood,
        "optimizer": optimizer,
    }

    # Fit every sample in parallel (Dask-aware)
    fitted = xr.apply_ufunc(
        estimate_lognormal_parameters,
        bin_counts,
        kwargs=estimator_options,
        input_core_dims=[["diameter_bin_center"]],
        output_core_dims=[["parameters"]],
        vectorize=True,
        dask="parallelized",
        dask_gufunc_kwargs={"output_sizes": {"parameters": 3}},  # size of the new "parameters" dimension
        output_dtypes=["float64"],
    )

    # Label the parameter axis and unpack one variable per parameter
    fitted = fitted.assign_coords({"parameters": ["Nt", "mu", "sigma"]})
    ds_params = fitted.to_dataset(dim="parameters")

    # Record which PSD model was fitted
    ds_params.attrs["disdrodb_psd_model"] = "LognormalPSD"

    return ds_params
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
def get_exponential_parameters(
    ds,
    init_method=None,  # noqa: ARG001
    probability_method="cdf",
    likelihood="multinomial",
    truncated_likelihood=True,
    optimizer="Nelder-Mead",
):
    """
    Estimate the parameters of an exponential particle size distribution (PSD) from the given dataset.

    Fitting this model is equivalent to fitting a GammaPSD model fixing ``mu`` to 0.

    Parameters
    ----------
    ds : xarray.Dataset
        Input dataset containing drop number concentration data and diameter information.
        It must include the following variables:
        - ``drop_number_concentration``: The number concentration of drops.
        - ``diameter_bin_width``: The width of each diameter bin.
        - ``diameter_bin_lower``: The lower bounds of the diameter bins.
        - ``diameter_bin_upper``: The upper bounds of the diameter bins.
        - ``diameter_bin_center``: The center values of the diameter bins.
    init_method : str or list, optional
        Currently unused; accepted for signature compatibility with the other fitting routines.
    probability_method : str, optional
        Method to compute probabilities. The default value is ``cdf``.
    likelihood : str, optional
        Likelihood function to use for fitting. The default value is ``multinomial``.
    truncated_likelihood : bool, optional
        Whether to use truncated likelihood. The default value is ``True``.
    optimizer : str, optional
        Optimization method to use. The default value is ``Nelder-Mead``.

    Returns
    -------
    xarray.Dataset
        Dataset containing the estimated exponential distribution parameters:
        - ``N0``: Intercept parameter.
        - ``Lambda``: Scale parameter.
        The resulting dataset will have an attribute ``disdrodb_psd_model`` set to ``ExponentialPSD``.

    Notes
    -----
    The function uses `xr.apply_ufunc` to fit the exponential distribution parameters
    in parallel, leveraging Dask for parallel computation.

    """
    # Per-bin drop counts and the complete set of bin boundaries
    bin_counts = ds["drop_number_concentration"] * ds["diameter_bin_width"]
    bin_boundaries = np.append(ds["diameter_bin_lower"].data, ds["diameter_bin_upper"].data[-1])

    # Options forwarded to the per-sample estimator
    estimator_options = {
        "output_dictionary": False,
        "bin_edges": bin_boundaries,
        "probability_method": probability_method,
        "likelihood": likelihood,
        "truncated_likelihood": truncated_likelihood,
        "optimizer": optimizer,
    }

    # Fit every sample in parallel (Dask-aware)
    fitted = xr.apply_ufunc(
        estimate_exponential_parameters,
        bin_counts,
        kwargs=estimator_options,
        input_core_dims=[["diameter_bin_center"]],
        output_core_dims=[["parameters"]],
        vectorize=True,
        dask="parallelized",
        dask_gufunc_kwargs={"output_sizes": {"parameters": 2}},  # size of the new "parameters" dimension
        output_dtypes=["float64"],
    )

    # Label the parameter axis and unpack one variable per parameter
    fitted = fitted.assign_coords({"parameters": ["N0", "Lambda"]})
    ds_params = fitted.to_dataset(dim="parameters")

    # Record which PSD model was fitted
    ds_params.attrs["disdrodb_psd_model"] = "ExponentialPSD"
    return ds_params
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
####-------------------------------------------------------------------------------------------------------------------.
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
def _estimate_gamma_parameters_johnson(
    drop_number_concentration,
    diameter,
    diameter_breaks,
    output_dictionary=True,
    method="Nelder-Mead",
    mu=0.5,
    Lambda=3,
    **kwargs,
):
    """Deprecated maximum likelihood estimation of the Gamma model.

    Fits N(D) = N_T * lambda**(mu+1) / gamma(mu+1) * D**mu * exp(-lambda*D)
    to a single binned drop spectrum, following Johnson et al. (2011, 2014).

    Parameters
    ----------
    drop_number_concentration : array-like
        The DSD for which to find parameters [mm-1 m-3].
    diameter : array-like
        Class-centre diameters for each DSD bin [mm].
    diameter_breaks : array-like
        Bin edges [mm]; bin widths are derived from these.
    output_dictionary : bool, optional
        If True, return a dict; otherwise a numpy array [mu, Lambda, N0].
    method : str, optional
        scipy.optimize.minimize method. Default ``"Nelder-Mead"``.
    mu : float, optional
        Initial value for the shape parameter mu [-].
    Lambda : float, optional
        Initial value for the slope parameter lambda [mm-1].
    **kwargs
        Extra arguments forwarded to the optimization routine.

    Returns
    -------
    dict or numpy.ndarray
        Estimated mu (shape), Lambda (slope) and N0 (intercept).

    Notes
    -----
    The last bin counts are not accounted in the fitting procedure !

    References
    ----------
    Johnson, R. W., D. V. Kliche, and P. L. Smith, 2011: Comparison of Estimators for Parameters of Gamma Distributions
    with Left-Truncated Samples. J. Appl. Meteor. Climatol., 50, 296-310, https://doi.org/10.1175/2010JAMC2478.1

    Johnson, R.W., Kliche, D., & Smith, P.L. (2010).
    Maximum likelihood estimation of gamma parameters for coarsely binned and truncated raindrop size data.
    Quarterly Journal of the Royal Meteorological Society, 140. DOI:10.1002/qj.2209

    """
    # Initialize bad results
    # NOTE(review): the failure dict uses key "lambda" while the success path
    # below returns "Lambda" — confirm whether this asymmetry is intended.
    if output_dictionary:
        null_output = {"mu": np.nan, "lambda": np.nan, "N0": np.nan}
    else:
        null_output = np.array([np.nan, np.nan, np.nan])

    # Initialize parameters
    # --> Ideally with method of moments estimate
    # --> See equation 8 of Johnson's 2013
    x0 = [mu, Lambda]

    # Compute diameter_bin_width from the bin edges
    diameter_bin_width = np.diff(diameter_breaks)

    # Convert drop_number_concentration from mm-1 m-3 to m-3.
    spectra = np.asarray(drop_number_concentration) * diameter_bin_width

    # Define cost function (negative log-likelihood of the binned, truncated gamma)
    # - Parameter to be optimized on first positions
    def _cost_function(parameters, spectra, diameter_breaks):
        # Assume spectra to be in unit [m-3] (drop_number_concentration*diameter_bin_width) !
        mu, Lambda = parameters
        # Precompute gamma integrals between various diameter bins
        # - gammainc(mu+1) already divides the integral by gamma(mu+1) !
        pgamma_d = gammainc(mu + 1, Lambda * diameter_breaks)
        # Compute probability within each bin interval
        delta_pgamma_bins = pgamma_d[1:] - pgamma_d[:-1]
        # Compute normalization over the observed interval (truncation correction)
        denominator = pgamma_d[-1] - pgamma_d[0]
        # Compute cost function only where the parameters are admissible
        # a = mu - 1, x = lambda
        if mu > -1 and Lambda > 0:
            cost = np.sum(-spectra * np.log(delta_pgamma_bins / denominator))
            return cost
        return np.inf

    # Minimize the cost function
    with suppress_warnings():
        bounds = [(0, None), (0, None)]  # Force mu and lambda to be non-negative
        res = minimize(
            _cost_function,
            x0=x0,
            args=(spectra, diameter_breaks),
            method=method,
            bounds=bounds,
            **kwargs,
        )

    # Check if the fit had success
    if not res.success:
        return null_output

    # Extract fitted parameters
    mu = res.x[0]  # [-]
    Lambda = res.x[1]  # [mm-1]

    # Estimate tilde_N_T using the total drop concentration
    tilde_N_T = np.sum(drop_number_concentration * diameter_bin_width)  # [m-3]

    # Estimate proportion of missing drops (Johnson's 2011 Eqs. 3)
    with suppress_warnings():
        D = diameter
        p = 1 - np.sum((Lambda ** (mu + 1)) / gamma(mu + 1) * D**mu * np.exp(-Lambda * D) * diameter_bin_width)  # [-]

    # Convert tilde_N_T to N_T using Johnson's 2013 Eqs. 3 and 4.
    # - Adjusts for the proportion of drops not observed
    N_T = tilde_N_T / (1 - p)  # [m-3]

    # Compute N0
    N0 = N_T * (Lambda ** (mu + 1)) / gamma(mu + 1)  # [m-3 * mm^(-mu-1)]

    # Compute Dm
    # Dm = (mu + 4)/ Lambda

    # Compute Nw
    # Nw = N0* D^mu / f(mu) , with f(mu of the Normalized PSD)

    # Define output
    output = {"mu": mu, "Lambda": Lambda, "N0": N0} if output_dictionary else np.array([mu, Lambda, N0])
    return output
|
|
1027
|
+
|
|
1028
|
+
|
|
1029
|
+
def get_gamma_parameters_johnson2014(ds, method="Nelder-Mead"):
    """Deprecated model. See Gamma Model with truncated_likelihood and 'pdf'."""
    # Inputs: per-sample spectra, bin centers and the full set of bin edges
    number_concentration = ds["drop_number_concentration"]
    bin_centers = ds["diameter_bin_center"]
    bin_boundaries = np.append(ds["diameter_bin_lower"].data, ds["diameter_bin_upper"].data[-1])

    # Options forwarded to the per-sample estimator
    estimator_options = {
        "output_dictionary": False,
        "diameter_breaks": bin_boundaries,
        "method": method,
    }

    # Vectorized fit over all samples
    fitted = xr.apply_ufunc(
        _estimate_gamma_parameters_johnson,
        number_concentration,
        bin_centers,
        kwargs=estimator_options,
        input_core_dims=[["diameter_bin_center"], ["diameter_bin_center"]],
        output_core_dims=[["parameters"]],
        vectorize=True,
    )

    # Label the parameter axis and unpack one variable per parameter
    fitted = fitted.assign_coords({"parameters": ["mu", "Lambda", "N0"]})
    ds_params = fitted.to_dataset(dim="parameters")
    return ds_params
|
|
1057
|
+
|
|
1058
|
+
|
|
1059
|
+
####-----------------------------------------------------------------------------------------.
|
|
1060
|
+
#### Grid Search (GS)
|
|
1061
|
+
|
|
1062
|
+
|
|
1063
|
+
def _compute_rain_rate(ND, D, dD, V):
|
|
1064
|
+
axis = 1 if ND.ndim == 2 else None
|
|
1065
|
+
rain_rate = np.pi / 6 * np.sum(ND * V * (D / 1000) ** 3 * dD, axis=axis) * 3600 * 1000
|
|
1066
|
+
return rain_rate # mm/h
|
|
1067
|
+
|
|
1068
|
+
|
|
1069
|
+
def _compute_lwc(ND, D, dD, rho_w=1000):
|
|
1070
|
+
axis = 1 if ND.ndim == 2 else None
|
|
1071
|
+
lwc = np.pi / 6.0 * (rho_w * 1000) * np.sum((D / 1000) ** 3 * ND * dD, axis=axis)
|
|
1072
|
+
return lwc # g/m3
|
|
1073
|
+
|
|
1074
|
+
|
|
1075
|
+
def _compute_z(ND, D, dD):
|
|
1076
|
+
axis = 1 if ND.ndim == 2 else None
|
|
1077
|
+
z = np.sum(((D) ** 6 * ND * dD), axis=axis) # mm⁶·m⁻³
|
|
1078
|
+
Z = 10 * np.log10(z)
|
|
1079
|
+
return Z
|
|
1080
|
+
|
|
1081
|
+
|
|
1082
|
+
def _compute_cost_function(ND_obs, ND_preds, D, dD, V, target, transformation, error_order):
|
|
1083
|
+
# Assume ND_obs of shape (D bins) and ND_preds of shape (# params, D bins)
|
|
1084
|
+
if target == "ND":
|
|
1085
|
+
if transformation == "identity":
|
|
1086
|
+
errors = np.mean(np.abs(ND_obs[None, :] - ND_preds) ** error_order, axis=1)
|
|
1087
|
+
if transformation == "log":
|
|
1088
|
+
errors = np.mean(np.abs(np.log(ND_obs[None, :] + 1) - np.log(ND_preds + 1)) ** error_order, axis=1)
|
|
1089
|
+
if transformation == "np.sqrt":
|
|
1090
|
+
errors = np.mean(np.abs(np.sqrt(ND_obs[None, :]) - np.sqrt(ND_preds)) ** error_order, axis=1)
|
|
1091
|
+
elif target == "Z":
|
|
1092
|
+
errors = np.abs(_compute_z(ND_obs, D, dD) - _compute_z(ND_preds, D, dD))
|
|
1093
|
+
elif target == "R":
|
|
1094
|
+
errors = np.abs(_compute_rain_rate(ND_obs, D, dD, V) - _compute_rain_rate(ND_preds, D, dD, V))
|
|
1095
|
+
elif target == "LWC":
|
|
1096
|
+
errors = np.abs(_compute_lwc(ND_obs, D, dD) - _compute_lwc(ND_preds, D, dD))
|
|
1097
|
+
else:
|
|
1098
|
+
raise ValueError("Invalid target")
|
|
1099
|
+
return errors
|
|
1100
|
+
|
|
1101
|
+
|
|
1102
|
+
def apply_exponential_gs(
    Nt,
    ND_obs,
    V,
    # Coords
    D,
    dD,
    # Error options
    target,
    transformation,
    error_order,
):
    """Apply Grid Search for the ExponentialPSD distribution."""
    # Candidate slope (Lambda) values to scan
    candidate_lambdas = np.arange(0.01, 20, step=0.01)

    # Perform grid search
    with suppress_warnings():
        # Intercept implied by Nt for each candidate slope: N0 = Nt * Lambda
        candidate_n0 = Nt * candidate_lambdas
        predicted_nd = ExponentialPSD.formula(
            D=D[None, :],
            N0=candidate_n0[:, None],
            Lambda=candidate_lambdas[:, None],
        )

        # Score every candidate spectrum against the observations
        scores = _compute_cost_function(
            ND_obs=ND_obs,
            ND_preds=predicted_nd,
            D=D,
            dD=dD,
            V=V,
            target=target,
            transformation=transformation,
            error_order=error_order,
        )

    # Return the (N0, Lambda) pair with the smallest error
    winner = np.argmin(scores)
    return np.array([candidate_n0[winner].item(), candidate_lambdas[winner].item()])
|
|
1139
|
+
|
|
1140
|
+
|
|
1141
|
+
def _apply_gamma_gs(mu_values, lambda_values, Nt, ND_obs, D, dD, V, target, transformation, error_order):
    """Routine for GammaPSD parameters grid search.

    Evaluates every (mu, Lambda) combination, with N0 constrained by Nt,
    and returns the triplet ``(N0, mu, Lambda)`` minimizing the cost.
    """
    # Cartesian product of (mu, Lambda) candidates
    grids = np.meshgrid(mu_values, lambda_values, indexing="xy")
    mus = grids[0].ravel()
    lambdas = grids[1].ravel()

    # Perform grid search
    with suppress_warnings():
        # N0 = Nt * Lambda**(mu+1) / Gamma(mu+1), computed in log-space for numerical stability
        N0 = np.exp(np.log(Nt) + (mus[:, None] + 1) * np.log(lambdas[:, None]) - gammaln(mus[:, None] + 1))
        predictions = GammaPSD.formula(D=D[None, :], N0=N0, Lambda=lambdas[:, None], mu=mus[:, None])

        # Score every candidate against the observed spectrum
        costs = _compute_cost_function(
            ND_obs=ND_obs,
            ND_preds=predictions,
            D=D,
            dD=dD,
            V=V,
            target=target,
            transformation=transformation,
            error_order=error_order,
        )

    # Select the parameter set minimizing the cost
    idx = np.argmin(costs)
    return N0[idx].item(), mus[idx].item(), lambdas[idx].item()
|
|
1169
|
+
|
|
1170
|
+
|
|
1171
|
+
def apply_gamma_gs(
    Nt,
    ND_obs,
    V,
    # Coords
    D,
    dD,
    # Error options
    target,
    transformation,
    error_order,
):
    """Estimate GammaPSD model parameters using Grid Search.

    Runs a coarse (mu, Lambda) search first, then refines around the
    coarse optimum with a 20x finer step. Returns ``[N0, mu, Lambda]``.
    """
    # Coarse candidate grids
    mu_step = 0.5
    lambda_step = 0.5
    coarse_mu = np.arange(0.01, 20, step=mu_step)
    coarse_lambda = np.arange(0, 60, step=lambda_step)

    # Arguments shared by both search rounds
    common = {
        "Nt": Nt,
        "ND_obs": ND_obs,
        "D": D,
        "dD": dD,
        "V": V,
        "target": target,
        "transformation": transformation,
        "error_order": error_order,
    }

    # Round 1: coarse search
    N0, mu, Lambda = _apply_gamma_gs(mu_values=coarse_mu, lambda_values=coarse_lambda, **common)

    # Round 2: refine within +/- 2 coarse steps of the optimum
    fine_mu = np.arange(mu - mu_step * 2, mu + mu_step * 2, step=mu_step / 20)
    fine_lambda = np.arange(Lambda - lambda_step * 2, Lambda + lambda_step * 2, step=lambda_step / 20)
    N0, mu, Lambda = _apply_gamma_gs(mu_values=fine_mu, lambda_values=fine_lambda, **common)

    return np.array([N0, mu, Lambda])
|
|
1221
|
+
|
|
1222
|
+
|
|
1223
|
+
def _apply_lognormal_gs(mu_values, sigma_values, Nt, ND_obs, D, dD, V, target, transformation, error_order):
    """Routine for LognormalPSD parameters grid search.

    Evaluates every (mu, sigma) combination at fixed Nt and returns the
    triplet ``(Nt, mu, sigma)`` minimizing the cost.
    """
    # Cartesian product of (mu, sigma) candidates
    grids = np.meshgrid(mu_values, sigma_values, indexing="xy")
    mus = grids[0].ravel()
    sigmas = grids[1].ravel()

    # Perform grid search
    with suppress_warnings():
        predictions = LognormalPSD.formula(D=D[None, :], Nt=Nt, mu=mus[:, None], sigma=sigmas[:, None])

        # Score every candidate against the observed spectrum
        costs = _compute_cost_function(
            ND_obs=ND_obs,
            ND_preds=predictions,
            D=D,
            dD=dD,
            V=V,
            target=target,
            transformation=transformation,
            error_order=error_order,
        )

    # Select the parameter set minimizing the cost (Nt is passed through unchanged)
    idx = np.argmin(costs)
    return Nt, mus[idx].item(), sigmas[idx].item()
|
|
1250
|
+
|
|
1251
|
+
|
|
1252
|
+
def apply_lognormal_gs(
    Nt,
    ND_obs,
    V,
    # Coords
    D,
    dD,
    # Error options
    target,
    transformation,
    error_order,
):
    """Estimate LognormalPSD model parameters using Grid Search.

    Runs a coarse (mu, sigma) search first, then refines around the
    coarse optimum with a 20x finer step. Returns ``[Nt, mu, sigma]``.
    """
    # Coarse candidate grids
    mu_step = 0.5
    sigma_step = 0.5
    coarse_mu = np.arange(0.01, 20, step=mu_step)  # TODO: define realistic values
    coarse_sigma = np.arange(0, 20, step=sigma_step)  # TODO: define realistic values

    # Arguments shared by both search rounds
    common = {
        "Nt": Nt,
        "ND_obs": ND_obs,
        "D": D,
        "dD": dD,
        "V": V,
        "target": target,
        "transformation": transformation,
        "error_order": error_order,
    }

    # Round 1: coarse search
    Nt, mu, sigma = _apply_lognormal_gs(mu_values=coarse_mu, sigma_values=coarse_sigma, **common)

    # Round 2: refine within +/- 2 coarse steps of the optimum
    fine_mu = np.arange(mu - mu_step * 2, mu + mu_step * 2, step=mu_step / 20)
    fine_sigma = np.arange(sigma - sigma_step * 2, sigma + sigma_step * 2, step=sigma_step / 20)
    Nt, mu, sigma = _apply_lognormal_gs(mu_values=fine_mu, sigma_values=fine_sigma, **common)

    return np.array([Nt, mu, sigma])
|
|
1302
|
+
|
|
1303
|
+
|
|
1304
|
+
def apply_normalized_gamma_gs(
    Nw,
    D50,
    ND_obs,
    V,
    # Coords
    D,
    dD,
    # Error options
    target,
    transformation,
    error_order,
):
    """Estimate NormalizedGammaPSD model parameters using Grid Search.

    Only mu is searched; Nw and D50 are held fixed at the supplied values.
    Returns ``[Nw, mu, D50]``.
    """
    # Candidate mu values
    candidate_mu = np.arange(0.01, 20, step=0.01)

    # Perform grid search
    with suppress_warnings():
        # Predicted spectra for every candidate mu
        predictions = NormalizedGammaPSD.formula(D=D[None, :], D50=D50, Nw=Nw, mu=candidate_mu[:, None])

        # Score every candidate against the observed spectrum
        costs = _compute_cost_function(
            ND_obs=ND_obs,
            ND_preds=predictions,
            D=D,
            dD=dD,
            V=V,
            target=target,
            transformation=transformation,
            error_order=error_order,
        )

    # Select the mu minimizing the cost
    mu = candidate_mu[np.argmin(costs)]
    return np.array([Nw, mu, D50])
|
|
1341
|
+
|
|
1342
|
+
|
|
1343
|
+
def get_exponential_parameters_gs(ds, target="ND", transformation="log", error_order=1):
    """Estimate the parameters of an Exponential distribution using Grid Search.

    Options:
      - target: one of "ND", "LWC", "Z", "R"
      - transformation: "log", "identity", "sqrt" (used only when target="ND")
      - error_order: error norm order, e.g. 1 (MAE-like) or 2 (MSE-like);
        used only when target="ND"
    """
    # Static arguments forwarded to the per-timestep grid search
    func_kwargs = {
        "D": ds["diameter_bin_center"].data,
        "dD": ds["diameter_bin_width"].data,
        "target": target,
        "transformation": transformation,
        "error_order": error_order,
    }

    # Vectorize the grid search over time (dask-parallelized when lazy)
    da_params = xr.apply_ufunc(
        apply_exponential_gs,
        # Variables varying over time
        ds["Nt"],
        ds["drop_number_concentration"],
        ds["fall_velocity"],
        # Other options
        kwargs=func_kwargs,
        # Settings
        input_core_dims=[[], ["diameter_bin_center"], ["diameter_bin_center"]],
        output_core_dims=[["parameters"]],
        vectorize=True,
        dask="parallelized",
        dask_gufunc_kwargs={"output_sizes": {"parameters": 2}},  # lengths of the new output_core_dims dimensions
        output_dtypes=["float64"],
    )

    # Name the parameters and split them into separate dataset variables
    da_params = da_params.assign_coords({"parameters": ["N0", "Lambda"]})
    ds_params = da_params.to_dataset(dim="parameters")

    # Record which DSD model was fitted
    ds_params.attrs["disdrodb_psd_model"] = "ExponentialPSD"
    return ds_params
|
|
1385
|
+
|
|
1386
|
+
|
|
1387
|
+
def get_gamma_parameters_gs(ds, target="ND", transformation="log", error_order=1):
    """Compute Grid Search to identify mu and Lambda Gamma distribution parameters.

    Options:
      - target: one of "ND", "LWC", "Z", "R"
      - transformation: "log", "identity", "sqrt" (used only when target="ND")
      - error_order: error norm order, e.g. 1 (MAE-like) or 2 (MSE-like);
        used only when target="ND"
    """
    # Static arguments forwarded to the per-timestep grid search
    func_kwargs = {
        "D": ds["diameter_bin_center"].data,
        "dD": ds["diameter_bin_width"].data,
        "target": target,
        "transformation": transformation,
        "error_order": error_order,
    }

    # Vectorize the grid search over time (dask-parallelized when lazy)
    da_params = xr.apply_ufunc(
        apply_gamma_gs,
        # Variables varying over time
        ds["Nt"],
        ds["drop_number_concentration"],
        ds["fall_velocity"],
        # Other options
        kwargs=func_kwargs,
        # Settings
        input_core_dims=[[], ["diameter_bin_center"], ["diameter_bin_center"]],
        output_core_dims=[["parameters"]],
        vectorize=True,
        dask="parallelized",
        dask_gufunc_kwargs={"output_sizes": {"parameters": 3}},  # lengths of the new output_core_dims dimensions
        output_dtypes=["float64"],
    )

    # Name the parameters and split them into separate dataset variables
    da_params = da_params.assign_coords({"parameters": ["N0", "mu", "Lambda"]})
    ds_params = da_params.to_dataset(dim="parameters")

    # Record which DSD model was fitted
    ds_params.attrs["disdrodb_psd_model"] = "GammaPSD"
    return ds_params
|
|
1429
|
+
|
|
1430
|
+
|
|
1431
|
+
def get_lognormal_parameters_gs(ds, target="ND", transformation="log", error_order=1):
    """Compute Grid Search to identify mu and sigma lognormal distribution parameters.

    Options:
      - target: one of "ND", "LWC", "Z", "R"
      - transformation: "log", "identity", "sqrt" (used only when target="ND")
      - error_order: error norm order, e.g. 1 (MAE-like) or 2 (MSE-like);
        used only when target="ND"
    """
    # Static arguments forwarded to the per-timestep grid search
    func_kwargs = {
        "D": ds["diameter_bin_center"].data,
        "dD": ds["diameter_bin_width"].data,
        "target": target,
        "transformation": transformation,
        "error_order": error_order,
    }

    # Vectorize the grid search over time (dask-parallelized when lazy)
    da_params = xr.apply_ufunc(
        apply_lognormal_gs,
        # Variables varying over time
        ds["Nt"],
        ds["drop_number_concentration"],
        ds["fall_velocity"],
        # Other options
        kwargs=func_kwargs,
        # Settings
        input_core_dims=[[], ["diameter_bin_center"], ["diameter_bin_center"]],
        output_core_dims=[["parameters"]],
        vectorize=True,
        dask="parallelized",
        dask_gufunc_kwargs={"output_sizes": {"parameters": 3}},  # lengths of the new output_core_dims dimensions
        output_dtypes=["float64"],
    )

    # Name the parameters and split them into separate dataset variables
    da_params = da_params.assign_coords({"parameters": ["Nt", "mu", "sigma"]})
    ds_params = da_params.to_dataset(dim="parameters")

    # Record which DSD model was fitted
    ds_params.attrs["disdrodb_psd_model"] = "LognormalPSD"
    return ds_params
|
|
1473
|
+
|
|
1474
|
+
|
|
1475
|
+
def get_normalized_gamma_parameters_gs(ds, target="ND", transformation="log", error_order=1):
    r"""Estimate $\mu$ of a Normalized Gamma distribution using Grid Search.

    The D50 and Nw parameters of the Normalized Gamma distribution are derived empirically from the observed DSD.
    $\mu$ is derived by minimizing the errors between the observed DSD and modelled Normalized Gamma distribution.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset providing the ``Nw``, ``D50``, ``drop_number_concentration`` and
        ``fall_velocity`` variables together with the ``diameter_bin_center`` and
        ``diameter_bin_width`` coordinates.
    target : str, optional
        Quantity on which the fitting errors are computed: "ND", "LWC", "Z" or "R".
        The default is "ND".
    transformation : str, optional
        Transformation applied to the drop number concentration before computing
        errors: "log", "identity" or "sqrt". Used only when ``target="ND"``.
        The default is "log".
    error_order : int, optional
        Order of the error norm. 1 is equivalent to minimizing the mean absolute
        error (L1 norm); 2 to the mean squared error (L2 norm).
        Used only when ``target="ND"``. The default is 1.

    Returns
    -------
    ds_params : xarray.Dataset
        Dataset containing the estimated Normalized Gamma distribution parameters.
    """
    # "target": ["ND", "LWC", "Z", "R"]
    # "transformation": "log", "identity", "sqrt", # only for drop_number_concentration
    # "error_order": 1, # MAE/MSE ... only for drop_number_concentration

    # Define kwargs (static arguments forwarded to the per-timestep grid search)
    kwargs = {
        "D": ds["diameter_bin_center"].data,
        "dD": ds["diameter_bin_width"].data,
        "target": target,
        "transformation": transformation,
        "error_order": error_order,
    }

    # Fit distribution in parallel
    da_params = xr.apply_ufunc(
        apply_normalized_gamma_gs,
        # Variables varying over time
        ds["Nw"],
        ds["D50"],
        ds["drop_number_concentration"],
        ds["fall_velocity"],
        # Other options
        kwargs=kwargs,
        # Settings
        input_core_dims=[[], [], ["diameter_bin_center"], ["diameter_bin_center"]],
        output_core_dims=[["parameters"]],
        vectorize=True,
        dask="parallelized",
        dask_gufunc_kwargs={"output_sizes": {"parameters": 3}},  # lengths of the new output_core_dims dimensions.
        output_dtypes=["float64"],
    )

    # Add parameters coordinates
    da_params = da_params.assign_coords({"parameters": ["Nw", "mu", "D50"]})

    # Create parameters dataset
    ds_params = da_params.to_dataset(dim="parameters")

    # Add DSD model name to the attribute
    ds_params.attrs["disdrodb_psd_model"] = "NormalizedGammaPSD"
    return ds_params
|
|
1541
|
+
|
|
1542
|
+
|
|
1543
|
+
####-----------------------------------------------------------------.
|
|
1544
|
+
#### Methods of Moments (MOM)
|
|
1545
|
+
# - M246 DEFAULT FOR GAMMA ?
|
|
1546
|
+
# - LMOM (Johnson et al., 2014)
|
|
1547
|
+
|
|
1548
|
+
|
|
1549
|
+
def get_exponential_parameters_Zhang2008(moment_l, moment_m, l, m):  # noqa: E741
    """Calculate Exponential DSD parameters using the method of moments (MOM).

    The choice of moments is given in the parameters.

    Parameters
    ----------
    moment_l : float
        Moment of order ``l``.
    moment_m : float
        Moment of order ``m``.
    l : float
        Order of the first moment.
    m : float
        Order of the second moment.

    Returns
    -------
    tuple
        ``(N0, Lambda)`` parameters of the exponential distribution.

    References
    ----------
    [1] Zhang, et. al., 2008, Diagnosing the Intercept Parameter for Exponential Raindrop Size
    Distribution Based on Video Disdrometer Observations: Model Development. J. Appl.
    Meteor. Climatol.,
    https://doi.org/10.1175/2008JAMC1876.1
    """
    # For an exponential PSD, Mk = N0 * Gamma(k+1) / Lambda^(k+1); the ratio of
    # two moments isolates Lambda, which then yields N0.
    ratio = (moment_l * gamma(m + 1)) / (moment_m * gamma(l + 1))
    Lambda = np.power(ratio, 1 / (m - l))
    N0 = moment_l * np.power(Lambda, l + 1) / gamma(l + 1)
    return N0, Lambda
|
|
1577
|
+
|
|
1578
|
+
|
|
1579
|
+
def get_exponential_parameters_M34(moment_3, moment_4):
    """Compute exponential distribution parameters following Testud 2001.

    Uses the 3rd and 4th moments: Dm = M4/M3 and Lambda = 4/Dm;
    N0 = 256/Gamma(4) * M3^5 / M4^4. Returns ``(N0, Lambda)``.

    References
    ----------
    Testud, J., S. Oury, R. A. Black, P. Amayenc, and X. Dou, 2001:
    The Concept of “Normalized” Distribution to Describe Raindrop Spectra:
    A Tool for Cloud Physics and Cloud Remote Sensing.
    J. Appl. Meteor. Climatol., 40, 1118-1140,
    https://doi.org/10.1175/1520-0450(2001)040<1118:TCONDT>2.0.CO;2
    """
    # Mean volume diameter and associated slope parameter
    Dm = moment_4 / moment_3
    Lambda = 4 / Dm
    # Intercept parameter
    N0 = 256 / gamma(4) * moment_3**5 / moment_4**4
    return N0, Lambda
|
|
1594
|
+
|
|
1595
|
+
|
|
1596
|
+
def get_gamma_parameters_M012(M0, M1, M2):
    """Compute gamma distribution parameters from moments 0, 1, 2 (Cao et al., 2009).

    For a gamma PSD N(D) = N0 * D^mu * exp(-Lambda*D), the moments satisfy
    Mk = N0 * Gamma(mu+k+1) / Lambda^(mu+k+1), hence the dimensionless ratio
    G = M1^2 / (M0 * M2) = (mu+1)/(mu+2), from which mu, Lambda and N0 follow.

    Returns ``(N0, mu, Lambda)``.

    References
    ----------
    Cao, Q., and G. Zhang, 2009:
    Errors in Estimating Raindrop Size Distribution Parameters Employing Disdrometer and Simulated Raindrop Spectra.
    J. Appl. Meteor. Climatol., 48, 406-425, https://doi.org/10.1175/2008JAMC2026.1.
    """
    # BUG FIX: previously G = M1**3 / M0 / M2, which is dimensionally
    # inconsistent and produced the "really bad results" noted in the old TODO.
    G = M1**2 / M0 / M2
    mu = 1 / (1 - G) - 2
    Lambda = M0 / M1 * (mu + 1)
    N0 = Lambda ** (mu + 1) * M0 / gamma(mu + 1)
    return N0, mu, Lambda
|
|
1611
|
+
|
|
1612
|
+
|
|
1613
|
+
def get_gamma_parameters_M234(M2, M3, M4):
    """Compute gamma distribution parameters following Cao et al., 2009.

    Uses moments 2, 3 and 4. The ratio eta = M3^2/(M2*M4) equals
    (mu+3)/(mu+4) for a gamma PSD. Returns ``(N0, mu, Lambda)``.

    References
    ----------
    Cao, Q., and G. Zhang, 2009:
    Errors in Estimating Raindrop Size Distribution Parameters Employing Disdrometer and Simulated Raindrop Spectra.
    J. Appl. Meteor. Climatol., 48, 406-425, https://doi.org/10.1175/2008JAMC2026.1.
    """
    eta = M3**2 / M2 / M4
    mu = 1 / (1 - eta) - 4
    Lambda = M2 / M3 * (mu + 3)
    N0 = Lambda ** (mu + 3) * M2 / gamma(mu + 3)
    return N0, mu, Lambda
|
|
1627
|
+
|
|
1628
|
+
|
|
1629
|
+
def get_gamma_parameters_M246(M2, M4, M6):
    """Compute gamma distribution parameters following Ulbrich 1998.

    Uses moments 2, 4 and 6 with G = M4^2/(M2*M6). mu is the negative root of
    the quadratic (G-1)*mu^2 + (11G-7)*mu + (30G-12) = 0:

        mu = ((7 - 11G) - sqrt((7 - 11G)^2 - 4(G-1)(30G-12))) / (2(G-1))

    where the discriminant simplifies algebraically to G^2 + 14G + 1
    (Cao and Zhang, 2009, Eq. B3). Returns ``(N0, mu, Lambda)``.

    References
    ----------
    Ulbrich, C. W., and D. Atlas, 1998:
    Rainfall Microphysics and Radar Properties: Analysis Methods for Drop Size Spectra.
    J. Appl. Meteor. Climatol., 37, 912-923,
    https://doi.org/10.1175/1520-0450(1998)037<0912:RMARPA>2.0.CO;2

    Cao, Q., and G. Zhang, 2009:
    Errors in Estimating Raindrop Size Distribution Parameters Employing Disdrometer and Simulated Raindrop Spectra.
    J. Appl. Meteor. Climatol., 48, 406-425, https://doi.org/10.1175/2008JAMC2026.1.

    Thurai, M., Williams, C.R., Bringi, V.N., 2014:
    Examining the correlations between drop size distribution parameters using data
    from two side-by-side 2D-video disdrometers.
    Atmospheric Research, 144, 95-110, https://doi.org/10.1016/j.atmosres.2014.01.002.
    """
    G = M4**2 / M2 / M6

    # BUG FIX: the previous implementation divided only the sqrt term by
    # 2*(G-1) (missing parentheses around the numerator) and assigned mu twice,
    # the first assignment being dead code.
    mu = ((7.0 - 11.0 * G) - np.sqrt(G**2 + 14.0 * G + 1.0)) / (2.0 * (G - 1.0))

    Lambda = np.sqrt((4 + mu) * (3 + mu) * M2 / M4)
    # Cao et al., 2009
    N0 = M2 * Lambda ** (3 + mu) / gamma(3 + mu)
    # # Thurai et al., 2014
    # N0 = M3 * Lambda ** (4 + mu) / gamma(4 + mu)
    # # Ulbrich et al., 1998
    # N0 = M6 * Lambda ** (7.0 + mu) / gamma(7 + mu)
    return N0, mu, Lambda
|
|
1668
|
+
|
|
1669
|
+
|
|
1670
|
+
def get_gamma_parameters_M456(M4, M5, M6):
    """Compute gamma distribution parameters following Cao et al., 2009.

    Uses moments 4, 5 and 6. The ratio eta = M5^2/(M4*M6) equals
    (mu+5)/(mu+6) for a gamma PSD. Returns ``(N0, mu, Lambda)``.

    References
    ----------
    Cao, Q., and G. Zhang, 2009:
    Errors in Estimating Raindrop Size Distribution Parameters Employing Disdrometer and Simulated Raindrop Spectra.
    J. Appl. Meteor. Climatol., 48, 406-425, https://doi.org/10.1175/2008JAMC2026.1.
    """
    eta = M5**2 / M4 / M6
    mu = 1 / (1 - eta) - 6
    Lambda = M4 / M5 * (mu + 5)
    N0 = Lambda ** (mu + 5) * M4 / gamma(mu + 5)
    return N0, mu, Lambda
|
|
1684
|
+
|
|
1685
|
+
|
|
1686
|
+
def get_gamma_parameters_M346(M3, M4, M6):
    """Compute gamma distribution parameters following Kozu 1991.

    Uses moments 3, 4 and 6 with eta = M4^3/(M3^2*M6).
    Returns ``(N0, mu, Lambda)``.

    References
    ----------
    Kozu, T., and K. Nakamura, 1991:
    Rainfall Parameter Estimation from Dual-Radar Measurements
    Combining Reflectivity Profile and Path-integrated Attenuation.
    J. Atmos. Oceanic Technol., 8, 259-270, https://doi.org/10.1175/1520-0426(1991)008<0259:RPEFDR>2.0.CO;2

    Tokay, A., and D. A. Short, 1996:
    Evidence from Tropical Raindrop Spectra of the Origin of Rain from
    Stratiform versus Convective Clouds.
    J. Appl. Meteor. Climatol., 35, 355-371,
    https://doi.org/10.1175/1520-0450(1996)035<0355:EFTRSO>2.0.CO;2

    Cao, Q., and G. Zhang, 2009:
    Errors in Estimating Raindrop Size Distribution Parameters Employing Disdrometer and Simulated Raindrop Spectra.
    J. Appl. Meteor. Climatol., 48, 406-425, https://doi.org/10.1175/2008JAMC2026.1.
    """
    eta = M4**3 / M3**2 / M6

    # Kozu and Nakamura (1991) shape-parameter estimator
    # (Cao et al., 2009 give the equivalent form (11G - 8 + sqrt(G*(G+8)))/(2*(1-G)))
    mu = (5.5 * eta - 4 + np.sqrt(eta * (eta * 0.25 + 2))) / (1 - eta)

    Lambda = (mu + 4) * M3 / M4
    N0 = Lambda ** (mu + 4) * M3 / gamma(mu + 4)
    return N0, mu, Lambda
|
|
1717
|
+
|
|
1718
|
+
|
|
1719
|
+
def get_lognormal_parameters_M346(M3, M4, M6):
    """Compute lognormal distribution parameters following Kozu1991.

    For a lognormal PSD with total concentration Nt, log-space mean mu and
    log-space standard deviation sigma, ln(Mk) = ln(Nt) + k*mu + k^2*sigma^2/2.
    Solving this linear system for k in {3, 4, 6} yields:

        ln(Nt)  = (24*L3 - 27*L4 + 6*L6) / 3
        mu      = (-10*L3 + 13.5*L4 - 3.5*L6) / 3
        sigma^2 = (2*L3 - 3*L4 + L6) / 3

    Returns ``(Nt, mu, sigma)``.

    References
    ----------
    Kozu, T., and K. Nakamura, 1991:
    Rainfall Parameter Estimation from Dual-Radar Measurements
    Combining Reflectivity Profile and Path-integrated Attenuation.
    J. Atmos. Oceanic Technol., 8, 259-270, https://doi.org/10.1175/1520-0426(1991)008<0259:RPEFDR>2.0.CO;2
    """
    L3 = np.log(M3)
    L4 = np.log(M4)
    L6 = np.log(M6)
    # BUG FIX: the previous implementation used -6*L6 (sign error), which
    # drives Nt to ~0 for physically consistent moments, and returned sigma^2
    # where the log-space standard deviation is expected.
    Nt = np.exp((24 * L3 - 27 * L4 + 6 * L6) / 3)
    mu = (-10 * L3 + 13.5 * L4 - 3.5 * L6) / 3
    # NOTE(review): taking sqrt assumes downstream code (e.g. LognormalPSD.formula)
    # interprets sigma as the log-space standard deviation — confirm.
    sigma = np.sqrt((2 * L3 - 3 * L4 + L6) / 3)
    return Nt, mu, sigma
|
|
1736
|
+
|
|
1737
|
+
|
|
1738
|
+
def _get_gamma_parameters_mom(ds: xr.Dataset, mom_method: str) -> xr.Dataset:
    """Estimate GammaPSD parameters from precomputed moment variables in ``ds``."""
    # Look up the estimator and the moment variables it requires
    estimator, moment_names = MOM_METHODS_DICT["GammaPSD"][mom_method]
    moments = [ds[name] for name in moment_names]

    # Element-wise estimation; outputs keep the input coords/shapes
    N0, mu, Lambda = estimator(*moments)

    # Package the results into a new dataset
    return xr.Dataset(
        {
            "N0": N0,
            "mu": mu,
            "Lambda": Lambda,
        },
        coords=ds.coords,
    )
|
|
1758
|
+
|
|
1759
|
+
|
|
1760
|
+
def _get_lognormal_parameters_mom(ds: xr.Dataset, mom_method: str) -> xr.Dataset:
    """Estimate LognormalPSD parameters from precomputed moment variables in ``ds``."""
    # Look up the estimator and the moment variables it requires
    estimator, moment_names = MOM_METHODS_DICT["LognormalPSD"][mom_method]
    moments = [ds[name] for name in moment_names]

    # Element-wise estimation; outputs keep the input coords/shapes
    Nt, mu, sigma = estimator(*moments)

    # Package the results into a new dataset
    return xr.Dataset(
        {
            "Nt": Nt,
            "mu": mu,
            "sigma": sigma,
        },
        coords=ds.coords,
    )
|
|
1780
|
+
|
|
1781
|
+
|
|
1782
|
+
def _get_exponential_parameters_mom(ds: xr.Dataset, mom_method: str) -> xr.Dataset:
    """Estimate ExponentialPSD parameters from precomputed moment variables in ``ds``."""
    # Look up the estimator and the moment variables it requires
    estimator, moment_names = MOM_METHODS_DICT["ExponentialPSD"][mom_method]
    moments = [ds[name] for name in moment_names]

    # Element-wise estimation; outputs keep the input coords/shapes
    N0, Lambda = estimator(*moments)

    # Package the results into a new dataset
    return xr.Dataset(
        {
            "N0": N0,
            "Lambda": Lambda,
        },
        coords=ds.coords,
    )
|
|
1801
|
+
|
|
1802
|
+
|
|
1803
|
+
####--------------------------------------------------------------------------------------.
|
|
1804
|
+
#### Routines dictionary
|
|
1805
|
+
|
|
1806
|
+
|
|
1807
|
+
# Mapping: PSD model -> {MOM method name -> (estimator function, required moment variables)}
# The moment variables are looked up in the dataset by _get_*_parameters_mom.
MOM_METHODS_DICT = {
    "GammaPSD": {
        # "M012": (get_gamma_parameters_M012, ["M0", "M1", "M2"]),
        "M234": (get_gamma_parameters_M234, ["M2", "M3", "M4"]),
        "M246": (get_gamma_parameters_M246, ["M2", "M4", "M6"]),
        "M456": (get_gamma_parameters_M456, ["M4", "M5", "M6"]),
        "M346": (get_gamma_parameters_M346, ["M3", "M4", "M6"]),
    },
    "LognormalPSD": {
        "M346": (get_lognormal_parameters_M346, ["M3", "M4", "M6"]),
    },
    "ExponentialPSD": {
        # NOTE(review): key "M234" maps to a two-moment (M3, M4) estimator —
        # the name looks like it should be "M34"; confirm before renaming,
        # since callers may rely on the current key.
        "M234": (get_exponential_parameters_M34, ["M3", "M4"]),
    },
}
|
|
1822
|
+
|
|
1823
|
+
|
|
1824
|
+
# Mapping: optimization approach -> {PSD model -> fitting routine}.
# "MOM" = method of moments, "GS" = grid search, "ML" = maximum likelihood.
OPTIMIZATION_ROUTINES_DICT = {
    "MOM": {
        "GammaPSD": _get_gamma_parameters_mom,
        "LognormalPSD": _get_lognormal_parameters_mom,
        "ExponentialPSD": _get_exponential_parameters_mom,
    },
    "GS": {
        "GammaPSD": get_gamma_parameters_gs,
        "NormalizedGammaPSD": get_normalized_gamma_parameters_gs,
        "LognormalPSD": get_lognormal_parameters_gs,
        "ExponentialPSD": get_exponential_parameters_gs,
    },
    "ML": {
        "GammaPSD": get_gamma_parameters,
        "LognormalPSD": get_lognormal_parameters,
        "ExponentialPSD": get_exponential_parameters,
    },
}
|
|
1842
|
+
|
|
1843
|
+
|
|
1844
|
+
def available_mom_methods(psd_model):
    """Implemented MOM methods for a given PSD model."""
    # Dict iteration yields the method-name keys
    return [mom_method for mom_method in MOM_METHODS_DICT[psd_model]]
|
|
1847
|
+
|
|
1848
|
+
|
|
1849
|
+
def available_optimization(psd_model):
    """Implemented fitting methods for a given PSD model."""
    return [name for name, routines in OPTIMIZATION_ROUTINES_DICT.items() if psd_model in routines]
|
|
1852
|
+
|
|
1853
|
+
|
|
1854
|
+
####--------------------------------------------------------------------------------------.
|
|
1855
|
+
#### Argument checkers
|
|
1856
|
+
|
|
1857
|
+
|
|
1858
|
+
def check_psd_model(psd_model, optimization):
    """Check valid psd_model argument.

    Raises ValueError if ``psd_model`` is not supported by the given
    ``optimization`` approach; otherwise returns the validated value.
    """
    valid_psd_models = list(OPTIMIZATION_ROUTINES_DICT[optimization])
    if psd_model not in valid_psd_models:
        msg = (
            f"{optimization} optimization is not available for 'psd_model' {psd_model}. "
            f"Accepted PSD models are {valid_psd_models}."
        )
        raise ValueError(msg)
    # Return the validated value for consistency with the other check_* helpers
    return psd_model
|
|
1867
|
+
|
|
1868
|
+
|
|
1869
|
+
def check_target(target):
    """Check valid target argument."""
    valid_targets = ["ND", "R", "Z", "LWC"]
    if target in valid_targets:
        return target
    raise ValueError(f"Invalid 'target' {target}. Valid targets are {valid_targets}.")
|
|
1875
|
+
|
|
1876
|
+
|
|
1877
|
+
def check_transformation(transformation):
    """Check valid transformation argument.

    Returns the validated value; raises ValueError otherwise.
    """
    valid_transformation = ["identity", "log", "sqrt"]
    if transformation not in valid_transformation:
        # BUG FIX: the message previously interpolated the invalid value
        # where the list of valid options belongs.
        raise ValueError(
            f"Invalid 'transformation' {transformation}. Valid transformations are {valid_transformation}.",
        )
    return transformation
|
|
1885
|
+
|
|
1886
|
+
|
|
1887
|
+
def check_likelihood(likelihood):
    """Check valid likelihood argument."""
    valid_likelihood = ["multinomial", "poisson"]
    if likelihood in valid_likelihood:
        return likelihood
    raise ValueError(f"Invalid 'likelihood' {likelihood}. Valid values are {valid_likelihood}.")
|
|
1893
|
+
|
|
1894
|
+
|
|
1895
|
+
def check_truncated_likelihood(truncated_likelihood):
    """Check valid truncated_likelihood argument."""
    if isinstance(truncated_likelihood, bool):
        return truncated_likelihood
    raise TypeError(f"Invalid 'truncated_likelihood' argument {truncated_likelihood}. Must be True or False.")
|
|
1900
|
+
|
|
1901
|
+
|
|
1902
|
+
def check_probability_method(probability_method):
    """Check valid probability_method argument."""
    valid_probability_method = ["cdf", "pdf"]
    if probability_method in valid_probability_method:
        return probability_method
    raise ValueError(
        f"Invalid 'probability_method' {probability_method}. Valid values are {valid_probability_method}.",
    )
|
|
1911
|
+
|
|
1912
|
+
|
|
1913
|
+
def check_optimizer(optimizer):
    """Check valid optimizer argument.

    Parameters
    ----------
    optimizer : str
        Optimization method name. Valid values are 'Nelder-Mead',
        'Powell' and 'L-BFGS-B'.

    Returns
    -------
    str
        The validated optimizer name.

    Raises
    ------
    ValueError
        If ``optimizer`` is not a supported optimizer.
    """
    # Check valid optimizer (fixed a stale copy-pasted comment that
    # referred to probability_method).
    valid_optimizer = ["Nelder-Mead", "Powell", "L-BFGS-B"]
    if optimizer not in valid_optimizer:
        raise ValueError(
            f"Invalid 'optimizer' {optimizer}. Valid values are {valid_optimizer}.",
        )
    return optimizer
|
|
1922
|
+
|
|
1923
|
+
|
|
1924
|
+
def check_mom_methods(mom_methods, psd_model):
    """Check valid mom_methods arguments.

    A single string is promoted to a one-element list. Raises ``ValueError``
    listing the unknown methods if any are not available for ``psd_model``.
    """
    mom_methods = [mom_methods] if isinstance(mom_methods, str) else mom_methods
    valid_mom_methods = available_mom_methods(psd_model)
    # Select the requested methods that are NOT among the valid ones
    # (numpy expression kept as-is so the error-message repr is unchanged)
    invalid_mom_methods = np.array(mom_methods)[np.isin(mom_methods, valid_mom_methods, invert=True)]
    if len(invalid_mom_methods) == 0:
        return mom_methods
    raise ValueError(
        f"Unknown mom_methods '{invalid_mom_methods}' for {psd_model}. Choose from {valid_mom_methods}.",
    )
|
|
1935
|
+
|
|
1936
|
+
|
|
1937
|
+
def check_optimization(optimization):
    """Check valid optimization argument.

    Returns ``optimization`` unchanged if it names a known optimization
    routine; raises ``ValueError`` otherwise.
    """
    valid_optimization = list(OPTIMIZATION_ROUTINES_DICT)
    if optimization in valid_optimization:
        return optimization
    raise ValueError(
        f"Invalid 'optimization' {optimization}. Valid procedure are {valid_optimization}.",
    )
|
|
1945
|
+
|
|
1946
|
+
|
|
1947
|
+
def check_optimization_kwargs(optimization_kwargs, optimization, psd_model):
    """Check valid optimization_kwargs.

    Verifies that ``optimization_kwargs`` contains every argument required by
    the chosen ``optimization`` procedure and that each value is valid for
    the given ``psd_model``.
    """
    # Required arguments per optimization procedure, mapped to their
    # validator function (None means presence-only check).
    dict_arguments = {
        "ML": {
            "init_method": None,
            "probability_method": check_probability_method,
            "likelihood": check_likelihood,
            "truncated_likelihood": check_truncated_likelihood,
            "optimizer": check_optimizer,
        },
        "GS": {
            "target": check_target,
            "transformation": check_transformation,
            "error_order": None,
        },
        "MOM": {
            "mom_methods": None,
        },
    }
    optimization = check_optimization(optimization)
    check_psd_model(psd_model=psd_model, optimization=optimization)

    # Expected arguments for the selected optimization procedure
    expected_arguments = dict_arguments.get(optimization, {})

    # Ensure no required argument is missing
    missing_args = [arg for arg in expected_arguments if arg not in optimization_kwargs]
    if missing_args:
        raise ValueError(f"Missing required arguments for {optimization} optimization: {missing_args}")

    # Run the per-argument validators (skip presence-only entries)
    for arg, validator in expected_arguments.items():
        if callable(validator):
            validator(optimization_kwargs[arg])

    # Special checks that also depend on the PSD model
    if optimization == "MOM":
        check_mom_methods(mom_methods=optimization_kwargs["mom_methods"], psd_model=psd_model)
    if optimization == "ML" and optimization_kwargs["init_method"] is not None:
        check_mom_methods(mom_methods=optimization_kwargs["init_method"], psd_model=psd_model)
|
|
1985
|
+
|
|
1986
|
+
|
|
1987
|
+
####--------------------------------------------------------------------------------------.
|
|
1988
|
+
#### Wrappers for fitting
|
|
1989
|
+
|
|
1990
|
+
|
|
1991
|
+
def get_mom_parameters(ds: xr.Dataset, psd_model: str, mom_methods: str) -> xr.Dataset:
    """
    Compute PSD model parameters using various method-of-moments (MOM) approaches.

    The method is specified by the `mom_methods` acronym, e.g. 'M012', 'M234', 'M246'.

    Parameters
    ----------
    ds : xarray.Dataset
        An xarray Dataset with the required moments M0...M6 as data variables.
    psd_model : str
        The PSD model for which to compute the parameters.
        See ``available_psd_models()``.
    mom_methods: str or list
        Valid MOM methods are {'M012', 'M234', 'M246', 'M456', 'M346'}.

    Returns
    -------
    xarray.Dataset
        A Dataset containing mu, Lambda, and N0 variables.
        If multiple mom_methods are specified, the dataset has the dimension mom_method.

    """
    # Check inputs
    check_psd_model(psd_model=psd_model, optimization="MOM")
    mom_methods = check_mom_methods(mom_methods, psd_model=psd_model)

    # Retrieve the MOM estimation function for the PSD model
    func = OPTIMIZATION_ROUTINES_DICT["MOM"][psd_model]

    # Single method: compute directly and record the method used as an attribute
    if len(mom_methods) == 1:
        ds = func(ds=ds, mom_method=mom_methods[0])
        ds.attrs["mom_method"] = mom_methods[0]
        return ds

    # Multiple methods: compute each and concatenate along a new 'mom_method' dimension
    list_ds = [func(ds=ds, mom_method=mom_method) for mom_method in mom_methods]
    ds = xr.concat(list_ds, dim="mom_method")
    ds = ds.assign_coords({"mom_method": mom_methods})
    return ds
|
|
2027
|
+
|
|
2028
|
+
|
|
2029
|
+
def get_ml_parameters(
    ds,
    psd_model,
    init_method=None,
    probability_method="cdf",
    likelihood="multinomial",
    truncated_likelihood=True,
    optimizer="Nelder-Mead",
):
    """
    Estimate model parameters for a given distribution using Maximum Likelihood.

    Parameters
    ----------
    ds : xarray.Dataset
        Input dataset containing drop number concentration data and diameter information.
        It must include the following variables:

        - ``drop_number_concentration``: The number concentration of drops.
        - ``diameter_bin_width``: The width of each diameter bin.
        - ``diameter_bin_lower``: The lower bounds of the diameter bins.
        - ``diameter_bin_upper``: The upper bounds of the diameter bins.
        - ``diameter_bin_center``: The center values of the diameter bins.
    psd_model : str
        The PSD model to fit. See ``available_psd_models()``.
    init_method: str or list
        The method(s) of moments used to initialize the PSD model parameters.
        See ``available_mom_methods(psd_model)``.
    probability_method : str, optional
        Method to compute probabilities. The default value is ``cdf``.
    likelihood : str, optional
        Likelihood function to use for fitting. The default value is ``multinomial``.
    truncated_likelihood : bool, optional
        Whether to use Truncated Maximum Likelihood (TML). The default value is ``True``.
    optimizer : str, optional
        Optimization method to use. The default value is ``Nelder-Mead``.

    Returns
    -------
    xarray.Dataset
        The dataset containing the estimated parameters.

    """
    # -----------------------------------------------------------------------------.
    # Check arguments
    # (docstring fix: removed a stray quote after ``diameter_bin_width``)
    check_psd_model(psd_model, optimization="ML")
    likelihood = check_likelihood(likelihood)
    probability_method = check_probability_method(probability_method)
    optimizer = check_optimizer(optimizer)

    # Check valid init_method
    if init_method is not None:
        init_method = check_mom_methods(mom_methods=init_method, psd_model=psd_model)

    # Retrieve estimation function
    func = OPTIMIZATION_ROUTINES_DICT["ML"][psd_model]

    # Retrieve parameters
    ds_params = func(
        ds=ds,
        init_method=init_method,
        probability_method=probability_method,
        likelihood=likelihood,
        truncated_likelihood=truncated_likelihood,
        optimizer=optimizer,
    )
    # Return dataset with parameters
    return ds_params
|
|
2096
|
+
|
|
2097
|
+
|
|
2098
|
+
def get_gs_parameters(ds, psd_model, target="ND", transformation="log", error_order=1):
    """Retrieve PSD model parameters using Grid Search.

    Validates the arguments, looks up the grid-search routine registered for
    ``psd_model`` and returns the dataset of estimated parameters.
    """
    # Validate arguments
    check_psd_model(psd_model, optimization="GS")
    target = check_target(target)
    transformation = check_transformation(transformation)

    # Look up and run the grid-search estimation routine for this PSD model
    estimator = OPTIMIZATION_ROUTINES_DICT["GS"][psd_model]
    return estimator(ds, target=target, transformation=transformation, error_order=error_order)
|
|
2117
|
+
|
|
2118
|
+
|
|
2119
|
+
def estimate_model_parameters(
    ds,
    psd_model,
    optimization,
    optimization_kwargs,
):
    """Routine to estimate PSD model parameters.

    Validates the requested optimization procedure and its keyword arguments,
    dispatches to the matching fitting routine, and annotates the resulting
    dataset with provenance attributes.
    """
    # Validate the optimization procedure and its keyword arguments
    optimization = check_optimization(optimization)
    check_optimization_kwargs(optimization_kwargs=optimization_kwargs, optimization=optimization, psd_model=psd_model)

    # Dispatch table from procedure name to fitting routine
    dispatch = {
        "ML": get_ml_parameters,
        "MOM": get_mom_parameters,
        "GS": get_gs_parameters,
    }

    # Estimate parameters with the selected routine
    ds_params = dispatch[optimization](ds, psd_model=psd_model, **optimization_kwargs)

    # Record provenance attributes
    ds_params.attrs["disdrodb_psd_model"] = psd_model
    ds_params.attrs["disdrodb_psd_optimization"] = optimization
    if optimization == "GS":
        ds_params.attrs["disdrodb_psd_optimization_target"] = optimization_kwargs["target"]

    return ds_params
|