richvalues 4.1.4__tar.gz → 4.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {richvalues-4.1.4 → richvalues-4.2.0}/PKG-INFO +1 -1
- {richvalues-4.1.4 → richvalues-4.2.0}/pyproject.toml +1 -1
- {richvalues-4.1.4 → richvalues-4.2.0}/richvalues/__init__.py +656 -127
- {richvalues-4.1.4 → richvalues-4.2.0}/richvalues.egg-info/PKG-INFO +1 -1
- {richvalues-4.1.4 → richvalues-4.2.0}/setup.py +1 -1
- {richvalues-4.1.4 → richvalues-4.2.0}/LICENSE +0 -0
- {richvalues-4.1.4 → richvalues-4.2.0}/README.md +0 -0
- {richvalues-4.1.4 → richvalues-4.2.0}/richvalues.egg-info/SOURCES.txt +0 -0
- {richvalues-4.1.4 → richvalues-4.2.0}/richvalues.egg-info/dependency_links.txt +0 -0
- {richvalues-4.1.4 → richvalues-4.2.0}/richvalues.egg-info/requires.txt +0 -0
- {richvalues-4.1.4 → richvalues-4.2.0}/richvalues.egg-info/top_level.txt +0 -0
- {richvalues-4.1.4 → richvalues-4.2.0}/setup.cfg +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "richvalues"
|
7
|
-
version = "4.1.4"
|
7
|
+
version = "4.2.0"
|
8
8
|
description = "Python library for working with uncertainties and upper/lower limits"
|
9
9
|
license = {file="LICENSE"}
|
10
10
|
authors = [{name="Andrés Megías Toledano"}]
|
@@ -4,7 +4,7 @@
|
|
4
4
|
"""
|
5
5
|
Rich Values Library
|
6
6
|
-------------------
|
7
|
-
Version 4.1
|
7
|
+
Version 4.2
|
8
8
|
|
9
9
|
Copyright (C) 2024 - Andrés Megías Toledano
|
10
10
|
|
@@ -37,17 +37,19 @@ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
37
37
|
POSSIBILITY OF SUCH DAMAGE.
|
38
38
|
"""
|
39
39
|
|
40
|
-
__version__ = '4.1.4'
|
40
|
+
__version__ = '4.2.0'
|
41
41
|
__author__ = 'Andrés Megías Toledano'
|
42
42
|
|
43
43
|
import copy
|
44
44
|
import math
|
45
|
+
import warnings
|
45
46
|
import itertools
|
46
47
|
import numpy as np
|
47
48
|
import pandas as pd
|
48
49
|
import matplotlib.pyplot as plt
|
49
|
-
|
50
|
-
|
50
|
+
import scipy.stats
|
51
|
+
import scipy.optimize
|
52
|
+
import scipy.interpolate
|
51
53
|
|
52
54
|
variable_count = 0
|
53
55
|
variable_dict = {}
|
@@ -60,7 +62,7 @@ defaultparams = {
|
|
60
62
|
'maximum number of decimals': 5,
|
61
63
|
'limit for extra significant figure': 2.5,
|
62
64
|
'use extra significant figure for exact values': True,
|
63
|
-
'use extra significant figure for finite intervals': True,
|
65
|
+
'use extra significant figure for finite intervals': True,
|
64
66
|
'omit ones in scientific notation in LaTeX': False,
|
65
67
|
'multiplication symbol for scientific notation in LaTeX': '\\cdot',
|
66
68
|
'sigmas to define upper/lower limits from read values': 2.,
|
@@ -75,7 +77,9 @@ defaultparams = {
|
|
75
77
|
'show domain': False,
|
76
78
|
'assume integers': False,
|
77
79
|
'show asterisk for rich values with custom PDF': True,
|
78
|
-
'save PDF in rich values': False
|
80
|
+
'save PDF in rich values': False,
|
81
|
+
'use split normal distributions to model rich values': False,
|
82
|
+
'use generative extreme value distributions to model rich values': False,
|
79
83
|
}
|
80
84
|
|
81
85
|
original_defaultparams = copy.copy(defaultparams)
|
@@ -234,7 +238,7 @@ def round_sf_unc(x, dx, n=None, min_exp=None, max_dec=None, extra_sf_lim=None):
|
|
234
238
|
if not use_exp:
|
235
239
|
m = len(dy.split('.')[1]) if '.' in dy else 0
|
236
240
|
y = '{:.{}f}'.format(x, m) if x != 0. else '0'
|
237
|
-
if m == 0:
|
241
|
+
if m == 0 and dx < x:
|
238
242
|
num_digits_y = len(y)
|
239
243
|
num_digits_dy = len(dy)
|
240
244
|
m = n + int(num_digits_y - num_digits_dy)
|
@@ -244,7 +248,7 @@ def round_sf_unc(x, dx, n=None, min_exp=None, max_dec=None, extra_sf_lim=None):
|
|
244
248
|
base_dy = '{:e}'.format(float(dy)).split('e')[0]
|
245
249
|
if float(base_dy) <= extra_sf_lim:
|
246
250
|
m += 1
|
247
|
-
y = round_sf(x, m, min_exp, extra_sf_lim=1-1e-8)
|
251
|
+
y = round_sf(x, m, min_exp=np.inf, extra_sf_lim=1-1e-8)
|
248
252
|
else:
|
249
253
|
base_y, exp_y = '{:e}'.format(x).split('e')
|
250
254
|
base_dy, exp_dy = '{:e}'.format(dx).split('e')
|
@@ -260,7 +264,6 @@ def round_sf_unc(x, dx, n=None, min_exp=None, max_dec=None, extra_sf_lim=None):
|
|
260
264
|
base_y = '{:.{}f}'.format(float(base_y), m)
|
261
265
|
base_dy = '{:.{}f}'.format(float(base_dy), m)
|
262
266
|
y = '{}e{}'.format(base_y, exp_y)
|
263
|
-
dy = '{}e{}'.format(base_dy, exp_y)
|
264
267
|
elif d == 0:
|
265
268
|
if 'e' in dy:
|
266
269
|
base_dy, exp_dy = dy.split('e')
|
@@ -272,8 +275,12 @@ def round_sf_unc(x, dx, n=None, min_exp=None, max_dec=None, extra_sf_lim=None):
|
|
272
275
|
y = '{}e{}'.format(base_y, exp_dy)
|
273
276
|
else:
|
274
277
|
f = 10**(-int(exp_y))
|
275
|
-
base_y,
|
278
|
+
base_y, base_dy = round_sf_unc(x*f, dx*f, n, np.inf, max_dec, extra_sf_lim)
|
276
279
|
y = '{}e{}'.format(base_y, exp_y)
|
280
|
+
dy = '{}e{}'.format(base_dy, exp_y)
|
281
|
+
if len(dy) > len(y)+1:
|
282
|
+
min_exp = np.inf
|
283
|
+
y, dy = round_sf_unc(x, dx, n, min_exp, max_dec, extra_sf_lim)
|
277
284
|
elif dx == 0:
|
278
285
|
y = round_sf(x, n, min_exp, extra_sf_lim)
|
279
286
|
dy = '0e0'
|
@@ -347,6 +354,10 @@ def round_sf_uncs(x, dx, n=None, min_exp=None, max_dec=None, extra_sf_lim=None):
|
|
347
354
|
dx1, dx2 = dx
|
348
355
|
y1, dy1 = round_sf_unc(x, dx1, n, min_exp, max_dec, extra_sf_lim)
|
349
356
|
y2, dy2 = round_sf_unc(x, dx2, n, min_exp, max_dec, extra_sf_lim)
|
357
|
+
if 'e' in y1 and 'e' not in y2 or 'e' in y2 and 'e' not in y1:
|
358
|
+
min_exp = 0
|
359
|
+
y1, dy1 = round_sf_unc(x, dx1, n, min_exp, max_dec, extra_sf_lim)
|
360
|
+
y2, dy2 = round_sf_unc(x, dx2, n, min_exp, max_dec, extra_sf_lim)
|
350
361
|
num_dec_1 = len(y1.split('e')[0].split('.')[1]) if '.' in y1 else 0
|
351
362
|
num_dec_2 = len(y2.split('e')[0].split('.')[1]) if '.' in y2 else 0
|
352
363
|
if num_dec_2 > num_dec_1:
|
@@ -544,7 +555,7 @@ class RichValue():
|
|
544
555
|
else:
|
545
556
|
main = (x1 + x2) / 2
|
546
557
|
unc = [(x2 - x1) / 2] * 2
|
547
|
-
|
558
|
+
|
548
559
|
global variable_count
|
549
560
|
if domain[0] != domain[1]:
|
550
561
|
variable_count += 1
|
@@ -736,9 +747,99 @@ class RichValue():
|
|
736
747
|
else:
|
737
748
|
s = np.nan
|
738
749
|
return s
|
750
|
+
|
751
|
+
def median(self, num_points=None):
|
752
|
+
"""Median of the PDF of the rich value."""
|
753
|
+
if self.is_finite:
|
754
|
+
if num_points is None and self.pdf_info == 'default':
|
755
|
+
m = (self.center if self.is_centr or self.is_range
|
756
|
+
else np.mean(self.interval()))
|
757
|
+
else:
|
758
|
+
if num_points is None:
|
759
|
+
num_points = int(1e4)
|
760
|
+
distr = self.sample(num_points)
|
761
|
+
m = np.nanmedian(distr)
|
762
|
+
else:
|
763
|
+
m = np.nan
|
764
|
+
return m
|
765
|
+
|
766
|
+
def mean(self, num_points=int(1e4), sigmas=8.):
|
767
|
+
"""Mean of the PDF of the rich value."""
|
768
|
+
if self.is_finite:
|
769
|
+
if not self.is_exact:
|
770
|
+
x1, x2 = self.interval(sigmas)
|
771
|
+
x = np.linspace(x1, x2, num_points)
|
772
|
+
y = self.pdf(x)
|
773
|
+
m = np.trapz(y*x, x)
|
774
|
+
else:
|
775
|
+
m = self.main
|
776
|
+
else:
|
777
|
+
m = np.nan
|
778
|
+
return m
|
779
|
+
|
780
|
+
def mode(self, num_points=int(1e4), sigmas=8.):
|
781
|
+
"""Mode of the PDF of the rich value."""
|
782
|
+
if self.is_finite:
|
783
|
+
x1, x2 = self.interval(sigmas)
|
784
|
+
x = np.linspace(x1, x2, num_points)
|
785
|
+
y = self.pdf(x)
|
786
|
+
idx = np.argmax(y)
|
787
|
+
ymax = y[idx]
|
788
|
+
m = np.mean(x[y==ymax])
|
789
|
+
else:
|
790
|
+
m = np.nan
|
791
|
+
return m
|
792
|
+
|
793
|
+
def var(self, num_points=int(1e4), sigmas=8.):
|
794
|
+
"""Variance of the PDF of the rich value."""
|
795
|
+
if self.is_finite:
|
796
|
+
x1, x2 = self.interval(sigmas)
|
797
|
+
x = np.linspace(x1, x2, num_points)
|
798
|
+
y = self.pdf(x)
|
799
|
+
m = np.trapz(y*x, x)
|
800
|
+
v = np.trapz(y*(x-m)**2, x)
|
801
|
+
else:
|
802
|
+
v = np.nan
|
803
|
+
return v
|
804
|
+
|
805
|
+
def std(self, num_points=int(1e4), sigmas=8.):
|
806
|
+
"""Standard deviation of the PDF of the rich value."""
|
807
|
+
v = self.var(num_points, sigmas)
|
808
|
+
s = np.sqrt(v)
|
809
|
+
return s
|
810
|
+
|
811
|
+
def moment(self, n, central=True, standarized=False,
|
812
|
+
num_points=int(1e4), sigmas=8.):
|
813
|
+
"""n-Moment of the PDF of the rich value."""
|
814
|
+
if self.is_finite:
|
815
|
+
x1, x2 = self.interval(sigmas)
|
816
|
+
x = np.linspace(x1, x2, num_points)
|
817
|
+
y = self.pdf(x)
|
818
|
+
c = np.trapz(y*x, x) if central or standarized else 0.
|
819
|
+
s = np.sqrt(np.trapz(y*(x-c)**2)) if standarized else 1.
|
820
|
+
moment = np.trapz((y*(x-c)**n)) / s**n
|
821
|
+
else:
|
822
|
+
moment = np.nan
|
823
|
+
return moment
|
824
|
+
|
825
|
+
def skewness(self, num_points=int(1e4), sigmas=8.):
|
826
|
+
"""Skewness of the PDF of the rich value."""
|
827
|
+
sk = self.moment(3, standarized=True, num_points=num_points, sigmas=sigmas)
|
828
|
+
return sk
|
829
|
+
|
830
|
+
def kurtosis(self, num_points=int(1e4), sigmas=8.):
|
831
|
+
"""Kurtosis of the PDF of the rich value."""
|
832
|
+
kt = self.moment(4, standarized=True, num_points=num_points, sigmas=sigmas)
|
833
|
+
return kt
|
834
|
+
|
835
|
+
def excess_kurtosis(self, num_points=int(1e4), sigmas=8.):
|
836
|
+
"""Excess kurtosis of the PDF of the rich value."""
|
837
|
+
kt = self.kurtosis(num_points, sigmas)
|
838
|
+
ekt = kt - 3.
|
839
|
+
return ekt
|
739
840
|
|
740
841
|
def set_lim_unc(self, factor=4.):
|
741
|
-
"""Set uncertainties of limits with respect to
|
842
|
+
"""Set uncertainties of limits with respect to central values."""
|
742
843
|
if self.is_lim:
|
743
844
|
self.unc = [self.main / factor, self.main / factor]
|
744
845
|
|
@@ -827,8 +928,11 @@ class RichValue():
|
|
827
928
|
sign = '<'
|
828
929
|
y = int(np.ceil(float(y))) if is_int else y
|
829
930
|
text = '{} {} e{}'.format(sign, y, a)
|
830
|
-
if show_asterisk and self.pdf_info != 'default'
|
831
|
-
|
931
|
+
if (show_asterisk and self.pdf_info != 'default'
|
932
|
+
and ('name' not in self.pdf_info
|
933
|
+
or 'name' in self.pdf_info
|
934
|
+
and 'default' not in self.pdf_info['name'])):
|
935
|
+
text = ('*' + text).replace('*< ', '< *').replace('*> ', '> *')
|
832
936
|
if use_exp:
|
833
937
|
text = text.replace('e-0', 'e-').replace(' *','')
|
834
938
|
a = int(text.split('e')[1])
|
@@ -867,18 +971,48 @@ class RichValue():
|
|
867
971
|
return text
|
868
972
|
|
869
973
|
def __repr__(self):
|
870
|
-
|
974
|
+
if self.is_exact:
|
975
|
+
return self.main.__format__('')
|
976
|
+
else:
|
977
|
+
return self._format_as_rich_value()
|
871
978
|
|
872
979
|
def __str__(self):
|
873
980
|
return self._format_as_rich_value()
|
981
|
+
|
982
|
+
def __format__(self, format_spec=''):
|
983
|
+
if format_spec == '':
|
984
|
+
return self._format_as_rich_value()
|
985
|
+
else:
|
986
|
+
if self.is_exact:
|
987
|
+
text = self.main.__format__(format_spec)
|
988
|
+
elif self.is_centr:
|
989
|
+
text = self.main.__format__(format_spec)
|
990
|
+
if self.unc[0] == self.unc[1]:
|
991
|
+
text += '+/-' + self.unc[0].__format__(format_spec)
|
992
|
+
else:
|
993
|
+
text += ('-' + self.unc[0].__format__(format_spec)
|
994
|
+
+ '+' + self.unc[1].__format__(format_spec))
|
995
|
+
elif self.is_lim:
|
996
|
+
char = '<' if self.is_uplim else '>'
|
997
|
+
text = char + self.main.__format__(format_spec)
|
998
|
+
elif self.is_range:
|
999
|
+
x1 = self.main - self.unc[0]
|
1000
|
+
x2 = self.main + self.unc[1]
|
1001
|
+
text = x1.__format__(format_spec) + ' -- ' + x2.__format__(format_spec)
|
1002
|
+
else:
|
1003
|
+
text = str(self)
|
1004
|
+
return text
|
874
1005
|
|
875
1006
|
def latex(self, show_dollars=True, mult_symbol=None,
|
876
|
-
use_extra_sf_in_exacts=None,
|
1007
|
+
use_extra_sf_in_exacts=None, use_extra_sf_in_ranges=None,
|
1008
|
+
omit_ones_in_sci_notation=None):
|
877
1009
|
"""Display rich value in LaTeX format."""
|
878
1010
|
mult_symbol = set_default_value(mult_symbol,
|
879
1011
|
'multiplication symbol for scientific notation in LaTeX')
|
880
1012
|
use_extra_sf_in_exacts = set_default_value(use_extra_sf_in_exacts,
|
881
1013
|
'use extra significant figure for exact values')
|
1014
|
+
use_extra_sf_in_ranges = set_default_value(use_extra_sf_in_ranges,
|
1015
|
+
'use extra significant figure for finite intervals')
|
882
1016
|
omit_ones_in_sci_notation = set_default_value(omit_ones_in_sci_notation,
|
883
1017
|
'omit ones in scientific notation in LaTeX')
|
884
1018
|
kwargs = (show_dollars, mult_symbol, use_extra_sf_in_exacts,
|
@@ -921,7 +1055,8 @@ class RichValue():
|
|
921
1055
|
if not self.is_lim:
|
922
1056
|
if unc_r[0] == unc_r[1]:
|
923
1057
|
if unc_r[0] == 0:
|
924
|
-
if not range_bound and use_extra_sf_in_exacts
|
1058
|
+
if (not range_bound and use_extra_sf_in_exacts
|
1059
|
+
or range_bound and use_extra_sf_in_ranges):
|
925
1060
|
n += 1
|
926
1061
|
y = (int(round(x)) if is_int else
|
927
1062
|
round_sf(x, n, np.inf, extra_sf_lim))
|
@@ -946,7 +1081,8 @@ class RichValue():
|
|
946
1081
|
if not self.is_lim:
|
947
1082
|
if unc_r[0] == unc_r[1]:
|
948
1083
|
if unc_r[0] == 0:
|
949
|
-
if not range_bound and use_extra_sf_in_exacts
|
1084
|
+
if (not range_bound and use_extra_sf_in_exacts
|
1085
|
+
or range_bound and use_extra_sf_in_ranges):
|
950
1086
|
n += 1
|
951
1087
|
min_exp = 0
|
952
1088
|
y = str(round(x)) if is_int else round_sf(x, n,
|
@@ -1037,6 +1173,12 @@ class RichValue():
|
|
1037
1173
|
if not show_dollars:
|
1038
1174
|
text = text.replace('$','')
|
1039
1175
|
return text
|
1176
|
+
|
1177
|
+
def __float__(self):
|
1178
|
+
return float(self.main)
|
1179
|
+
|
1180
|
+
def __int__(self):
|
1181
|
+
return int(self.main)
|
1040
1182
|
|
1041
1183
|
def __neg__(self):
|
1042
1184
|
if not self.is_interv:
|
@@ -1377,6 +1519,8 @@ class RichValue():
|
|
1377
1519
|
main = self.main
|
1378
1520
|
unc = self.unc
|
1379
1521
|
domain = copy.copy(self.domain)
|
1522
|
+
if type(x) in (int, float):
|
1523
|
+
x = [x]
|
1380
1524
|
x = np.array(x)
|
1381
1525
|
y = np.zeros(len(x))
|
1382
1526
|
if self.is_exact:
|
@@ -1387,6 +1531,24 @@ class RichValue():
|
|
1387
1531
|
else:
|
1388
1532
|
if not self.is_interv:
|
1389
1533
|
y = general_pdf(x, main, unc, domain)
|
1534
|
+
if unc[0] != unc[1]:
|
1535
|
+
use_splitnorm = defaultparams['use split normal'
|
1536
|
+
' distributions to model rich values']
|
1537
|
+
use_genextreme = defaultparams['use generative extreme'
|
1538
|
+
' value distributions to model rich values']
|
1539
|
+
if not use_splitnorm:
|
1540
|
+
use_genextreme = False
|
1541
|
+
if use_splitnorm:
|
1542
|
+
a_s = min(self.rel_ampl)
|
1543
|
+
s_s = max(unc[1]/unc[0], unc[0]/unc[1])
|
1544
|
+
if (a_s > 3. and s_s < 1.4
|
1545
|
+
or (use_genextreme) and s_s < 2.1):
|
1546
|
+
x1, x2 = self.interval(sigmas=8.)
|
1547
|
+
x_ = np.linspace(x1, x2, 800)
|
1548
|
+
y_ = general_pdf(x_, main, unc, domain)
|
1549
|
+
pdf_info = {'values': x_, 'probs': y_,
|
1550
|
+
'name': 'default-alt'}
|
1551
|
+
self.pdf_info = pdf_info
|
1390
1552
|
elif self.is_lolim and not self.is_uplim:
|
1391
1553
|
y[x > main] = 1e-3
|
1392
1554
|
elif self.is_uplim and not self.is_lolim:
|
@@ -1397,8 +1559,7 @@ class RichValue():
|
|
1397
1559
|
else:
|
1398
1560
|
x_, y_ = pdf_info['values'], pdf_info['probs']
|
1399
1561
|
mask = np.argsort(x_)
|
1400
|
-
pdf_ = lambda x: np.interp(x, x_[mask], y_[mask],
|
1401
|
-
left=0., right=0.)
|
1562
|
+
pdf_ = lambda x: np.interp(x, x_[mask], y_[mask], left=0., right=0.)
|
1402
1563
|
y = pdf_(x)
|
1403
1564
|
return y
|
1404
1565
|
|
@@ -1520,6 +1681,8 @@ class RichValue():
|
|
1520
1681
|
propagation_score = prop_score
|
1521
1682
|
is_not_a_number = is_nan
|
1522
1683
|
is_infinite = is_inf
|
1684
|
+
variance = var
|
1685
|
+
standard_deviation = stdev = std
|
1523
1686
|
# Method acronyms.
|
1524
1687
|
probability_density_function = pdf
|
1525
1688
|
set_limit_uncertainty = set_lim_unc
|
@@ -1792,6 +1955,30 @@ class RichArray(np.ndarray):
|
|
1792
1955
|
return (np.array([x.sign(sigmas) for x in self.flat])
|
1793
1956
|
.reshape(self.shape))
|
1794
1957
|
|
1958
|
+
def medians(self, num_points=None):
|
1959
|
+
return np.array([x.median(num_points)
|
1960
|
+
for x in self.flat]).reshape(self.shape)
|
1961
|
+
|
1962
|
+
def means(self, num_points=int(1e4), sigmas=8.):
|
1963
|
+
return np.array([x.mean(num_points, sigmas)
|
1964
|
+
for x in self.flat]).reshape(self.shape)
|
1965
|
+
|
1966
|
+
def modes(self, num_points=int(1e4), sigmas=8.):
|
1967
|
+
return np.array([x.mode(num_points, sigmas)
|
1968
|
+
for x in self.flat]).reshape(self.shape)
|
1969
|
+
|
1970
|
+
def variances(self, num_points=int(1e4), sigmas=8.):
|
1971
|
+
return np.array([x.var(num_points, sigmas)
|
1972
|
+
for x in self.flat]).reshape(self.shape)
|
1973
|
+
|
1974
|
+
def stds(self, num_points=int(1e4), sigmas=8.):
|
1975
|
+
return np.array([x.std(num_points, sigmas)
|
1976
|
+
for x in self.flat]).reshape(self.shape)
|
1977
|
+
|
1978
|
+
def moments(self, n, central=True, standarized=False):
|
1979
|
+
return np.array([x.moments(n, central, standarized)
|
1980
|
+
for x in self.flat]).reshape(self.shape)
|
1981
|
+
|
1795
1982
|
def set_params(self, params):
|
1796
1983
|
"""Set the rich value parameters of each entry of the rich array."""
|
1797
1984
|
abbreviations = {'is integer': 'is_int',
|
@@ -1888,6 +2075,7 @@ class RichArray(np.ndarray):
|
|
1888
2075
|
normalized_uncertainties = norm_uncs
|
1889
2076
|
propagation_scores = prop_scores
|
1890
2077
|
# Method acronyms.
|
2078
|
+
standard_deviations = stdevs = stds
|
1891
2079
|
set_limits_uncertainties = set_lims_uncs
|
1892
2080
|
set_parameters = set_params
|
1893
2081
|
|
@@ -3258,8 +3446,8 @@ def rich_value(text=None, domain=None, is_int=None, pdf=None,
|
|
3258
3446
|
distr = sample_from_pdf(pdf_, size=4e4, low=domain[0], high=domain[1])
|
3259
3447
|
if is_int:
|
3260
3448
|
distr = np.round(distr).astype(int)
|
3261
|
-
rvalue = evaluate_distr(distr, domain, consider_intervs)
|
3262
|
-
x1, x2 = rvalue.interval(
|
3449
|
+
rvalue = evaluate_distr(distr, domain, consider_intervs=consider_intervs)
|
3450
|
+
x1, x2 = rvalue.interval(8.)
|
3263
3451
|
x = np.linspace(x1, x2, int(1e4))
|
3264
3452
|
y = pdf_(x)
|
3265
3453
|
norm = np.trapz(y, x)
|
@@ -3341,7 +3529,7 @@ def rich_array(array, domain=None, is_int=None,
|
|
3341
3529
|
rarray.set_params({'min_exp': min_exp, 'extra_sf_lim': extra_sf_lim})
|
3342
3530
|
return rarray
|
3343
3531
|
|
3344
|
-
def rich_dataframe(df, domains=None, are_ints=None,
|
3532
|
+
def rich_dataframe(df, domains=None, are_ints=None, ignore_columns=[],
|
3345
3533
|
use_default_extra_sf_lim=False, **kwargs):
|
3346
3534
|
"""
|
3347
3535
|
Convert the values of the input dataframe of text strings to rich values.
|
@@ -3365,6 +3553,8 @@ def rich_dataframe(df, domains=None, are_ints=None,
|
|
3365
3553
|
input dataframe is already a rich value, its original domain will be
|
3366
3554
|
preserved; if not, the default domain will be used, that is,
|
3367
3555
|
[-np.inf, np.inf].
|
3556
|
+
ignore_columns : list, optional
|
3557
|
+
List of columns to be preserved as the original type.
|
3368
3558
|
use_default_extra_sf_lim : bool, optional
|
3369
3559
|
If True, the default limit for extra significant figure will be used
|
3370
3560
|
instead of inferring it from the input text. This will reduce the
|
@@ -3386,6 +3576,9 @@ def rich_dataframe(df, domains=None, are_ints=None,
|
|
3386
3576
|
for (i,row) in df.iterrows():
|
3387
3577
|
for col in df:
|
3388
3578
|
entry = df.at[i,col]
|
3579
|
+
if col in ignore_columns:
|
3580
|
+
df.at[i,col] = entry
|
3581
|
+
continue
|
3389
3582
|
is_rich_value = type(entry) in (RichValue, ComplexRichValue)
|
3390
3583
|
domain = domains[col] if col in domains else None
|
3391
3584
|
is_int = are_ints[col] if col in are_ints else None
|
@@ -3455,61 +3648,194 @@ def bounded_gaussian(x, m=0., s=1., a=np.inf):
|
|
3455
3648
|
y /= s * sqrt_tau
|
3456
3649
|
return y
|
3457
3650
|
|
3458
|
-
def general_pdf(x, loc=0, scale=1, bounds=[-np.inf,np.inf]):
|
3651
|
+
def splitgaussian(x, m=0, s1=1., s2=1.):
|
3459
3652
|
"""
|
3460
|
-
|
3653
|
+
Split gaussian function.
|
3654
|
+
|
3655
|
+
Parameters
|
3656
|
+
----------
|
3657
|
+
x : array (float)
|
3658
|
+
Independent variable.
|
3659
|
+
m : float, optional
|
3660
|
+
Mode of the curve. The default is 0.
|
3661
|
+
s1, s2 : float, optional
|
3662
|
+
Widths of the curve for the left and right sides that define the
|
3663
|
+
1-sigma credibility interval with respect to the mode.
|
3664
|
+
The default is 1.
|
3461
3665
|
|
3462
|
-
|
3463
|
-
|
3464
|
-
|
3465
|
-
|
3666
|
+
Returns
|
3667
|
+
-------
|
3668
|
+
y : array (float)
|
3669
|
+
Resulting array.
|
3670
|
+
"""
|
3671
|
+
sm = (s1 + s2) / 2
|
3672
|
+
y = np.zeros(len(x))
|
3673
|
+
y[x<m] = 1 / (math.tau**(1/2) * sm) * np.exp(-1/2*((x[x<m]-m)/s1)**2)
|
3674
|
+
y[x>m] = 1 / (math.tau**(1/2) * sm) * np.exp(-1/2*((x[x>m]-m)/s2)**2)
|
3675
|
+
return y
|
3676
|
+
|
3677
|
+
def genextreme(x, m=0., s=1., e=0.):
|
3678
|
+
"""
|
3679
|
+
Generative extreme value function.
|
3466
3680
|
|
3467
3681
|
Parameters
|
3468
3682
|
----------
|
3469
3683
|
x : array (float)
|
3470
3684
|
Independent variable.
|
3471
|
-
|
3685
|
+
m : float, optional
|
3686
|
+
Mode of the curve. The default is 0.
|
3687
|
+
s : float, optional
|
3688
|
+
Width of the curve (similar to the standard deviation).
|
3689
|
+
The default is 1.
|
3690
|
+
e : float, parameter
|
3691
|
+
Shape parameter, that defines the asymmetry of the curve.
|
3692
|
+
|
3693
|
+
Returns
|
3694
|
+
-------
|
3695
|
+
y : array (float)
|
3696
|
+
Resulting array.
|
3697
|
+
"""
|
3698
|
+
with warnings.catch_warnings():
|
3699
|
+
warnings.simplefilter('ignore', category=RuntimeWarning)
|
3700
|
+
y = scipy.stats.genextreme.pdf(x, e, m, s)
|
3701
|
+
return y
|
3702
|
+
|
3703
|
+
def qsplitgaussian(x, m=0., s1=1., s2=1., num_points=1200, sigmas=8.,
|
3704
|
+
loss_limit=1e-3,):
|
3705
|
+
"""
|
3706
|
+
Split gaussian function with quantile-based parameters.
|
3707
|
+
|
3708
|
+
Parameters
|
3709
|
+
----------
|
3710
|
+
x : array (float)
|
3711
|
+
Independent variable.
|
3712
|
+
m : float, optional
|
3713
|
+
Median of the curve. The default is 0.
|
3714
|
+
s1, s2 : float, optional
|
3715
|
+
Widths of the curve for the left and right sides that define the
|
3716
|
+
1-sigma confidence interval. The default is 1.
|
3717
|
+
loss_limit : float, optional
|
3718
|
+
Limit in the root mean squared error (RMSE) in the quantiles
|
3719
|
+
defined with the resulting function. The default 1e-3.
|
3720
|
+
|
3721
|
+
Returns
|
3722
|
+
-------
|
3723
|
+
y : array (float)
|
3724
|
+
Resulting array.
|
3725
|
+
"""
|
3726
|
+
alpha = 0.15865
|
3727
|
+
if type(x) in (int, float):
|
3728
|
+
x = [x]
|
3729
|
+
x = np.array(x)
|
3730
|
+
x_ = np.linspace(m - sigmas*s1, m + sigmas*s2, num_points)
|
3731
|
+
def loss_function(params):
|
3732
|
+
m_, s1_, s2_ = params
|
3733
|
+
x_1 = x_[(x_ < m)]
|
3734
|
+
x_2 = x_[(x_ < m - s1)]
|
3735
|
+
x_3 = x_[(x_ < m + s2)]
|
3736
|
+
err1 = np.trapz(splitgaussian(x_1, m_, s1_, s2_), x_1) - 0.5
|
3737
|
+
err2 = np.trapz(splitgaussian(x_2, m_, s1_, s2_), x_2) - alpha
|
3738
|
+
err3 = np.trapz(splitgaussian(x_3, m_, s1_, s2_), x_3) - (1.-alpha)
|
3739
|
+
loss = (err1**2 + err2**2 + err3**2) / 3
|
3740
|
+
return loss
|
3741
|
+
guess = [m - s2*scipy.stats.norm.ppf(1 - (s1+s2)/s2/4), s1, s2]
|
3742
|
+
result = scipy.optimize.minimize(loss_function, x0=guess, method='Nelder-Mead')
|
3743
|
+
params = result.x
|
3744
|
+
rmse = np.sqrt(result.fun)
|
3745
|
+
if result.success and rmse < loss_limit:
|
3746
|
+
y = splitgaussian(x, *params)
|
3747
|
+
else:
|
3748
|
+
y = np.nan * np.ones(len(x))
|
3749
|
+
return y
|
3750
|
+
|
3751
|
+
def qgenextreme(x, m=0., s1=1., s2=1., num_points=800, sigmas=12.,
|
3752
|
+
loss_limit=1e-3):
|
3753
|
+
"""
|
3754
|
+
Generative extreme value function with quantile-based parameters.
|
3755
|
+
|
3756
|
+
Parameters
|
3757
|
+
----------
|
3758
|
+
x : array (float)
|
3759
|
+
Independent variable.
|
3760
|
+
m : float, optional
|
3761
|
+
Median of the curve. The default is 0.
|
3762
|
+
s1, s2 : float, optional
|
3763
|
+
Widths of the curve for the left and right sides that define the
|
3764
|
+
1-sigma confidence interval. The default is 1.
|
3765
|
+
loss_limit : float, optional
|
3766
|
+
Limit in the root mean squared error (RMSE) in the quantiles
|
3767
|
+
defined with the resulting function. The default 1e-3.
|
3768
|
+
|
3769
|
+
Returns
|
3770
|
+
-------
|
3771
|
+
y : array (float)
|
3772
|
+
Resulting array.
|
3773
|
+
"""
|
3774
|
+
alpha = 0.15865
|
3775
|
+
if type(x) in (int, float):
|
3776
|
+
x = [x]
|
3777
|
+
x = np.array(x)
|
3778
|
+
x_ = np.linspace(m - s1*sigmas, m + s2*sigmas, num_points)
|
3779
|
+
def loss_function(params, m, s1, s2):
|
3780
|
+
m_, s_, e_ = params
|
3781
|
+
m_, s1_, s2_ = params
|
3782
|
+
x_1 = x_[(x_ < m)]
|
3783
|
+
x_2 = x_[(x_ < m - s1)]
|
3784
|
+
x_3 = x_[(x_ < m + s2)]
|
3785
|
+
err1 = np.trapz(genextreme(x_1, m_, s_, e_), x_1) - 0.5
|
3786
|
+
err2 = np.trapz(genextreme(x_2, m_, s_, e_), x_2) - alpha
|
3787
|
+
err3 = np.trapz(genextreme(x_3, m_, s_, e_), x_3) - (1.-alpha)
|
3788
|
+
loss = (err1**2 + err2**2 + err3**2) / 3
|
3789
|
+
return loss
|
3790
|
+
guess = [0., 1., 0.]
|
3791
|
+
result = scipy.optimize.minimize(loss_function, x0=guess, args=(0.,1.,1.),
|
3792
|
+
method='Nelder-Mead')
|
3793
|
+
params = result.x
|
3794
|
+
e = params[-1] if result.success else 0.
|
3795
|
+
s = (s1 + s2)/2
|
3796
|
+
m_ = m + s*np.log(np.log(2)) if e == 0 else m - s*(np.log(2)**(-e)-1)/e
|
3797
|
+
guess = [m_, s, e]
|
3798
|
+
result = scipy.optimize.minimize(loss_function, x0=guess, args=(m,s1,s2),
|
3799
|
+
method='Nelder-Mead')
|
3800
|
+
params = result.x
|
3801
|
+
rmse = np.sqrt(result.fun)
|
3802
|
+
if result.success and rmse < loss_limit:
|
3803
|
+
y = genextreme(x, *params)
|
3804
|
+
else:
|
3805
|
+
y = np.nan * np.ones(len(x))
|
3806
|
+
return y
|
3807
|
+
|
3808
|
+
def asymmetric_bounded_gaussian(x, m=0, s1=1., s2=1., a1=np.inf, a2=np.inf,
|
3809
|
+
corrected=True):
|
3810
|
+
"""
|
3811
|
+
Versatile PDF with given median and uncertainties for the given domain.
|
3812
|
+
|
3813
|
+
Parameters
|
3814
|
+
----------
|
3815
|
+
x : array (float)
|
3816
|
+
Independent variable.
|
3817
|
+
m : float, optional
|
3472
3818
|
Median of the distribution. The default is 0.
|
3473
|
-
|
3474
|
-
|
3819
|
+
s1, s2 : float, optional
|
3820
|
+
Uncertainties of the distribution (1-sigma confidence interval).
|
3475
3821
|
The default is 1. It can be a list with lower and upper uncertainties.
|
3476
3822
|
The resulting 1-sigma confidence interval must be lower than the
|
3477
3823
|
boundaries of the independent variable.
|
3478
|
-
|
3479
|
-
|
3480
|
-
The default is
|
3824
|
+
a1, a2 : list (float), optional
|
3825
|
+
Amplitudes of the independent variable.
|
3826
|
+
The default is np.inf.
|
3827
|
+
corrected : bool, optional
|
3828
|
+
If True, the two halves of the bounded gaussian will be interpolated
|
3829
|
+
so that the PDF is continuous.
|
3481
3830
|
|
3482
3831
|
Returns
|
3483
3832
|
-------
|
3484
3833
|
y : array (float)
|
3485
3834
|
Resulting PDF for the input array.
|
3486
3835
|
"""
|
3487
|
-
|
3488
|
-
def symmetric_general_pdf(x, m, s, a):
|
3489
|
-
"""
|
3490
|
-
Symmetric general PDF with given median (m) and uncertainty (s) with a
|
3491
|
-
boundary mained in the median with a given amplitude (a).
|
3492
|
-
"""
|
3493
|
-
if a > s:
|
3494
|
-
y = bounded_gaussian(x, m, s, a)
|
3495
|
-
else:
|
3496
|
-
raise Exception('Amplitude must be greater than uncertainty.')
|
3497
|
-
return y
|
3498
|
-
|
3499
|
-
m, s, b = loc, scale, bounds
|
3500
3836
|
x = np.array(x)
|
3501
|
-
|
3502
|
-
if not b[0] < m < b[1]:
|
3503
|
-
raise Exception('Center ({}) is not inside the boundaries {}.'
|
3504
|
-
.format(m, b))
|
3505
|
-
a = [m - b[0], b[1] - m]
|
3506
|
-
if not hasattr(s, '__iter__'):
|
3507
|
-
s = [s]*2
|
3508
|
-
s1, s2 = s
|
3509
|
-
a1, a2 = a
|
3510
|
-
|
3511
3837
|
if s1 == s2 and a1 == a2:
|
3512
|
-
y =
|
3838
|
+
y = bounded_gaussian(x, m, s1, a1)
|
3513
3839
|
else:
|
3514
3840
|
def correction(x, m, s, c, sign=1):
|
3515
3841
|
"""Correction to smooth the final asymmetric PDF."""
|
@@ -3525,41 +3851,139 @@ def general_pdf(x, loc=0, scale=1, bounds=[-np.inf,np.inf]):
|
|
3525
3851
|
return g
|
3526
3852
|
y = np.zeros(x.size)
|
3527
3853
|
cond1 = x < m
|
3528
|
-
cond2 = x
|
3529
|
-
y1 =
|
3530
|
-
y2 =
|
3531
|
-
|
3532
|
-
|
3533
|
-
|
3534
|
-
|
3535
|
-
|
3536
|
-
|
3537
|
-
|
3538
|
-
|
3539
|
-
|
3540
|
-
|
3541
|
-
|
3542
|
-
|
3543
|
-
|
3544
|
-
c1 =
|
3545
|
-
|
3546
|
-
|
3547
|
-
|
3548
|
-
|
3549
|
-
|
3550
|
-
|
3551
|
-
|
3552
|
-
|
3553
|
-
|
3554
|
-
|
3555
|
-
|
3556
|
-
|
3557
|
-
|
3558
|
-
|
3559
|
-
|
3854
|
+
cond2 = x >= m
|
3855
|
+
y1 = bounded_gaussian(x[cond1], m, s1, a1)
|
3856
|
+
y2 = bounded_gaussian(x[cond2], m, s2, a2)
|
3857
|
+
y[cond1] = y1
|
3858
|
+
y[cond2] = y2
|
3859
|
+
if corrected:
|
3860
|
+
h1 = bounded_gaussian(m, m, s1, a1)
|
3861
|
+
h2 = bounded_gaussian(m, m, s2, a2)
|
3862
|
+
h1_, h2_ = h1.copy(), h2.copy()
|
3863
|
+
h1, h2 = min(h1, h2), max(h1, h2)
|
3864
|
+
cond1_ = x[cond1] > m - s1
|
3865
|
+
cond2_ = x[cond2] < m + s2
|
3866
|
+
frac = 0.
|
3867
|
+
y_min = 0.
|
3868
|
+
lim = 1/4 * h1
|
3869
|
+
i = 0
|
3870
|
+
c1, c2 = 0., h2-h1
|
3871
|
+
while y_min < lim:
|
3872
|
+
if i > 0:
|
3873
|
+
frac += 1/8
|
3874
|
+
c1 = frac * (h2 - h1)
|
3875
|
+
c2 = h2 - h1 - c1
|
3876
|
+
if h1_ < h2_:
|
3877
|
+
c1, c2 = -c2, c1
|
3878
|
+
x_ = np.linspace(m - s1, m + s2, int(1e3))
|
3879
|
+
cond1_ = x_ < m
|
3880
|
+
cond2_ = x_ > m
|
3881
|
+
y1_ = bounded_gaussian(x_[cond1_], m, s1, a1)
|
3882
|
+
y2_ = bounded_gaussian(x_[cond2_], m, s2, a2)
|
3883
|
+
y1_ -= correction(x_[cond1_], m, s1, c1, sign=1)
|
3884
|
+
y2_ += correction(x_[cond2_], m, s2, c2, sign=-1)
|
3885
|
+
y_min = np.min(np.append(y1_, y2_))
|
3886
|
+
i += 1
|
3887
|
+
y[cond1] -= correction(x[cond1], m, s1, c1, sign=1)
|
3888
|
+
y[cond2] += correction(x[cond2], m, s2, c2, sign=-1)
|
3889
|
+
y[x==m] = h1 + (1-frac) * (h2 - h1)
|
3560
3890
|
if x.shape == ():
|
3561
3891
|
y = y[0]
|
3892
|
+
return y
|
3893
|
+
|
3894
|
+
def general_pdf(x, loc=0, scale=1, bounds=[-np.inf,np.inf],
                use_splitnorm=None, use_genextreme=None):
    """
    General PDF with given median and uncertainties for the given domain.

    It is based on corrected asymmetrical bounded gaussians, split gaussians,
    and generalized extreme value (GEV) functions.

    Parameters
    ----------
    x : array (float)
        Independent variable.
    loc : float, optional
        Median of the distribution. The default is 0.
    scale : float, optional
        Uncertainty of the distribution (1-sigma confidence interval).
        The default is 1. It can be a list with lower and upper uncertainties.
        The resulting 1-sigma confidence interval must be lower than the
        boundaries of the independent variable.
    bounds : list (float), optional
        Boundaries of the independent variable.
        The default is [-np.inf, np.inf].
    use_splitnorm : bool, optional
        If True, split-normal distributions will be used to model the PDF
        with small asymmetry in the uncertainties, if the bounds are large
        enough. If None (default), the value is resolved through
        'set_default_value'.
    use_genextreme : bool, optional
        If True, generalized extreme value (GEV) distributions will be used
        to model the PDF with small/moderate asymmetry in the uncertainties,
        if the bounds are large enough. It is ignored unless 'use_splitnorm'
        is enabled. If None (default), the value is resolved through
        'set_default_value'.

    Returns
    -------
    y : array (float)
        Resulting PDF for the input array.

    Raises
    ------
    Exception
        If the center is not strictly inside the boundaries.
    """
    use_splitnorm = set_default_value(use_splitnorm, 'use split normal'
                                      ' distributions to model rich values')
    use_genextreme = set_default_value(use_genextreme, 'use generative extreme'
                                ' value distributions to model rich values')
    # GEV modelling is only available on top of the split-normal modelling.
    if not use_splitnorm:
        use_genextreme = False
    m, s, b = loc, scale, bounds
    x = np.array(x)
    s = np.abs(s)
    if not b[0] < m < b[1]:
        raise Exception('Center ({}) is not inside the boundaries {}.'
                        .format(m, b))
    if not hasattr(s, '__iter__'):
        s = [s]*2
    s1, s2 = s
    # Distances from the center to the lower and upper boundaries.
    a1, a2 = m - b[0], b[1] - m

    def bounded():
        """Asymmetric bounded gaussian, used as the base/fallback model."""
        return asymmetric_bounded_gaussian(x, m, s1, s2, a1, a2)

    def extreme_or_bounded():
        """GEV model if enabled, otherwise the bounded gaussian."""
        return qgenextreme(x, m, s1, s2) if use_genextreme else bounded()

    # Smallest boundary distance in units of the uncertainty, and asymmetry
    # ratio of the uncertainties (always >= 1 for finite positive values).
    a_s = min(a1/s1, a2/s2)
    s_s = max(s2/s1, s1/s2)

    # 'y' stays None whenever the bounded gaussian has to be used, so that
    # it is evaluated only once, at the end.
    y = None
    if use_splitnorm and s1 != s2:
        if a_s >= 4.:
            # Boundaries are far away; the model is chosen from the asymmetry.
            # In this branch the expression below always evaluates to 2.
            lim = 1.5 + 0.5*min(1., max(0., a_s - 3.) / (4. - 3.))
            if s_s <= 1.3:
                y = qsplitgaussian(x, m, s1, s2)
            elif 1.3 < s_s < 1.4:
                # Linear transition between split gaussian and the next model.
                y1 = qsplitgaussian(x, m, s1, s2)
                y2 = extreme_or_bounded()
                c = (s_s - 1.3) / (1.4 - 1.3)
                y = (1-c)*y1 + c*y2
            elif 1.4 <= s_s <= lim:
                y = extreme_or_bounded()
            elif lim < s_s < lim+0.1 and use_genextreme:
                # Linear transition between GEV and the bounded gaussian.
                y1 = qgenextreme(x, m, s1, s2)
                y2 = bounded()
                c = (s_s - lim) / 0.1
                y = (1-c)*y1 + c*y2
        elif a_s > 3.:
            # Transition zone (3 < a_s < 4): blend the bounded gaussian with
            # the unbounded model, keeping the same modelling options as the
            # ones requested for this call.
            y1 = bounded()
            y2 = general_pdf(x, m, s, use_splitnorm=use_splitnorm,
                             use_genextreme=use_genextreme)
            c = (a_s - 3.) / (4. - 3.)
            y = (1-c)*y1 + c*y2
        # Discard results with invalid values (np.any also handles scalars).
        if y is not None and np.any(np.isnan(y)):
            y = None
    if y is None:
        y = bounded()
    return y
|
3564
3988
|
|
3565
3989
|
def sample_from_pdf(pdf, size, low=None, high=None, **kwargs):
|
@@ -3742,7 +4166,8 @@ def distr_with_rich_values(function, args, len_samples=None,
|
|
3742
4166
|
np.array([function(*args_distr[:,i]) for i in range(len_samples)]))
|
3743
4167
|
return distr
|
3744
4168
|
|
3745
|
-
def center_and_uncs(distr,
|
4169
|
+
def center_and_uncs(distr, center_function='median', interval=68.27,
                    fraction=1.):
    """
    Return the central value and uncertainties of the input distribution.

    Parameters
    ----------
    distr : array (float)
        Input distribution. Non-finite values are discarded.
    center_function : str, optional
        Function to calculate the central value of the distribution.
        Possible values are 'median' (by default), 'mean' and 'mode'.
    interval : float, optional
        Size of the interval, in percentile, around the main value which
        defines the uncertainties. The default is 68.27 (1 sigma confidence
        interval).
    fraction : float, optional
        Central fraction of the sorted distribution that is kept; a margin of
        (1 - fraction) / 2 is removed from each end and the uncertainties are
        scaled by 1 plus that margin. The default is 1 (nothing is removed).

    Returns
    -------
    center : float
        Central value of the distribution.
    uncs : list (float)
        Lower and upper uncertainties of the distribution.

    Raises
    ------
    Exception
        If 'center_function' is not 'median', 'mean' or 'mode'.
    """
    distr = np.array(distr)
    distr = np.sort(distr[np.isfinite(distr)].flatten())
    size = len(distr)
    # The margin is always defined, as it is needed below to rescale the
    # uncertainties even if the distribution is too small to be trimmed.
    margin = (1. - fraction) / 2.
    if fraction != 1. and size > 1:
        distr = distr[round(margin*size):round((1-margin)*size)]
    if center_function in ('median', 'mean'):
        center_function = np.mean if center_function == 'mean' else np.median
        center = center_function(distr)
        # Percentile position of the sample closest to the central value.
        ind = np.argmin(np.abs(distr - center))
        ind = 100. * ind / len(distr)
        perc1 = ind - interval/2
        perc2 = ind + interval/2
        # Shift the interval if it goes beyond the range of percentiles.
        if perc1 < 0.:
            perc2 += abs(0. - perc1)
            perc1 = 0.
        if perc2 > 100.:
            perc1 -= abs(perc2 - 100.)
            perc2 = 100.
        conf_interv_1 = np.percentile(distr, perc1)
        conf_interv_2 = np.percentile(distr, perc2)
    elif center_function == 'mode':
        # The median and its uncertainties define the evaluation range.
        m, (s1, s2) = center_and_uncs(distr, center_function='median',
                                      interval=interval, fraction=fraction)
        x = np.linspace(m-3*s1, m+3*s2, len(distr))
        dx = x[1] - x[0]
        y_, x_ = np.histogram(distr, bins='auto', density=True)
        idx = np.argmax(y_)
        # NOTE(review): this is the lower edge of the most populated bin, not
        # its midpoint - confirm that this is the intended mode estimate.
        center = x_[idx]
        x_ = (x_[1:] + x_[:-1]) / 2
        # Smoothing factor of the spline: squared residuals between the
        # histogram and its rolling mean.
        y_s = rolling_function(np.mean, y_, size=5)
        s = sum((y_s - y_)**2)
        spline = scipy.interpolate.UnivariateSpline(x_, y_, s=s, ext='zeros')
        y = spline(x)
        # Accumulate the probability from the densest points downwards.
        inds = np.argsort(y)[::-1]
        x = x[inds]
        y = y[inds]
        area = np.cumsum(y*dx)
        # The density is normalized to 1, while the interval is a percentage.
        values = x[area <= interval/100.]
        if len(values) == 0:
            # Not even the densest point fits in the requested interval.
            values = x[:1]
        conf_interv_1 = np.min(values)
        conf_interv_2 = np.max(values)
    else:
        raise Exception("Wrong function type,"
                        " it should be 'median', 'mean' or 'mode'.")
    unc1 = center - conf_interv_1
    unc2 = conf_interv_2 - center
    if fraction != 1.:
        unc1 *= 1. + margin
        unc2 *= 1. + margin
    uncs = [unc1, unc2]
    return center, uncs
|
3797
4249
|
|
3798
4250
|
# Pair of functions used when evaluating distributions.
|
3799
4251
|
def add_zero_infs(interval, zero_log, inf_log):
|
@@ -3975,6 +4427,7 @@ def evaluate_distr(distr, domain=None, function=None, args=None,
|
|
3975
4427
|
if is_range_small:
|
3976
4428
|
bins = np.arange(x1, x2+2) - 0.5
|
3977
4429
|
probs, _ = np.histogram(distr, bins=bins, density=True)
|
4430
|
+
probs = rolling_function(np.mean, probs, size=5)
|
3978
4431
|
values = np.arange(x1, x2+1)
|
3979
4432
|
pdf = lambda x: np.interp(x, values, probs, left=0., right=0.)
|
3980
4433
|
distr = sample_from_pdf(pdf, size=size, low=x1, high=x2)
|
@@ -4079,13 +4532,14 @@ def evaluate_distr(distr, domain=None, function=None, args=None,
|
|
4079
4532
|
rvalue.expression = expression
|
4080
4533
|
|
4081
4534
|
if save_pdf:
|
4082
|
-
x1, x2 = rvalue.interval(
|
4535
|
+
x1, x2 = rvalue.interval(8.)
|
4083
4536
|
if is_int and is_range_small:
|
4084
4537
|
bins = np.arange(x1, x2+2) - 0.5
|
4085
4538
|
else:
|
4086
|
-
num_bins = max(
|
4539
|
+
num_bins = max(120, size//100)
|
4087
4540
|
bins = np.linspace(x1, x2, num_bins)
|
4088
4541
|
probs, _ = np.histogram(distr, bins=bins, density=True)
|
4542
|
+
probs = rolling_function(np.mean, probs, size=5)
|
4089
4543
|
values = np.mean([bins[0:-1], bins[1:]], axis=0)
|
4090
4544
|
pdf_info = {'values': values, 'probs': probs}
|
4091
4545
|
rvalue.pdf_info = pdf_info
|
@@ -4413,8 +4867,7 @@ def function_with_rich_values(function, args, unc_function=None,
|
|
4413
4867
|
if prop_score > lim1:
|
4414
4868
|
factor = 1. - 0.5 * (min(prop_score,20.)-lim1) / (lim2-lim1)
|
4415
4869
|
len_samples = int(factor * len_samples)
|
4416
|
-
distr = distr_with_rich_values(function, args, len_samples,
|
4417
|
-
is_vectorizable)
|
4870
|
+
distr = distr_with_rich_values(function, args, len_samples, is_vectorizable)
|
4418
4871
|
if output_size == 1 and len(distr.shape) == 1:
|
4419
4872
|
distr = np.array([distr]).transpose()
|
4420
4873
|
output = []
|
@@ -4693,7 +5146,7 @@ def errorbar(x, y, lims_factor=None, **kwargs):
|
|
4693
5146
|
if len(xc) > 0:
|
4694
5147
|
with np.errstate(divide='ignore', invalid='ignore'):
|
4695
5148
|
try:
|
4696
|
-
r = abs(linregress(xc, np.arange(len(xc))).rvalue)
|
5149
|
+
r = abs(scipy.stats.linregress(xc, np.arange(len(xc))).rvalue)
|
4697
5150
|
except:
|
4698
5151
|
r = 0
|
4699
5152
|
else:
|
@@ -4818,6 +5271,7 @@ def curve_fit(x, y, function, guess, num_samples=3000,
|
|
4818
5271
|
Number of times that the fit failed, for the iterations among the
|
4819
5272
|
different samples (the number of iterations is num_samples).
|
4820
5273
|
"""
|
5274
|
+
verbose = True if num_samples > 100 else False
|
4821
5275
|
if len(x) != len(y):
|
4822
5276
|
raise Exception('Input arrays have not the same size.')
|
4823
5277
|
num_points = len(y)
|
@@ -4871,14 +5325,18 @@ def curve_fit(x, y, function, guess, num_samples=3000,
|
|
4871
5325
|
return error
|
4872
5326
|
losses, dispersions = [], []
|
4873
5327
|
samples = [[] for i in range(num_params)]
|
4874
|
-
|
5328
|
+
if verbose:
|
5329
|
+
print('Fitting...')
|
4875
5330
|
num_fails = 0
|
4876
5331
|
x_sample = x.sample(num_samples)
|
4877
5332
|
y_sample = y.sample(num_samples)
|
5333
|
+
if num_samples == 1:
|
5334
|
+
x_sample = [x_sample]
|
5335
|
+
y_sample = [y_sample]
|
4878
5336
|
cond = x.are_centrs & y.are_centrs
|
4879
5337
|
num_disp_points = cond.sum()
|
4880
5338
|
for (i, (xs, ys)) in enumerate(zip(x_sample, y_sample)):
|
4881
|
-
result = minimize(loss_function, guess, args=(xs,ys), **kwargs)
|
5339
|
+
result = scipy.optimize.minimize(loss_function, guess, args=(xs,ys), **kwargs)
|
4882
5340
|
if result.success:
|
4883
5341
|
params_i = result.x
|
4884
5342
|
for j in range(num_params):
|
@@ -4890,7 +5348,7 @@ def curve_fit(x, y, function, guess, num_samples=3000,
|
|
4890
5348
|
losses += [result.fun]
|
4891
5349
|
else:
|
4892
5350
|
num_fails += 1
|
4893
|
-
if ((i+1) % (num_samples//4)) == 0:
|
5351
|
+
if verbose and ((i+1) % (num_samples//4)) == 0:
|
4894
5352
|
print(' {} %'.format(100*(i+1)//num_samples))
|
4895
5353
|
if num_fails > 0.9*num_samples:
|
4896
5354
|
raise Exception('The fit failed more than 90 % of the time.')
|
@@ -4929,6 +5387,7 @@ def point_fit(y, function, guess, num_samples=3000,
|
|
4929
5387
|
|
4930
5388
|
The parameters and the outputs are the same as in the 'curve_fit' function.
|
4931
5389
|
"""
|
5390
|
+
verbose = True if num_samples > 100 else False
|
4932
5391
|
ya = rich_array(y)
|
4933
5392
|
y = rich_array([y]) if len(ya.shape) == 0 else ya
|
4934
5393
|
num_points = len(y)
|
@@ -4960,13 +5419,14 @@ def point_fit(y, function, guess, num_samples=3000,
|
|
4960
5419
|
return error
|
4961
5420
|
losses, dispersions = [], []
|
4962
5421
|
samples = [[] for i in range(num_params)]
|
4963
|
-
|
5422
|
+
if verbose:
|
5423
|
+
print('Fitting...')
|
4964
5424
|
num_fails = 0
|
4965
5425
|
y_sample = y.sample(num_samples)
|
4966
5426
|
cond = y.are_centrs
|
4967
5427
|
num_disp_points = cond.sum()
|
4968
5428
|
for (i,ys) in enumerate(y_sample):
|
4969
|
-
result = minimize(loss_function, guess, args=ys, **kwargs)
|
5429
|
+
result = scipy.optimize.minimize(loss_function, guess, args=ys, **kwargs)
|
4970
5430
|
if result.success:
|
4971
5431
|
params_i = result.x
|
4972
5432
|
for j in range(num_params):
|
@@ -5006,7 +5466,76 @@ def point_fit(y, function, guess, num_samples=3000,
|
|
5006
5466
|
result = {'parameters': params_fit, 'dispersion': dispersion, 'loss': loss,
|
5007
5467
|
'parameters samples': samples, 'dispersion sample': dispersions,
|
5008
5468
|
'loss sample': losses, 'number of fails': num_fails}
|
5009
|
-
return result
|
5469
|
+
return result
|
5470
|
+
|
5471
|
+
def rolling_function(func, x, size, **kwargs):
    """
    Apply a function in a rolling way, in windows of the specified size.

    Parameters
    ----------
    func : function
        Function to be applied. It must accept the axis as its second
        positional argument (as 'np.mean' or 'np.median' do).
    x : array
        Input data. Any array-like is accepted.
    size : int
        Size of the windows to group the data. It is meant to be odd; for
        even sizes, adjacent outputs are averaged to keep the input length.
        Sizes greater than the length of the data are reduced to it.
    **kwargs : (various)
        Keyword arguments of the function to be applied.

    Returns
    -------
    y : array
        Resultant array, with the same length as the input data.

    Raises
    ------
    Exception
        If the window size, truncated to an integer, is not positive.
    """

    def rolling_window(x, window):
        """
        Group the input data according to the specified window size.

        Function by Erik Rigtorp.

        Parameters
        ----------
        x : array
            Input data.
        window : int
            Size of the windows to group the data.

        Returns
        -------
        y : array
            Output array; a read-only view with one extra trailing axis that
            contains the elements of each window.
        """
        shape = x.shape[:-1] + (x.shape[-1] - window + 1, window)
        strides = x.strides + (x.strides[-1],)
        # The windows overlap in memory, so the view is made read-only.
        y = np.lib.stride_tricks.as_strided(x, shape=shape, strides=strides,
                                            writeable=False)
        return y

    x = np.asarray(x)
    # The size is truncated before the check, so that values between 0 and 1
    # are rejected instead of producing empty windows.
    size = int(size)
    if size <= 0:
        raise Exception('Window size must be positive.')
    N = len(x)
    if size == 1 or N == 0:
        return x
    size = min(N, size)
    # Central part, where complete windows are available.
    y_c = func(rolling_window(x, size), -1, **kwargs)
    # Number of points at each end without a complete window.
    M = size // 2
    min_size = 1
    y_1, y_2 = np.zeros(M), np.zeros(M)
    for i in range(M):
        # Shrinking windows of max(1, 2*i) samples anchored at each end.
        width = max(min_size, 2*i)
        y_1[i] = func(x[:width], **kwargs)
        y_2[-i-1] = func(x[N-width:], **kwargs)
    y = np.concatenate((y_1, y_c, y_2))
    if size % 2 == 0:
        # There is one extra point for even sizes; neighbours are averaged.
        y = y[1:]/2 + y[:-1]/2

    return y
|
5010
5539
|
|
5011
5540
|
def edit_pdf_info(pdf_info, function):
|
5012
5541
|
"""Modify input PDF for a rich value with the given function."""
|
@@ -5,7 +5,7 @@ with open('README.md', 'r') as file:
|
|
5
5
|
|
6
6
|
setuptools.setup(
|
7
7
|
name = 'richvalues',
|
8
|
-
version = '4.
|
8
|
+
version = '4.2.0',
|
9
9
|
license = 'BSD-3-Clause',
|
10
10
|
author = 'Andrés Megías Toledano',
|
11
11
|
description = 'Python library for working with uncertainties and upper/lower limits',
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|