ararpy 0.0.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ararpy/__init__.py +178 -0
- ararpy/calc/__init__.py +11 -0
- ararpy/calc/age.py +161 -0
- ararpy/calc/arr.py +490 -0
- ararpy/calc/basic.py +57 -0
- ararpy/calc/corr.py +240 -0
- ararpy/calc/err.py +117 -0
- ararpy/calc/histogram.py +166 -0
- ararpy/calc/isochron.py +194 -0
- ararpy/calc/jvalue.py +38 -0
- ararpy/calc/plot.py +68 -0
- ararpy/calc/raw_funcs.py +118 -0
- ararpy/calc/regression.py +961 -0
- ararpy/calc/spectra.py +63 -0
- ararpy/files/__init__.py +2 -0
- ararpy/files/arr_file.py +86 -0
- ararpy/files/basic.py +100 -0
- ararpy/files/calc_file.py +683 -0
- ararpy/files/export.py +1181 -0
- ararpy/files/json.py +49 -0
- ararpy/files/new_file.py +31 -0
- ararpy/files/raw.py +115 -0
- ararpy/files/raw_file.py +14 -0
- ararpy/files/xls.py +27 -0
- ararpy/smp/__init__.py +17 -0
- ararpy/smp/basic.py +371 -0
- ararpy/smp/calculation.py +94 -0
- ararpy/smp/consts.py +20 -0
- ararpy/smp/corr.py +376 -0
- ararpy/smp/initial.py +232 -0
- ararpy/smp/plots.py +636 -0
- ararpy/smp/sample.py +911 -0
- ararpy/smp/style.py +191 -0
- ararpy/smp/table.py +131 -0
- ararpy-0.0.1a1.dist-info/LICENSE +21 -0
- ararpy-0.0.1a1.dist-info/METADATA +269 -0
- ararpy-0.0.1a1.dist-info/RECORD +39 -0
- ararpy-0.0.1a1.dist-info/WHEEL +5 -0
- ararpy-0.0.1a1.dist-info/top_level.txt +1 -0
ararpy/calc/corr.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: UTF-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
# ==========================================
|
|
5
|
+
# Copyright 2023 Yang
|
|
6
|
+
# ararpy - calc - corr
|
|
7
|
+
# ==========================================
|
|
8
|
+
#
|
|
9
|
+
#
|
|
10
|
+
#
|
|
11
|
+
"""
|
|
12
|
+
import traceback
|
|
13
|
+
|
|
14
|
+
from ..calc import arr, err
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def blank(a0: list, e0: list, a1: list, e1: list):
    """
    Subtract blank signals from measured signals.

    :param a0: a list of measured isotope values
    :param e0: 1 sigma errors of a0, list type
    :param a1: a list of blank isotope values
    :param e1: 1 sigma errors of a1, list type
    :return: the result of ``arr.sub`` for the two (values, errors) pairs,
        i.e. corrected data and their errors
    """
    measured = (a0, e0)
    background = (a1, e1)
    # Negative differences are kept on purpose; nothing is clamped to zero here.
    return arr.sub(measured, background)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def mdf(rm: float, srm: float, m1: float, m2: float, ra: float = 298.56,
        sra: float = 0):
    """
    Calculate mass discrimination factors from a measured 40Ar/36Ar ratio.

    Three laws are evaluated (after A.A.P. Koppers):
        linear:      (ra / rm + (m2 - m1) - 1) / (m2 - m1)
        exponential: ln(ra / rm) / ln(m2 / m1) / m1 + 1
        power:       (ra / rm) ** (1 / (m2 - m1))

    :param rm: measured ratio 40a/36a
    :param srm: error of rm in one sigma
    :param m1: Ar36 isotopic mass
    :param m2: Ar40 isotopic mass
    :param ra: theoretical 40a/36a
    :param sra: error of theoretical 40a/36a
    :return: linear mdf, error, exp mdf, error, pow mdf, error. The exp and
        pow pairs are the strings "Null" when their calculation raises.
    """
    # Isotopic masses are treated as exact.
    sm1 = 0
    sm2 = 0
    delta_m = m2 - m1
    sdelta_m = err.add(sm2, sm1)
    ratio_m = m2 / m1
    sratio_m = err.div((m2, sm2), (m1, sm1))
    isAapkop = True
    if isAapkop:
        ratio = ra / rm
        sratio = err.div((ra, sra), (rm, srm))
        # line
        k1 = (ratio + delta_m - 1) / delta_m  # A.A.P.Koppers
        # The operands are scalar (value, error) pairs, so the scalar
        # propagation helper err.div is used, like every other error term in
        # this function; the result is a single error value.
        k2 = err.div(((ratio + delta_m - 1), sratio), (delta_m, sdelta_m))
        # exp
        try:
            k3 = (np.log(ratio) / np.log(ratio_m)) * (1 / m1) + 1  # A.A.P.Koppers
            v1 = err.log((ratio, sratio))
            v2 = err.log((ratio_m, sratio_m))
            v3 = err.div((np.log(ratio), v1), (np.log(ratio_m), v2))
            k4 = err.div((np.log(ratio) / np.log(ratio_m), v3), (m1, sm1))
        except Exception:
            k3, k4 = "Null", "Null"
        # pow
        try:
            k5 = pow(ratio, (1 / delta_m))  # A.A.P.Koppers
            k6 = err.pow((ratio, sratio),
                         (1 / delta_m, err.div((1, 0), (delta_m, sdelta_m))))
        except Exception:
            k5, k6 = "Null", "Null"
        return k1, k2, k3, k4, k5, k6
    else:
        # Alternative linear definition; unreachable while isAapkop is True.
        mdf_line_2 = (rm / ra - 1) / delta_m  # Ryu et al., 2013
        return mdf_line_2, 0, 0, 0, 0, 0
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def discr(a0: list, e0: list, mdf: list, smdf: list, m: list, m40: list,
          isRelative=True, method="l"):
    """
    Apply the mass discrimination correction to a list of isotope values.

    :param a0: a list of tested isotope values
    :param e0: 1 sigma errors of a0, list type
    :param mdf: mass discrimination factors (MDF), list
    :param smdf: errors of MDF, list; percentages of mdf when isRelative is True
    :param m: masses of the isotope being corrected, list
    :param m40: masses of Ar40, list
    :param isRelative: errors of MDF are in a relative (percent) format
    :param method: correction method, "l" or "linear", "e" or "exponential", "p" or "power";
        only the first letter is inspected, anything else applies no correction
    :return: [corrected values, errors of corrected values]
        linear correction, MDF = [(Ar40/Ar36)true / (Ar40/Ar36)measure] * 1 / MD - 1 / MD + 1
        corr = blank_corrected / [MD * MDF - MD +1]
    """
    corrected, corrected_err = [], []
    if isRelative:
        # percentage errors -> absolute errors
        smdf = [pct * mdf[idx] / 100 for idx, pct in enumerate(smdf)]
    count = min(len(seq) for seq in (a0, e0, mdf, smdf))
    for i in range(count):
        delta_mass = abs(m40[i] - m[i])
        ratio_mass = abs(m40[i] / m[i]) if m[i] != 0 else 1
        law = method.lower()[0]
        if law == 'l':
            denominator = delta_mass * mdf[i] - delta_mass + 1
            # a vanishing denominator yields a zero factor instead of raising
            k0 = 1 / denominator if denominator != 0 else 0
            k1 = err.div((1, 0), (denominator, smdf[i] * delta_mass))
        elif law == 'e':
            exponent = mdf[i] * m40[i] - m[i]
            k0 = 1 / (ratio_mass ** exponent)
            exponent_err = err.mul((mdf[i], smdf[i]), (m40[i], 0))
            scale_err = err.pow((ratio_mass, 0), (exponent, exponent_err))
            k1 = err.div((1, 0), (ratio_mass ** exponent, scale_err))
        elif law == 'p':
            k0 = 1 / (mdf[i] ** delta_mass)
            scale_err = err.pow((mdf[i], smdf[i]), (delta_mass, 0))
            k1 = err.div((1, 0), (mdf[i] ** delta_mass, scale_err))
        else:
            k0 = 1
            k1 = 0
        corrected.append(a0[i] * k0)
        corrected_err.append(err.mul((a0[i], e0[i]), (k0, k1)))
    return [corrected, corrected_err]
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def decay(a0: list, e0: list, t1: list, t2: list, t3: list, f: list, sf: list,
          unit: str = 'h', isRelative=True):
    """
    Apply the decay correction factor to every value in a0.

    :param a0: a list of isotope values
    :param e0: 1 sigma errors of a0, list type
    :param t1: per-value first argument of get_decay_factor (test start time)
    :param t2: per-value second argument of get_decay_factor (irradiation end times)
    :param t3: per-value third argument of get_decay_factor (irradiation durations)
    :param f: decay constants, list
    :param sf: errors of f, list; percentages of f when isRelative is True
    :param unit: unit of decay constant, passed through to get_decay_factor
    :param isRelative: errors of f are in a relative (percent) format
    :return: [corrected values, errors of corrected values]
    """
    if isRelative:
        # percentage errors -> absolute errors
        sf = [pct * f[idx] / 100 for idx, pct in enumerate(sf)]
    corrected, corrected_err = [], []
    for idx, value in enumerate(a0):
        factor = get_decay_factor(t1[idx], t2[idx], t3[idx], f[idx], sf[idx], unit)
        corrected.append(value * factor[0])
        corrected_err.append(err.mul((value, e0[idx]), (factor[0], factor[1])))
    return [corrected, corrected_err]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def get_decay_factor(t1: list, t2: list, t3: list, f: float, sf: float,
                     unit: str = 'h'):
    """
    Calculate the decay correction factor and its error for one analysis.

    :param t1: [year, month, day, hour, min, (second)], test start time
    :param t2: irradiation end time for all cycles, [[year, month, day, hour, min],...]
    :param t3: irradiation durations for all cycles, list for all irradiation cycles, in hour
    :param f: decay constant
    :param sf: absolute error of f
    :param unit: unit of decay constant, input 'h' or 'a'; with any other
        value the standing times stay in seconds and t3 is used unconverted
    :return: (correction factor, error of factor); (1, 0) is returned, after
        printing the traceback, when anything inside the calculation raises
    """
    v1 = []  # per-cycle denominator terms of the factor
    v2 = []  # per-cycle durations (numerator terms)
    e1 = []  # per-cycle terms used only by the first (overwritten) error estimate
    # t_year, t_month, t_day, t_hour, t_min = t1
    t_test_start = get_datetime(*t1)  # the time when analysis began
    t2 = [get_datetime(*i) for i in t2]  # the time when irradiation ended for all cycles, in seconds
    k2 = [t_test_start - i for i in t2]  # standing time in seconds between irradiation and analysis

    try:
        if unit == 'h':
            k2 = [float(i) / 3600 for i in k2]  # convert seconds to hours
            t3 = [float(i) for i in t3]
        elif unit == 'a':
            # NOTE(review): standing time uses 365.242 days per year while the
            # durations below use 365 -- confirm whether this is intended.
            k2 = [i / (3600 * 24 * 365.242) for i in k2]  # convert seconds to years
            t3 = [float(i) / (24 * 365) for i in t3]
        for i in range(len(t3)):
            iP = 1  # power
            v1.append(iP * (1 - np.exp(-f * t3[i])) / (f * np.exp(f * k2[i])))
            e11 = t3[i] * np.exp(-f * t3[i]) / (f * np.exp(f * k2[i]))
            e12 = (np.exp(-f * t3[i]) - 1) * (1 + f * k2[i]) * np.exp(f * k2[i]) / (f * np.exp(f * k2[i])) ** 2
            e1.append(iP * (e11 + e12))
            v2.append(iP * t3[i])
        # factor = total irradiation duration / sum of the per-cycle terms
        k0 = sum(v2) / sum(v1)
        # This first error estimate is replaced by the assignment below; only
        # an exception raised while computing it has any effect.
        k1 = err.div((sum(v2), 0), (sum(v1), pow(sum(e1) ** 2 * sf ** 2, 0.5)))
        # other error calculation equation in CALC
        # It is calculated based on the assumption that only one irradiation exists, with a total duration
        # equal to the sum of t3, and that its end time is the finish time of the last irradiation
        k1 = pow(
            ((sum(t3) * np.exp(f * k2[-1]) * (1 - np.exp(-f * sum(t3))) + f * sum(t3) * k2[-1] * np.exp(f * k2[-1]) * (
                    1 - np.exp(-f * sum(t3))) - f * sum(t3) * np.exp(f * k2[-1]) * sum(t3) * np.exp(-f * sum(t3))) / (
                     1 - np.exp(-f * sum(t3))) ** 2) ** 2 * sf ** 2, 0.5)
    except Exception as e:
        # Best effort: report the problem and fall back to "no correction".
        print(traceback.format_exc())
        return 1, 0
    else:
        return k0, k1
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def get_datetime(t_year: int, t_month: int, t_day: int, t_hour: int, t_min: int,
                 t_seconds: int = 0, base=None):
    """
    Convert a calendar date and time to seconds elapsed since a base time.

    :param t_year: int, or anything int() accepts
    :param t_month: int
    :param t_day: int
    :param t_hour: int
    :param t_min: int
    :param t_seconds: int, default == 0
    :param base: base time [y, m, d, h, m], default [1970, 1, 1, 8, 0]
    :return: seconds since the base time (1970-1-1 8:00 by default)
    """
    year, month, day, hour, minute, second = (
        int(part) for part in (t_year, t_month, t_day, t_hour, t_min, t_seconds))
    if base is None:
        base = [1970, 1, 1, 8, 0]
    base_year, base_month, base_day, base_hour, base_min = base

    # February length depends on whether the target year is a leap year.
    is_leap = (year % 4 == 0 and year % 100 != 0) or year % 400 == 0
    month_lengths = [31, 29 if is_leap else 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

    # One extra day for every complete block of four years counted from the
    # base year up to and including the target year.
    year_span = year + 1 - base_year
    leap_days = (year_span - year_span % 4) / 4

    total_days = ((year - base_year) * 365 + leap_days
                  + sum(month_lengths[base_month - 1:month - 1]) + day - base_day)
    total_hours = total_days * 24 + hour - base_hour
    total_minutes = total_hours * 60 + minute - base_min
    return total_minutes * 60 + second
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def get_irradiation_datetime_by_string(datetime_str: str):
    """
    Split an irradiation description into datetime strings and durations.

    Parameters
    ----------
    datetime_str : cycles joined by 'S', each written as
        "<year>-<month>-<day>-<hour>-<minute>D<duration>", like
        "2022-04-19-18-35D13.45S2022-04-19-04-19D6.7"

    Returns
    -------
    list, flat and alternating per cycle: "<year>-<month>-<day>T<hour>:<minute>"
        followed by the duration as float, e.g.
        ['2022-04-19T18:35', 13.45, '2022-04-19T04:19', 6.7];
        ['', 0] when datetime_str is empty or None
    """
    if datetime_str is None or datetime_str == '':
        return ['', 0]
    parsed = []
    for segment in datetime_str.split('S'):
        stamp, duration = segment.split('D')
        # the last two dash-separated fields are hour and minute
        date_part, hour, minute = stamp.rsplit('-', 2)
        parsed.extend(["{}T{}:{}".format(date_part, hour, minute), float(duration)])
    return parsed
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def get_method_fitting_law_by_name(method_str: str):
    """
    Translate a fitting law name into three boolean flags.

    Parameters
    ----------
    method_str : 'linear', 'exponential' or 'power', in any letter case

    Returns
    -------
    list of three bool, [is_linear, is_exponential, is_power]; linear is
        selected when the name is unknown or is not a string (e.g. None)
    """
    laws = ('Linear', 'Exponential', 'Power')
    try:
        selected = laws.index(method_str.capitalize())
    except (ValueError, AttributeError):
        # ValueError: unknown name; AttributeError: not a string at all.
        selected = 0
    return [position == selected for position in range(len(laws))]
|
|
239
|
+
|
|
240
|
+
|
ararpy/calc/err.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: UTF-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
# ==========================================
|
|
5
|
+
# Copyright 2023 Yang
|
|
6
|
+
# ararpy - calc - err.py
|
|
7
|
+
# ==========================================
|
|
8
|
+
#
|
|
9
|
+
# This package contains error propagation functions
|
|
10
|
+
#
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pandas as pd
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def add(*args: float) -> float:
    """
    Propagate errors for Y = X1 +/- X2 +/- ... +/- Xn
    Args:
        *args: errors of the terms in 1 sigma

    Returns: float, square root of the sum of the squared errors

    """
    squared_total = sum(sigma ** 2 for sigma in args)
    return squared_total ** .5
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def mul(*args: tuple) -> float:
    """
    Propagate errors for Y = X1 * X2 * ... * Xn
    Args:
        *args: tuple, (v1, s1), (v2, s2), ...

    Returns: float, propagated error; 0.0 when no argument is given

    """
    values = [arg[0] for arg in args]
    total = 0
    for index, arg in enumerate(args):
        # dY/dXi is the product of all other values. Using it directly,
        # instead of |Y| * sqrt(sum((si / vi) ** 2)), gives the same result
        # for non-zero values and stays finite when a value is zero.
        others = np.prod([v for position, v in enumerate(values) if position != index])
        total += (arg[1] * others) ** 2
    return total ** .5
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def rec(a: tuple) -> float:
    """
    Propagate the error for Y = 1 / X
    Args:
        a: tuple, (value, error); a pandas Series is converted to a list first

    Returns: float, propagated error |error| / value ** 2

    """
    pair = a.values.tolist() if isinstance(a, pd.Series) else a
    sigma = abs(pair[1])
    squared_value = pair[0] ** 2
    return np.divide(sigma, squared_value)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def div(*args: tuple) -> float:
    """
    Propagate errors for Y = X1 / X2 / ... / Xn
    Args:
        *args: tuple, (v1, s1), (v2, s2), ...

    Returns: float, propagated error; 0.0 when no argument is given

    """
    if not args:
        return 0.0
    numerator, divisors = args[0], args[1:]
    quotient = numerator[0]
    for arg in divisors:
        quotient = np.divide(quotient, arg[0])
    # Contribution of the numerator: s1 / (X2 * ... * Xn). Written this way
    # it stays finite when the numerator value itself is zero.
    total = np.divide(numerator[1], np.prod([arg[0] for arg in divisors])) ** 2
    # Contribution of each divisor: |dY/dXi| * si = |Y / Xi| * si, i.e. the
    # relative error si / vi of the divisor scaled by the quotient.
    for arg in divisors:
        total += np.divide(quotient * arg[1], arg[0]) ** 2
    return total ** .5
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def pow(a0: tuple, a1: tuple):
    """
    Propagate errors for y = pow(a0, a1), y = a0 ^ a1
    Args:
        a0: tuple, (value, error) of the base
        a1: tuple, (value, error) of the exponent

    Returns: float, propagated error

    """
    base, base_err = a0[0], a0[1]
    exponent, exponent_err = a1[0], a1[1]
    # dy/d(base) = exponent * base ** (exponent - 1)
    from_base = base_err ** 2 * (exponent * base ** (exponent - 1)) ** 2
    # dy/d(exponent) = base ** exponent * ln(base)
    from_exponent = exponent_err ** 2 * (base ** exponent * np.log(base)) ** 2
    return (from_base + from_exponent) ** .5
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def log(a: tuple) -> float:
    """
    Propagate the error for y = ln(a)
    Args:
        a: tuple, (value, error)

    Returns: float, propagated error |error| / |value|

    """
    sigma, magnitude = abs(a[1]), abs(a[0])
    return np.divide(sigma, magnitude)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def cor(sX: float, sY: float, sZ: float):
    """
    Calculate the error correlation coefficient of the ratios X/Z and Y/Z.

    Parameters
    ----------
    sX : relative error of X, where X/Z vs. Y/Z
    sY : relative error of Y
    sZ : relative error of Z, the shared denominator

    Returns
    -------
    float, sZ^2 / sqrt((sZ^2 + sX^2) * (sZ^2 + sY^2)); nan when sZ is zero
    """
    if sZ == 0:
        return np.nan
    shared = sZ ** 2
    spread = ((shared + sX ** 2) * (shared + sY ** 2)) ** .5
    return np.divide(shared, spread)
|
ararpy/calc/histogram.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: UTF-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
# ==========================================
|
|
5
|
+
# Copyright 2023 Yang
|
|
6
|
+
# ararpy - calc - histogram
|
|
7
|
+
# ==========================================
|
|
8
|
+
#
|
|
9
|
+
#
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_data(x: list, s: float = None, r: str = 'sturges', w: float = None, c: int = None):
    """
    Bin the values of ``x`` for a histogram.

    Parameters
    ----------
    x : input data to yield bins; values are rounded to two decimals
    s : starting point of the first bin; derived from min(x) when None
    r : rule name, matched case-insensitively by substring:
        'square-root', 'sturges' and 'rice' set the bin count,
        'scott' sets the bin width
    w : bin width or interval, float for specific number
    c : bin count

    Returns
    -------
    None when x is empty or all values are equal, otherwise a tuple
        (counts, half_bins, bin_ranges, s, r, w, c, e, res) where
        counts: number of points in each bin,
        half_bins: centre of each bin,
        bin_ranges: [lower, upper] edge of each bin,
        s, r, w: start, rule and width actually used,
        c: number of bins produced,
        e: last bin edge,
        res: the values that fell in each bin
    """
    if len(x) == 0 or max(x) == min(x):
        return None
    x = [round(xi, 2) for xi in x]
    x_min, x_max = min(x), max(x)
    # np.float64 (what np.ceil returns) is a float subclass; plain floats and
    # numpy integers are accepted as well.
    number_types = (int, float, np.integer)
    # Margin around the data used when s is derived automatically. Zero is the
    # fallback when neither a rule nor a bin count provides one (e.g. only w
    # was given), which previously raised NameError.
    d = 0
    rule = r.lower() if isinstance(r, str) else ''
    if 'square-root' in rule:
        # Square-root choice, used by Excel's Analysis Toolpak histograms and many other
        c = np.ceil(len(x) ** (1. / 2.))
    if 'sturges' in rule:
        # Sturges' formula, Ceiling(log2n) + 1
        c = np.ceil(np.log2(len(x))) + 1
    if 'rice' in rule:
        # Rice Rule
        c = np.ceil(2 * len(x) ** (1. / 3.))
    if 'scott' in rule:
        # Scott's normal reference rule, optimal for random samples of normally distributed data
        mean_x = sum(x) / len(x)
        w = np.ceil(3.49 * (sum([float(i - mean_x) ** 2 for i in x]) / len(x)) ** (1. / 2.) / len(x) ** (1. / 3.))
        d = 0.5 * 10 ** (int(np.log(w)) - 1)
    if w is None and c is None:
        # No usable rule, width or count: fall back to Sturges' formula.
        return get_data(x=x, s=s, w=w, c=c, r='sturges')

    if s is None and isinstance(c, number_types):
        d = 0.5 * 10 ** int(np.log(abs(x_max - x_min) / c))
    if s is None:
        # keep the start non-negative whenever the data are non-negative
        d = d if x_min - d >= 0 else x_min
        s = round(x_min - d, 2)
        e = round(x_max + d, 2)
    else:
        e = round(x_max + x_min - s, 2)
    if isinstance(c, number_types) and not isinstance(w, number_types):
        w = round(abs(e - s) / c, 2)
    # Bin edges: keep adding edges until the previous edge passes max(x).
    bins = sorted(set(s + i * w for i in range(10000) if s + (i - 1) * w <= x_max))
    c = len(bins) - 1
    counts = [0] * c
    # One independent list per bin; [[]] * c would make every bin share the
    # same list object.
    res = [[] for _ in range(c)]
    half_bins = [round((bins[i] + bins[i + 1]) / 2, 2) for i in range(c)]
    bin_ranges = [[round(bins[i], 2), round(bins[i + 1], 2)] for i in range(c)]
    for i in range(c):
        for xi in x:
            # bins are half-open [lower, upper), except the last one which is closed
            if bins[i] <= xi < bins[i + 1] or bins[i] <= xi <= bins[i + 1] and i == c - 1:
                counts[i] += 1
                res[i].append(xi)

    return counts, half_bins, bin_ranges, s, r, w, c, bins[-1], res
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def get_kde(x: list, h: (float, int) = None, a: str = None, k: str = 'normal',
            s: (float, int) = None, e: (float, int) = None, n: int = 200):
    """
    Evaluate a kernel density estimate of ``x`` on an evenly spaced grid.

    Parameters
    ----------
    x : input data; values are rounded to two decimals and sorted
    h : bandwidth; replaced when an automatic rule is applied
    a : auto width rule, 'scott' or 'silverman' in any letter case; Scott's
        rule is used when no rule and no positive h are given
    k : kernel function name: 'normal' (standard normal density function),
        'epanechnikov', 'uniform' or 'triangular'; any other name is
        evaluated with the normal kernel
    s : KDE curve starting point, min(x) when None
    e : KDE curve ending point, max(x) when None
    n : number of grid steps of the KDE line; the grid holds n + 1 points

    Returns
    -------
    list, [[grid, density], h, k, a]; a is 'none' when no automatic rule
        was applied
    """
    data = sorted(round(xi, 2) for xi in x)
    size = len(data)

    mean = sum(data) / size
    se = np.sqrt(sum([(xi - mean) ** 2 for xi in data]) / (size - 1))

    if (a is None or a == 'none') and (h is None or h <= 0):
        # Default rule for h is Scott's rule
        a = 'Scott'
    rule = a.lower() if isinstance(a, str) else None
    if rule == 'scott':  # Scott, 1992
        h = 1.06 * se * size ** (-1. / 5.)
    elif rule == 'silverman':  # Silverman, 1986
        spread = data[int(3 / 4 * size)] - data[int(1 / 4 * size)]
        h = 0.9 * min(se, spread / 1.34) * size ** (-1. / 5.)
    else:
        a = 'none'

    # Evenly distributed points over the requested range, end point included.
    start = min(data) if s is None else s
    stop = max(data) if e is None else e
    step = abs(stop - start) / n
    grid = sorted([start + i * step for i in range(n)] + [stop])

    def normal(point, centre):
        return 1 / np.sqrt(2 * np.pi) * np.exp(-1. / 2. * ((point - centre) / h) ** 2)

    def epanechnikov(point, centre):
        z = (point - centre) / h
        return 3 / 4 * (1 - z ** 2) if abs(z) <= 1 else 0

    def uniform(point, centre):
        z = (point - centre) / h
        return 1 / 2 if abs(z) <= 1 else 0

    def triangular(point, centre):
        z = (point - centre) / h
        return 1 - abs(z) if abs(z) <= 1 else 0

    kernels = {
        'normal': normal,
        'epanechnikov': epanechnikov,
        'uniform': uniform,
        'triangular': triangular,
    }
    kernel = kernels.get(k.lower())
    if kernel is None:
        # unknown kernel name: evaluate again with the normal kernel
        return get_kde(x=data, h=h, a=a, k='normal', s=s, e=e, n=n)

    density = [
        sum([kernel(point, centre) for centre in data]) / (size * h) for point in grid
    ]

    return [[grid, density], h, k, a]
|
|
166
|
+
|