ararpy 0.0.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ararpy/__init__.py +178 -0
- ararpy/calc/__init__.py +11 -0
- ararpy/calc/age.py +161 -0
- ararpy/calc/arr.py +490 -0
- ararpy/calc/basic.py +57 -0
- ararpy/calc/corr.py +240 -0
- ararpy/calc/err.py +117 -0
- ararpy/calc/histogram.py +166 -0
- ararpy/calc/isochron.py +194 -0
- ararpy/calc/jvalue.py +38 -0
- ararpy/calc/plot.py +68 -0
- ararpy/calc/raw_funcs.py +118 -0
- ararpy/calc/regression.py +961 -0
- ararpy/calc/spectra.py +63 -0
- ararpy/files/__init__.py +2 -0
- ararpy/files/arr_file.py +86 -0
- ararpy/files/basic.py +100 -0
- ararpy/files/calc_file.py +683 -0
- ararpy/files/export.py +1181 -0
- ararpy/files/json.py +49 -0
- ararpy/files/new_file.py +31 -0
- ararpy/files/raw.py +115 -0
- ararpy/files/raw_file.py +14 -0
- ararpy/files/xls.py +27 -0
- ararpy/smp/__init__.py +17 -0
- ararpy/smp/basic.py +371 -0
- ararpy/smp/calculation.py +94 -0
- ararpy/smp/consts.py +20 -0
- ararpy/smp/corr.py +376 -0
- ararpy/smp/initial.py +232 -0
- ararpy/smp/plots.py +636 -0
- ararpy/smp/sample.py +911 -0
- ararpy/smp/style.py +191 -0
- ararpy/smp/table.py +131 -0
- ararpy-0.0.1a1.dist-info/LICENSE +21 -0
- ararpy-0.0.1a1.dist-info/METADATA +269 -0
- ararpy-0.0.1a1.dist-info/RECORD +39 -0
- ararpy-0.0.1a1.dist-info/WHEEL +5 -0
- ararpy-0.0.1a1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,961 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: UTF-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
# ==========================================
|
|
5
|
+
# Copyright 2023 Yang
|
|
6
|
+
# ararpy - calc - regression
|
|
7
|
+
# ==========================================
|
|
8
|
+
#
|
|
9
|
+
# Regression functions
|
|
10
|
+
#
|
|
11
|
+
"""
|
|
12
|
+
from ..calc import arr
|
|
13
|
+
|
|
14
|
+
# === External import ===
|
|
15
|
+
import traceback
|
|
16
|
+
import numpy as np
|
|
17
|
+
import pandas as pd
|
|
18
|
+
from scipy.stats import distributions
|
|
19
|
+
from scipy.optimize import fsolve
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def york2(x: list, sx: list, y: list, sy: list, ri: list, f: int = 1,
          convergence: float = 0.001, iteration: int = 100):
    """York (2004) error-weighted linear regression of y = b + m * x.

    Parameters
    ----------
    x : isochron x-axis values
    sx : standard errors of x
    y : isochron y-axis values, y = b + m * x
    sy : standard errors of y
    ri : error correlation coefficients between the errors of x and y
    f : sigma level of the given errors, default 1; when an integer > 1 the
        errors are divided by f so the fit runs at 1 sigma
    convergence : convergence tolerance in percent, default 0.001 (i.e. 0.001%)
    iteration : maximum number of iterations, default 100

    Returns
    -------
    tuple
        b, seb, m, sem, mswd, abs(m - last_m), Di, k, r2, chi_square,
        p_value, avg_err_s — intercept, intercept error, slope, slope error,
        MSWD, absolute convergence reached, number of iterations, error
        magnification factor, determination coefficient, chi-square, p-value,
        and the mean relative error of S in percent.
        Returns False when the OLS initialization fails.
    """
    data = np.array([x, sx, y, sy, ri])
    # Drop columns containing inf or NaN before fitting
    data = data[:, np.where(
        np.logical_or(data == np.inf, pd.isna(data)), False, True).all(axis=0)].astype(np.float64)
    x, sx, y, sy, ri = data
    n = data.shape[-1]
    X, sX, Y, sY, R = data
    # Scale the given errors down to 1 sigma
    if np.issubdtype(type(f), np.integer) and f > 1:
        sX, sY = np.divide([sX, sY], f)
    # Weights of x and y
    wX = 1 / sX ** 2
    wY = 1 / sY ** 2
    # Weight of S
    Z = lambda m, b: wX * wY / (m ** 2 * wY + wX - 2 * m * R * (wX * wY) ** .5)
    # Weighted means of X and Y
    mX = lambda m, b: sum(Z(m, b) * X) / sum(Z(m, b))
    mY = lambda m, b: sum(Z(m, b) * Y) / sum(Z(m, b))
    # Equation to minimize
    S = lambda m, b: sum(Z(m, b) * (Y - m * X - b) ** 2)
    # Slope by OLS is used as the initial value in the weight calculation
    temp_lst = linest(Y, X)
    if not temp_lst:
        return False
    b, seb, m, sem = temp_lst[0], temp_lst[1], temp_lst[5][0], temp_lst[6][0]
    b = mY(m, b) - m * mX(m, b)
    last_m = 1e10
    Di = 0  # Iteration counter
    mswd, k = 1, 1  # Initial return values
    while abs(m - last_m) >= abs(m * convergence / 100):
        last_m = m
        U = X - mX(m, b)
        V = Y - mY(m, b)
        # Expressions from York 2004, which differ from York 1969
        Up = Z(m, b) ** 2 * V * (U / wY + m * V / wX - R * (V + m * U) / (wX * wY) ** .5)
        Lo = Z(m, b) ** 2 * U * (U / wY + m * V / wX - R * (V + m * U) / (wX * wY) ** .5)
        m = sum(Up) / sum(Lo)  # New slope
        b = mY(m, b) - m * mX(m, b)  # York 2004: recompute b after each new m
        sumUUZ = sum(U * U * Z(m, b))
        sumXXZ = sum(X * X * Z(m, b))
        sem = 1 / sumUUZ ** .5
        seb = (sumXXZ / sum(Z(m, b))) ** .5 * sem
        mswd = S(m, b) / (n - 2)
        # Inflate errors when the fit is over-dispersed (MSWD > 1)
        if mswd > 1:
            k = mswd ** .5  # k is the error magnification factor
        else:
            k = 1

        sem = sem * k
        seb = seb * k

        Di = Di + 1
        if Di >= iteration:
            break

    # Calculate Y values based on the regression results
    estimate_y = b + m * X
    resid = (estimate_y - Y) ** 2
    reg = (estimate_y - np.mean(estimate_y)) ** 2
    ssresid = sum(resid)  # residual sum of squares / sum squared residual
    ssreg = sum(reg)  # regression sum of squares
    sstotal = ssreg + ssresid  # total sum of squares
    r2 = ssreg / sstotal if sstotal != 0 else np.inf  # r2 = ssreg / sstotal
    chi_square = mswd * (n - 2)
    p_value = distributions.chi2.sf(chi_square, n - 2)
    # Mean relative error of S, in percent.
    # FIX: the original "(1 / Zi) ** 1./2." parsed as "((1 / Zi) ** 1.) / 2."
    # because ** binds tighter than /; the intended square root is
    # "(1 / Zi) ** 0.5", consistent with err_s in wtd_3D_regression.
    err_s = lambda m, b: list(map(lambda Zi, Yi, Xi: (1 / Zi) ** 0.5 / abs(Yi - m * Xi - b), Z(m, b), y, x))
    avg_err_s = sum(err_s(m, b)) / len(x) * 100

    return b, seb, m, sem, mswd, abs(m - last_m), Di, k, r2, chi_square, p_value, avg_err_s
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def york2_df(data: pd.DataFrame, f: int = 1, convergence: float = 0.001,
             iteration: int = 100) -> pd.DataFrame:
    """Run the York (2004) regression on a DataFrame and return a one-row frame.

    Parameters
    ----------
    data : DataFrame whose first five columns are x, sx, y, sy, ri
    f : sigma level of the given errors, default 1
    convergence : convergence tolerance in percent, default 0.001 (0.001%)
    iteration : maximum number of iterations, default 100

    Returns
    -------
    pd.DataFrame with columns
        'k', 'sk', 'm1', 'sm1', 'MSWD', 'abs_conv', 'iter', 'mag', 'R2',
        'Chisq', 'Pvalue', 'rs'
    where 'rs' is the relative error of the total sum, 'mag' the error
    magnification factor, and 'abs_conv' the absolute convergence reached.
    """
    # Drop rows containing NaN or -inf before fitting
    cleaned: pd.DataFrame = data.replace([-np.inf], np.nan).dropna(axis=0)
    series: list = cleaned.transpose().values.tolist()[:5]

    fit = york2(*series, f=f, convergence=convergence, iteration=iteration)

    columns = [
        'k', 'sk', 'm1', 'sm1',
        'MSWD', 'abs_conv', 'iter', 'mag', 'R2', 'Chisq', 'Pvalue',
        'rs',
    ]
    return pd.DataFrame([list(fit)], columns=columns)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def wtd_3D_regression(x: list, sx: list, y: list, sy: list, z: list, sz: list, r1: list,
                      r2: list, r3: list, f: int = 1, convergence: float = 0.001,
                      iteration: int = 100):
    """Error-weighted regression for 3D plots, fitting the plane z = c + a * x + b * y.

    Parameters
    ----------
    x : x values
    sx : standard errors of x
    y : y values
    sy : standard errors of y
    z : z values
    sz : standard errors of z
    r1 : error correlation between x and y
    r2 : error correlation between x and z
    r3 : error correlation between y and z
    f : factor of errors, default 1; when an integer > 1 the errors are
        divided by f so the fit runs at 1 sigma
    convergence : convergence tolerance in percentage, default 0.001 indicating 0.001%
    iteration : maximum number of iterations, default 100

    Returns
    -------
    c (intercept), sc, a, sa, b, sb, S, mswd, r2, abs(a - last_a),
    Di, k, chi_square, p_value, avg_err_s  # length == 15
    Returns False when fewer than four valid points are supplied.
    """
    # Number of data points; all nine input lists must share this length
    n = np.shape([x, sx, y, sy, z, sz, r1, r2, r3])[-1]
    x, sx, y, sy, z, sz, r1, r2, r3 = np.array([x, sx, y, sy, z, sz, r1, r2, r3])
    # Scale the given errors down to 1 sigma
    if np.issubdtype(type(f), np.integer) and f > 1:
        sx, sy, sz = np.divide([sx, sy, sz], f)
    # At least four points are needed to fit a plane with n - 3 degrees of freedom
    if n <= 3:
        return False

    Di = 0  # Iteration counter

    # Weights of S
    W = lambda a, b: 1 / (
            a ** 2 * sx ** 2 + b ** 2 * sy ** 2 + sz ** 2 + 2 * a * b * r1 * sx * sy -
            2 * a * r2 * sx * sz - 2 * b * r3 * sy * sz)
    # Weighted mean values of X, Y, and Z, respectively
    mX = lambda a, b: sum(W(a, b) * x) / sum(W(a, b))
    mY = lambda a, b: sum(W(a, b) * y) / sum(W(a, b))
    mZ = lambda a, b: sum(W(a, b) * z) / sum(W(a, b))
    # Minimizing this equation
    S = lambda a, b, c: sum(W(a, b) * (a * x + b * y + c - z) ** 2)
    # Calculate new c based on iterated a and b
    new_c = lambda a, b: mZ(a, b) - a * mX(a, b) - b * mY(a, b)
    # Initial values of a, b, and c from OLS
    linest_res = linest(z, x, y)
    c, sc, k2, k3, k4, [a, b], [sa, sb] = linest_res[0:7]
    c = new_c(a, b)
    k = 1  # Error magnification factor
    last_a = 1e10
    # NOTE(review): this rebinds the parameter ``f`` (already consumed above);
    # ``f`` is never read again, but the reuse is confusing — consider renaming.
    mswd, f = 1000, 0
    while abs(a - last_a) >= abs(a * convergence / 100):
        last_a = a
        # Deviations from the weighted means
        U = x - mX(a, b)
        V = y - mY(a, b)
        G = z - mZ(a, b)
        # P and Q are Xi - mX and Yi - mY, respectively. These values are obtained by weighted Orthogonal regression
        P = W(a, b) * ((a * sx ** 2 + b * r1 * sx * sy - r2 * sx * sz) * (G - b * V) + (
                a * b * r1 * sx * sy + b ** 2 * sy ** 2 - a * r2 * sx * sz - 2 * b * r3 * sy * sz + sz ** 2) * U)
        Q = W(a, b) * ((b * sy ** 2 + a * r1 * sx * sy - r3 * sy * sz) * (G - a * U) + (
                a * b * r1 * sx * sy + a ** 2 * sx ** 2 - b * r3 * sy * sz - 2 * a * r2 * sx * sz + sz ** 2) * V)
        # Updated slopes from ratios of weighted cross sums
        a_Up = sum(W(a, b) * P * G).sum() * sum(W(a, b) * Q * V).sum() - \
               sum(W(a, b) * P * V).sum() * sum(W(a, b) * Q * G)
        a_Lo = sum(W(a, b) * P * U).sum() * sum(W(a, b) * Q * V).sum() - \
               sum(W(a, b) * P * V).sum() * sum(W(a, b) * Q * U)
        new_a = a_Up / a_Lo
        b_Up = sum(W(a, b) * Q * G) * sum(W(a, b) * P * U) - sum(W(a, b) * P * G) * sum(W(a, b) * Q * U)
        b_Lo = sum(W(a, b) * P * U) * sum(W(a, b) * Q * V) - sum(W(a, b) * P * V) * sum(W(a, b) * Q * U)
        new_b = b_Up / b_Lo

        # Standard errors: weighted means of the deviations
        mU = sum(W(a, b) * U) / sum(W(a, b))
        mV = sum(W(a, b) * V) / sum(W(a, b))
        mP = sum(W(a, b) * P) / sum(W(a, b))
        mQ = sum(W(a, b) * Q) / sum(W(a, b))

        # Partial derivatives used in the error propagation below
        D_PU = W(a, b) * (a * b * r1 * sx * sy + b ** 2 * sy ** 2 - a * r2 * sx * sz - 2 * b * r3 * sy * sz + sz ** 2)
        D_QU = -1 * a * W(a, b) * (b * sy ** 2 + a * r1 * sx * sy - r3 * sy * sz)
        D_PV = -1 * b * W(a, b) * (a * sx ** 2 + b * r1 * sx * sy - r2 * sx * sz)
        D_QV = W(a, b) * (a * b * r1 * sx * sy + a ** 2 * sx ** 2 - b * r3 * sy * sz - 2 * a * r2 * sx * sz + sz ** 2)
        D_PG = W(a, b) * (a * sx ** 2 + b * r1 * sx * sy - r2 * sx * sz)
        D_QG = W(a, b) * (b * sy ** 2 + a * r1 * sx * sy - r3 * sy * sz)
        D_UX = D_VY = D_GZ = 1 - W(a, b) / sum(W(a, b))
        D_Wa = -1 * W(a, b) ** 2 * (2 * a * sx ** 2 + 2 * b * r1 * sx * sy - 2 * r2 * sx * sz)
        D_Wb = -1 * W(a, b) ** 2 * (2 * b * sy ** 2 + 2 * a * r1 * sx * sy - 2 * r3 * sy * sz)

        # Derivatives of a with respect to each datum
        D_aX = W(a, b) * D_UX * (a * (sum(W(a, b) * P * U) * V * D_QU + sum(W(a, b) * Q * V) * (
                U * D_PU + P) - sum(W(a, b) * Q * U) * V * D_PU - sum(W(a, b) * P * V) * (U * D_QU + Q)) -
                                 (sum(W(a, b) * P * G) * V * D_QU + sum(W(a, b) * Q * V) * G * D_PU) +
                                 (sum(W(a, b) * Q * G) * V * D_PU + sum(W(a, b) * P * V) * G * D_QU))

        D_aY = W(a, b) * D_VY * (a * (sum(W(a, b) * P * U) * (Q + V * D_QV) + sum(W(a, b) * Q * V) * (
                U * D_PV) - sum(W(a, b) * Q * U) * (P + V * D_PV) - sum(W(a, b) * P * V) * (U * D_QV)) -
                                 (sum(W(a, b) * P * G) * (Q + V * D_QV) + sum(W(a, b) * Q * V) * G * D_PV) +
                                 (sum(W(a, b) * Q * G) * (P + V * D_PV) + sum(W(a, b) * P * V) * G * D_QV))

        D_aZ = W(a, b) * D_GZ * (a * (sum(W(a, b) * P * U) * (V * D_QG) + sum(W(a, b) * Q * V) * (
                U * D_PG) - sum(W(a, b) * Q * U) * (V * D_PG) - sum(W(a, b) * P * V) * (U * D_QG)) -
                                 (sum(W(a, b) * P * G) * (V * D_QG) + sum(W(a, b) * Q * V) * (P + G * D_PG)) +
                                 (sum(W(a, b) * Q * G) * (V * D_PG) + sum(W(a, b) * P * V) * (Q + G * D_QG)))

        # Derivatives of the weighted cross sums with respect to a
        D_WPU_a = D_Wa * P * U
        D_WQV_a = D_Wa * Q * V
        D_WQU_a = D_Wa * Q * U
        D_WPV_a = D_Wa * P * V
        D_WPG_a = D_Wa * P * G
        D_WQG_a = D_Wa * Q * G

        D_aa = a_Lo + \
               a * (sum(D_WPU_a) * sum(W(a, b) * Q * V) + sum(D_WQV_a) * sum(W(a, b) * P * U) -
                    sum(D_WQU_a) * sum(W(a, b) * P * V) - sum(D_WPV_a) * sum(W(a, b) * Q * U)
                    ) - (sum(D_WPG_a) * sum(W(a, b) * Q * V) + sum(D_WQV_a) * sum(W(a, b) * P * G) -
                         sum(D_WQG_a) * sum(W(a, b) * P * V) - sum(D_WPV_a) * sum(W(a, b) * Q * G))

        # Derivatives of b with respect to each datum
        D_bX = W(a, b) * D_UX * (b * (sum(W(a, b) * P * U) * (V * D_QU) + sum(W(a, b) * Q * V) * (P + U * D_PU) -
                                      sum(W(a, b) * Q * U) * (V * D_PU) - sum(W(a, b) * P * V) * (Q + U * D_QU)) -
                                 (sum(W(a, b) * Q * G) * (P + U * D_PU) + sum(W(a, b) * P * U) * G * D_QU) +
                                 (sum(W(a, b) * P * G) * (Q + U * D_QU) + sum(W(a, b) * Q * U) * G * D_PU))

        D_bY = W(a, b) * D_VY * (b * (sum(W(a, b) * P * U) * (Q + V * D_QV) + sum(W(a, b) * Q * V) * (U * D_PV) -
                                      sum(W(a, b) * Q * U) * (P + V * D_PV) - sum(W(a, b) * P * V) * (U * D_QV)) -
                                 (sum(W(a, b) * Q * G) * (U * D_PV) + sum(W(a, b) * P * U) * (G * D_QV)) +
                                 (sum(W(a, b) * P * G) * (U * D_QV) + sum(W(a, b) * Q * U) * (G * D_PV)))

        D_bZ = W(a, b) * D_GZ * (b * (sum(W(a, b) * P * U) * (V * D_QG) + sum(W(a, b) * Q * V) * (U * D_PG) -
                                      sum(W(a, b) * Q * U) * (V * D_PG) - sum(W(a, b) * P * V) * (U * D_QG)) -
                                 (sum(W(a, b) * Q * G) * (U * D_PG) + sum(W(a, b) * P * U) * (Q + G * D_QG)) +
                                 (sum(W(a, b) * P * G) * (U * D_QG) + sum(W(a, b) * Q * U) * (P + G * D_PG)))

        # Derivatives of the weighted cross sums with respect to b
        D_WPU_b = D_Wb * P * U
        D_WQV_b = D_Wb * Q * V
        D_WQU_b = D_Wb * Q * U
        D_WPV_b = D_Wb * P * V
        D_WPG_b = D_Wb * P * G
        D_WQG_b = D_Wb * Q * G

        D_bb = b_Lo + b * (
                sum(D_WPU_b) * sum(W(a, b) * Q * V) + sum(D_WQV_b) * sum(W(a, b) * P * U) -
                sum(D_WQU_b) * sum(W(a, b) * P * V) - sum(D_WPV_b) * sum(W(a, b) * Q * U)
        ) - (
                sum(D_WQG_b) * sum(W(a, b) * P * U) + sum(D_WPU_b) * sum(W(a, b) * Q * G) -
                sum(D_WPG_b) * sum(W(a, b) * Q * U) - sum(D_WQU_b) * sum(W(a, b) * P * G)
        )

        # Propagate the data errors into variances of a and b
        Va = sum(D_aX ** 2 * sx ** 2 + D_aY ** 2 * sy ** 2 + D_aZ ** 2 * sz ** 2 +
                 2 * r1 * sx * sy * D_aX * D_aY + 2 * r2 * sx * sz * D_aX * D_aZ + 2 * r3 * sy * sz * D_aY * D_aZ)
        Vb = sum(D_bX ** 2 * sx ** 2 + D_bY ** 2 * sy ** 2 + D_bZ ** 2 * sz ** 2 +
                 2 * r1 * sx * sy * D_bX * D_bY + 2 * r2 * sx * sz * D_bX * D_bZ + 2 * r3 * sy * sz * D_bY * D_bZ)

        # Derivatives of the intercept c with respect to each datum
        D_cX = - 1 * a * W(a, b) / sum(W(a, b)) + (-1 * D_aX) * (2 * mP - 2 * mU + mX(a, b)) + (-1 * D_bX) * (
                2 * mQ - 2 * mV + mY(a, b))
        D_cY = - 1 * b * W(a, b) / sum(W(a, b)) + (-1 * D_aY) * (2 * mP - 2 * mU + mX(a, b)) + (-1 * D_bY) * (
                2 * mQ - 2 * mV + mY(a, b))
        D_cZ = W(a, b) / sum(W(a, b)) + (-1 * D_aZ) * (2 * mP - 2 * mU) + (-1 * D_bZ) * (2 * mQ - 2 * mV)
        Vc = sum(D_cX ** 2 * sx ** 2 + D_cY ** 2 * sy ** 2 + D_cZ ** 2 * sz ** 2 +
                 2 * r1 * sx * sy * D_cX * D_cY + 2 * r2 * sx * sz * D_cX * D_cZ + 2 * r3 * sy * sz * D_cY * D_cZ)

        sa = (Va / D_aa) ** .5
        sb = (Vb / D_bb) ** .5
        sc = Vc ** .5

        mswd = S(a, b, c) / (n - 3)
        # Inflate errors when the fit is over-dispersed (MSWD > 1)
        if mswd > 1:
            k = mswd ** .5  # k is the error magnification factor
        else:
            k = 1

        sa, sb, sc = sa * k, sb * k, sc * k

        # NOTE(review): the errors above are computed from the previous a and b;
        # the parameters are only updated here, at the end of the iteration.
        a = new_a
        b = new_b
        c = new_c(new_a, new_b)

        Di = Di + 1
        if Di >= iteration:
            break

    # Goodness-of-fit statistics based on the final plane
    estimate_z = c + a * x + b * y
    resid = (estimate_z - z) ** 2
    reg = (estimate_z - np.mean(estimate_z)) ** 2
    ssresid = sum(resid)  # residual sum of squares / sum squared residual
    ssreg = sum(reg)  # regression sum of square
    sstotal = ssreg + ssresid  # total sum of squares
    R = ssreg / sstotal if sstotal != 0 else np.inf  # r2 = ssreg / sstotal
    chi_square = mswd * (n - 3)
    p_value = distributions.chi2.sf(chi_square, n - 3)

    # relative error of S
    err_s = lambda a, b, c: (1 / W(a, b)) ** .5 / abs(a * x + b * y + c - z)
    avg_err_s = np.mean(err_s(a, b, c)) * 100
    # NOTE(review): unconditional debug print — consider routing through logging
    print(f"Average relative error of S = {avg_err_s}%")

    return c, sc, a, sa, b, sb, S(a, b, c), mswd, R, abs(a - last_a), \
           Di, k, chi_square, p_value, avg_err_s
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def wtd_3D_regression_df(data: pd.DataFrame, f: int = 1, convergence: float = 0.001,
                         iteration: int = 100) -> pd.DataFrame:
    """Run the weighted 3D regression on a DataFrame and return a one-row frame.

    :param data: isochron data; the first nine columns are
        x, sx, y, sy, z, sz, r1, r2, r3
    :param f: factor of error, should be 1 for 1 sigma, or 2 for 2 sigma, default = 1
    :param convergence: convergence toleration in percentage, default = 0.001, means 0.001%
    :param iteration: number of iteration, default = 100
    :return: data frame with keys [
        'k', 'sk', 'm1', 'sm1', 'm2', 'sm2',
        'S', 'MSWD', 'R2', 'abs_conv', 'iter', 'mag',
        'Chisq', 'Pvalue', 'rs',
        ] where 'rs' is the relative error of the total sum, 'mag' the error
        magnification factor, and 'abs_conv' the absolute convergence reached
    """
    # Drop rows containing NaN or -inf before fitting
    cleaned: pd.DataFrame = data.replace([-np.inf], np.nan).dropna(axis=0)
    series = cleaned.transpose().values.tolist()
    fit = wtd_3D_regression(*series[:9], f=f, convergence=convergence, iteration=iteration)

    columns = [
        'k', 'sk', 'm1', 'sm1', 'm2', 'sm2',
        'S', 'MSWD', 'R2', 'abs_conv', 'iter', 'mag',
        'Chisq', 'Pvalue', 'rs',
    ]
    return pd.DataFrame([list(fit)], columns=columns)
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def linest(a0: list, a1: list, *args):
    """Ordinary least-squares fit of y against one or more x series.

    The model is y = b + m1 * x1 + m2 * x2 + ..., solved via the normal
    equations beta = (xTx)^-1 * xTy.

    Parameters
    ----------
    a0 : known_y's, y = b + m * x
    a1 : known_x's
    args : more known_x's

    Returns
    -------
    intercept | standard error | standard error (duplicate, see NOTE below) |
    R2 | 'mswd' placeholder | other params: list | errors of other params:
    list | equation | m_ssresid (standard error of the y estimate)

    Raises
    ------
    numpy.linalg.LinAlgError
        If the normal-equation matrix xTx is singular.
    """
    # beta = (xTx)^-1 * xTy >>> xtx * beta = xty
    # Build the design matrix with a leading column of ones for the intercept
    if not args:
        x = np.concatenate(([[1]*len(a1)], [a1]), axis=0).transpose()
    else:
        x = np.concatenate(([[1]*len(a1)], [a1], args), axis=0).transpose()
    n = x.shape[-1]  # number of coefficients; the constant is seen as x^0
    m = x.shape[0]  # number of data points
    y = np.array([a0]).transpose()
    try:
        inv_xtx = np.linalg.inv(np.matmul(x.transpose(), x))
    except np.linalg.LinAlgError as err:
        # Chain the original exception so the singular-matrix cause is kept
        raise np.linalg.LinAlgError("The determinant of the given matrix must not be zero ") from err
    beta = np.matmul(inv_xtx, np.matmul(x.transpose(), y))

    # Calculate Y values based on the fitted formula
    estimate_y = np.matmul(x, beta)
    resid = (estimate_y - y) ** 2
    reg = (estimate_y - np.mean(estimate_y)) ** 2
    ssresid = sum(resid)  # residual sum of squares
    ssreg = sum(reg)  # regression sum of squares
    sstotal = ssreg + ssresid  # total sum of squares
    df = m - n  # degrees of freedom
    m_ssresid = ssresid / df
    se_beta = (m_ssresid * np.diagonal(inv_xtx)) ** .5
    beta = beta.transpose()[0]
    r2 = ssreg / sstotal if sstotal != 0 else np.inf

    def get_adjusted_y(*args):
        """Evaluate the fitted model at the given x series."""
        args = [[1] * len(args[0]), *args]
        return [sum([beta[i] * args[i][j] for i in range(len(beta))]) for j in range(len(args[0]))]

    # NOTE(review): the third element duplicates se_beta[0]; the docstring
    # historically called it "relative error" but no caller in this module
    # reads it. Kept as-is for backward compatibility.
    return beta[0], se_beta[0], se_beta[0], r2, 'mswd', beta[1:], se_beta[1:], get_adjusted_y, m_ssresid
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def average(a0: list, a1=None):
    """Fit the constant model y = mean(a0).

    Mirrors the return layout of the other fitters in this module so callers
    can treat it interchangeably with linest/quadratic/polynomial.

    :param a0: known_y's
    :param a1: unused; kept for signature parity with the other fitters
    :return: intercept | standard error | relative error | r2 | MSWD |
        other params | errors of other params | equation | m_ssresid
    """
    if a1 is None:
        a1 = []
    count = len(a0)
    mean_y = sum(a0) / count

    # Calculate Y values based on the fitted (constant) formula
    predicted = [mean_y for _ in a0]
    squared_resid = [(v - mean_y) ** 2 for v in a0]
    pred_mean = sum(predicted) / len(predicted)
    squared_reg = [(p - pred_mean) ** 2 for p in predicted]
    ss_resid = sum(squared_resid)  # residual sum of squares
    ss_reg = sum(squared_reg)  # regression sum of squares (zero for a constant fit)
    ss_total = ss_reg + ss_resid  # total sum of squares
    dof = count - 1  # degrees of freedom
    mean_sq_resid = ss_resid / dof
    r2 = ss_reg / ss_total if ss_total != 0 else 1

    std_dev = pow(sum([(v - mean_y) ** 2 for v in a0]) / dof, 0.5)  # standard deviation
    rel_err = std_dev / mean_y * 100 if mean_y != 0 else 0  # relative standard error

    def get_adjusted_y(x: list):
        """Predicted y for each x: always the constant mean."""
        return [mean_y] * len(x)

    return mean_y, std_dev, rel_err, r2, 'MSWD', [], [], get_adjusted_y, mean_sq_resid
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
#
|
|
492
|
+
# def wtd_linest(a0: list, a1: list):
|
|
493
|
+
# """
|
|
494
|
+
# y = m * x + b,
|
|
495
|
+
# :param a0: known_y's
|
|
496
|
+
# :param a1: known_x's
|
|
497
|
+
# :return: intercept | standard error | relative error | R2 | [m] | [sem]
|
|
498
|
+
# """
|
|
499
|
+
# linest_res = linest(a0, a1)
|
|
500
|
+
# b0, seb0, rseb0, r2, mswd, [m0], [rem0] = linest_res[0:7]
|
|
501
|
+
# y0 = list(map(lambda i: m0 * i + b0, a1))
|
|
502
|
+
# resid = list(map(lambda i, j: i - j, y0, a0))
|
|
503
|
+
# weight = list(map(lambda i: 1 / i ** 2, resid)) # Use weighting by inverse of the squares of residual
|
|
504
|
+
#
|
|
505
|
+
# sum_wi = sum(weight)
|
|
506
|
+
# sum_wiyi = sum(list(map(lambda i, j: i * j, weight, a0)))
|
|
507
|
+
# sum_wixi = sum(list(map(lambda i, j: i * j, weight, a1)))
|
|
508
|
+
# sum_wiyixi = sum(list(map(lambda i, j, g: i * j * g, weight, a0, a1)))
|
|
509
|
+
# sum_wixixi = sum(list(map(lambda i, j, g: i * j * g, weight, a1, a1)))
|
|
510
|
+
#
|
|
511
|
+
# m = (sum_wiyixi - sum_wixi * sum_wiyi / sum_wi) / (sum_wixixi - sum_wixi * sum_wixi / sum_wi)
|
|
512
|
+
# b = (sum_wiyi - m * sum_wixi) / sum_wi
|
|
513
|
+
# a0 = list(map(lambda i, j: i * j, weight, a0))
|
|
514
|
+
# a1 = list(map(lambda i, j: i * j, weight, a1))
|
|
515
|
+
# linest_res = intercept_linest(a0, a1, weight=weight)
|
|
516
|
+
# b, seb, rseb, r2, mswd, [m], [sem] = linest_res[0:7]
|
|
517
|
+
# return b, seb, rseb, r2, [m], [sem]
|
|
518
|
+
#
|
|
519
|
+
#
|
|
520
|
+
# def intercept_linest(a0: list, a1: list, *args, weight: list = None, interceptIsZero: bool = False):
|
|
521
|
+
# """
|
|
522
|
+
# :param a0: known_y's, y = b + m * x
|
|
523
|
+
# :param a1: known_x's
|
|
524
|
+
# :param args: more known_x's
|
|
525
|
+
# :param weight: necessary when weighted least squares fitting
|
|
526
|
+
# :param interceptIsZero: set b as zero, y = m * x
|
|
527
|
+
# :return: intercept | standard error | relative error | R2 | MSWD | other params: list |
|
|
528
|
+
# error of other params: list | equation | m_ssresid (y估计值的标准误差)
|
|
529
|
+
# """
|
|
530
|
+
# if interceptIsZero:
|
|
531
|
+
# if len(a0) != len(a1) or len(args) > 0:
|
|
532
|
+
# return False
|
|
533
|
+
# try:
|
|
534
|
+
# df = len(a0) - 1
|
|
535
|
+
# m = sum(list(map(lambda x, y: x * y, a1, a0))) / sum(list(map(lambda x: x ** 2, a1)))
|
|
536
|
+
# SSresid = sum(list(map(lambda x, y: y - x * m, a1, a0)))
|
|
537
|
+
# sey = pow(SSresid / df, 0.5)
|
|
538
|
+
# SSreg = sum(list(map(lambda x: (x * m) ** 2, a1)))
|
|
539
|
+
# SStotal = SSreg + SSresid
|
|
540
|
+
# R2 = SStotal / SSreg
|
|
541
|
+
# sem = pow(SSresid / df * 1 / sum(list(map(lambda x: x ** 2, a1))), 0.5)
|
|
542
|
+
# return m, sem, R2
|
|
543
|
+
# except Exception:
|
|
544
|
+
# return False
|
|
545
|
+
# # beta = (xTx)^-1 * xTy >>> xtx * beta = xty
|
|
546
|
+
# # crate matrix of x and y, calculate the transpose of x
|
|
547
|
+
# m = len(a1) # number of data
|
|
548
|
+
# n = len(args) + 2 # number of unknown x, constant is seen as x^0
|
|
549
|
+
# if m - n < 1 or len(a0) != len(a1):
|
|
550
|
+
# return False
|
|
551
|
+
# if weight is not None:
|
|
552
|
+
# xlst = [weight, a1, *args]
|
|
553
|
+
# else:
|
|
554
|
+
# xlst = [[1] * m, a1, *args]
|
|
555
|
+
# ylst = a0
|
|
556
|
+
# xtx = list()
|
|
557
|
+
# xty = list()
|
|
558
|
+
# for i in range(n):
|
|
559
|
+
# xtx.append([])
|
|
560
|
+
# xty.append([])
|
|
561
|
+
# xty[i] = sum([xlst[i][k] * ylst[k] for k in range(m)])
|
|
562
|
+
# for j in range(n):
|
|
563
|
+
# xtx[i].append([])
|
|
564
|
+
# xtx[i][j] = sum([xlst[i][k] * xlst[j][k] for k in range(m)])
|
|
565
|
+
# # solve the system of linear equations using LU factorization algorithm
|
|
566
|
+
# # LU * beta = xty, U * beta = b, L * b = xty
|
|
567
|
+
# l: List[List[Any]] = list()
|
|
568
|
+
# u: List[List[Any]] = list()
|
|
569
|
+
# b: List[Any] = list()
|
|
570
|
+
# beta: List[Any] = list()
|
|
571
|
+
# for i in range(n):
|
|
572
|
+
# l.append([])
|
|
573
|
+
# u.append([])
|
|
574
|
+
# b.append([])
|
|
575
|
+
# beta.append([])
|
|
576
|
+
# for j in range(n):
|
|
577
|
+
# l[i].append([])
|
|
578
|
+
# u[i].append([])
|
|
579
|
+
# if j > i:
|
|
580
|
+
# l[i][j] = 0
|
|
581
|
+
# elif i > j:
|
|
582
|
+
# u[i][j] = 0
|
|
583
|
+
# else:
|
|
584
|
+
# l[i][j] = 1
|
|
585
|
+
# for i in range(n):
|
|
586
|
+
# if i >= 1:
|
|
587
|
+
# l[i][0] = xtx[i][0] / u[0][0]
|
|
588
|
+
# for j in range(n):
|
|
589
|
+
# if i == 0:
|
|
590
|
+
# u[i][j] = xtx[i][j]
|
|
591
|
+
# elif i == 1 and j >= 1:
|
|
592
|
+
# u[i][j] = xtx[i][j] - l[i][0] * u[0][j]
|
|
593
|
+
# elif i < n - 1:
|
|
594
|
+
# if j in range(1, i):
|
|
595
|
+
# l[i][j] = (xtx[i][j] - sum([l[i][r] * u[r][j] for r in range(j)])) / u[j][j]
|
|
596
|
+
# if j in range(i, n):
|
|
597
|
+
# u[i][j] = xtx[i][j] - sum([l[i][r] * u[r][j] for r in range(i)])
|
|
598
|
+
# elif i == n - 1:
|
|
599
|
+
# if j in range(1, i):
|
|
600
|
+
# l[n - 1][j] = (xtx[n - 1][j] - sum([l[n - 1][r] * u[r][j] for r in range(j)])) / u[j][j]
|
|
601
|
+
# if j == n - 1:
|
|
602
|
+
# u[i][j] = xtx[i][j] - sum([l[i][r] * u[r][j] for r in range(i)])
|
|
603
|
+
# # calculate matrix b, L * b = y
|
|
604
|
+
# b[0] = xty[0]
|
|
605
|
+
# for i in range(1, n):
|
|
606
|
+
# b[i] = xty[i] - sum([l[i][j] * b[j] for j in range(i)])
|
|
607
|
+
# # calculate matrix beta, b = U * beta
|
|
608
|
+
# beta[n - 1] = b[n - 1] / u[n - 1][n - 1]
|
|
609
|
+
# for i in [n - k for k in range(2, n + 1)]:
|
|
610
|
+
# beta[i] = (b[i] - sum([u[i][j] * beta[j] for j in range(i + 1, n)])) / u[i][i]
|
|
611
|
+
#
|
|
612
|
+
# # calculate the inverse of matrix xTx
|
|
613
|
+
# inv_l: List[List[Any]] = list()
|
|
614
|
+
# inv_u: List[List[Any]] = list()
|
|
615
|
+
# for i in range(n):
|
|
616
|
+
# inv_l.append([])
|
|
617
|
+
# inv_u.append([])
|
|
618
|
+
# for j in range(n):
|
|
619
|
+
# inv_l[i].append([])
|
|
620
|
+
# inv_u[i].append([])
|
|
621
|
+
# if i == j:
|
|
622
|
+
# inv_l[i][j] = 1 / l[i][j]
|
|
623
|
+
# inv_u[i][j] = 1 / u[i][j]
|
|
624
|
+
# elif i > j:
|
|
625
|
+
# inv_u[i][j] = 0
|
|
626
|
+
# elif j > i:
|
|
627
|
+
# inv_l[i][j] = 0
|
|
628
|
+
#
|
|
629
|
+
# for j in range(1, n):
|
|
630
|
+
# for i in range(n - 1):
|
|
631
|
+
# if i + j > n - 1:
|
|
632
|
+
# break
|
|
633
|
+
# else:
|
|
634
|
+
# inv_u[i][i + j] = -1 * sum([u[i][k] * inv_u[k][i + j] for k in range(i + 1, i + j + 1)]) / u[i][i]
|
|
635
|
+
# if i + j > n - 1:
|
|
636
|
+
# break
|
|
637
|
+
# else:
|
|
638
|
+
# inv_l[i + j][i] = -1 * sum([l[i + j][k] * inv_l[k][i] for k in range(i, i + j)]) / l[i + j][i + j]
|
|
639
|
+
#
|
|
640
|
+
# # inv_xTx = inv_u * inv_l
|
|
641
|
+
# inv_xtx: List[List[Any]] = list()
|
|
642
|
+
# for i in range(n):
|
|
643
|
+
# inv_xtx.append([])
|
|
644
|
+
# for j in range(n):
|
|
645
|
+
# inv_xtx[i].append([])
|
|
646
|
+
# inv_xtx[i][j] = sum([inv_u[i][k] * inv_l[k][j] for k in range(n)])
|
|
647
|
+
# # pow(inv_xtx[0][0], 0.5) is the errF in Excel Linest function
|
|
648
|
+
#
|
|
649
|
+
# # calculate Y values base on the fitted formula
|
|
650
|
+
# estimate_y = [sum([xlst[j][i] * beta[j] for j in range(n)]) for i in range(m)]
|
|
651
|
+
# resid = [(estimate_y[i] - a0[i]) ** 2 for i in range(m)]
|
|
652
|
+
# reg = [(i - sum(estimate_y) / len(estimate_y)) ** 2 for i in estimate_y]
|
|
653
|
+
# ssresid = sum(resid) # residual sum of squares / sum squared residual
|
|
654
|
+
# ssreg = sum(reg) # regression sum of square
|
|
655
|
+
# sstotal = ssreg + ssresid # total sum of squares
|
|
656
|
+
# df = m - n + 1 - 1 # df = degree of freedom
|
|
657
|
+
# m_ssresid = ssresid / df
|
|
658
|
+
# se_beta = [pow(m_ssresid * inv_xtx[i][i], 0.5) for i in range(n)]
|
|
659
|
+
# rseb = (se_beta[0] / beta[0]) * 100 if beta[0] != 0 else se_beta[0] # relative error of intercept
|
|
660
|
+
# r2 = ssreg / sstotal if sstotal != 0 else 1 # r2 = ssreg / sstotal
|
|
661
|
+
#
|
|
662
|
+
# def get_adjusted_y(*args):
|
|
663
|
+
# args = [[1] * len(args[0]), *args]
|
|
664
|
+
# return [sum([beta[i] * args[i][j] for i in range(len(beta))]) for j in range(len(args[0]))]
|
|
665
|
+
#
|
|
666
|
+
# return beta[0], se_beta[0], rseb, r2, 'mswd', beta[1:], se_beta[1:], get_adjusted_y, m_ssresid
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def quadratic(a0: list, a1: list):
    """ y = b + m1 * x + m2 * x ^ 2
    :param a0: known_y's, y = b + m1 * x + m2 * x ^ 2
    :param a1: known_x's
    :return: intercept | standard error | relative error | r2 | MSWD | [m1, m2] | [sem1, sem2], equation
    """
    # Fit the quadratic as a multiple linear regression on the two
    # predictor columns x and x^2.
    squared = [xi ** 2 for xi in a1]
    res = list(linest(a0, a1, squared))
    intercept = res[0]
    m1, m2 = res[5]

    def get_adjusted_y(x: list):
        # Evaluate the fitted quadratic at every supplied x value.
        return [intercept + m1 * xi + m2 * xi ** 2 for xi in x]

    # Replace the generic adjusted-y callable with the quadratic-specific one.
    res[7] = get_adjusted_y
    return res
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
def polynomial(a0: list, a1: list, degree: int = 5):
    """ y = b + m1 * x + m2 * x ^ 2 + ... + m[n] * x ^ n
    :param a0: known_y's, y = b + m1 * x + m2 * x ^ 2
    :param a1: known_x's
    :param degree: the order of the fitting, default = 5
    :return: intercept | standard error | relative error | r2 | MSWD | [m1, m2] | [sem1, sem2], equation
    """
    # Build one predictor column per power of x (x^1 .. x^degree) and
    # delegate to the multiple linear regression.
    columns = [[xi ** p for xi in a1] for p in range(1, degree + 1)]
    res = list(linest(a0, *columns))
    intercept = res[0]
    coeffs = res[5]

    def get_adjusted_y(x: list):
        # Evaluate the fitted polynomial at every supplied x value.
        return [intercept + sum(coeffs[p - 1] * xi ** p for p in range(1, degree + 1)) for xi in x]

    # Replace the generic adjusted-y callable with the polynomial-specific one.
    res[7] = get_adjusted_y
    return res
|
|
704
|
+
|
|
705
|
+
|
|
706
|
+
def logest(a0: list, a1: list):
    """
    :param a0: known_y's, y = b * m ^ x
    :param a1: known_x's
    :return: intercept | standard error | relative error | R2 | MSWD | m | sem
    """
    # Microsoft Excel LOGEST: linearize y = b * m ^ x as
    # ln(y) = ln(b) + ln(m) * x, fit with linest, then transform back.
    log_y = [np.log(yi) for yi in a0]
    fit = linest(log_y, a1)
    ln_b, se_ln_b, _, r2, mswd, [ln_m], [se_ln_m] = fit[0:7]
    b = np.exp(ln_b)
    m = np.exp(ln_m)
    # First-order error propagation through exp().
    sem = np.exp(ln_m) * se_ln_m
    seb = b * se_ln_b  # Excel.Logest function do not consider the error propagation
    rseb = seb / b * 100
    return b, seb, rseb, r2, mswd, m, sem
|
|
722
|
+
|
|
723
|
+
|
|
724
|
+
def power(a0: list, a1: list):
    """ Fit y = a * x ^ b + c by alternating a 1-D search on b with a linear
    regression (via linest) that supplies a and c for each candidate b.

    :param a0: known_y's, y = a * x ^ b + c
    :param a1: known_x's
    :return: intercept | standard error of intercept | relative error | R2 | MSWD | [a, b, c] | [sem, sec, seb]
    :raises RuntimeError: propagated from the solver
    :raises numpy.linalg.LinAlgError: propagated from the linear algebra step
    :raises IndexError: on type/index failures inside the fit
    """

    def _pow_func(x, a, b, c):
        # Model equation y = a * x ^ b + c.
        return a * x ** b + c

    def _solve_pow(params):
        # Residuals at three anchor points (mean of first 3, overall mean,
        # mean of last 3) — used by fsolve for the initial (a, b, c) guess.
        a, b, c = params
        x, y = [0, 0, 0], [0, 0, 0]
        x[0] = sum(a1[:3]) / 3
        y[0] = sum(a0[:3]) / 3
        x[1] = sum(a1) / len(a1)
        y[1] = sum(a0) / len(a0)
        x[2] = sum(a1[-3:]) / 3
        y[2] = sum(a0[-3:]) / 3
        return np.array([
            _pow_func(x[0], a, b, c) - y[0],
            _pow_func(x[1], a, b, c) - y[1],
            _pow_func(x[2], a, b, c) - y[2],
        ])

    def _get_sum(a, b, c):
        # Sum of squared residuals of the model against the data.
        y_predicted = [_pow_func(_x, a, b, c) for _x in a1]
        return sum([(y_predicted[i] - a0[i]) ** 2 for i in range(len(a0))])

    def _get_abc(b):  # Return a, b, c given b based on linest regression
        f = linest(a0, [_x ** b for _x in a1])
        return f[5][0], b, f[0]

    try:
        a, b, c = fsolve(func=_solve_pow, x0=np.array([1, 1, 1]))  # initial estimate
        count = 0
        step = 0.01
        # Coordinate search on b: compare residuals at b, b*(1-step) and
        # b*(1+step); slide downhill while the trend is monotone, otherwise
        # bisect toward the better side and halve the step.
        # NOTE(review): the `continue` branches do not advance `count`, so
        # termination relies on the search leaving the monotone regime —
        # confirm this cannot loop indefinitely for monotone residuals.
        while count < 100:
            a, b, c = _get_abc(b)
            s = _get_sum(a, b, c)
            b_left, b_right = b - step * b, b + step * b
            s_left = _get_sum(*_get_abc(b_left))
            s_right = _get_sum(*_get_abc(b_right))
            if s_left > s > s_right:
                b = b_right
                continue
            elif s_left < s < s_right:
                b = b_left
                continue
            elif s_left < s_right:
                b = (b + b_left) / 2
            else:
                b = (b + b_right) / 2
            step = step * 0.5
            count += 1
            if step < 0.000001:
                break
    except RuntimeError:
        raise RuntimeError
    except np.linalg.LinAlgError:
        raise np.linalg.LinAlgError
    except (TypeError, IndexError):
        # BUG FIX: the original `except TypeError or IndexError:` evaluates
        # the boolean expression first and only ever caught TypeError; a
        # tuple is required to catch both exception types.
        raise IndexError

    # Final linear fit at the converged b yields slope a and intercept c.
    f = linest(a0, [_x ** b for _x in a1])
    a, sea, c, sec = f[5][0], f[6][0], f[0], f[1]

    calculated_y = [_pow_func(i, a, b, c) for i in a1]
    resid = [(calculated_y[i] - a0[i]) ** 2 for i in range(len(a0))]
    reg = [(i - sum(calculated_y) / len(calculated_y)) ** 2 for i in calculated_y]
    ssresid = sum(resid)  # residual sum of squares
    ssreg = sum(reg)  # regression sum of squares
    sstotal = ssreg + ssresid  # total sum of squares
    df = len(a0) - 1  # degrees of freedom
    m_ssresid = ssresid / df
    r2 = ssreg / sstotal if sstotal != 0 else 1

    intercept = c
    dp = len(a1)  # data points
    z = [i ** b for i in a1]
    # calculate error of intercept
    errfz = pow(sum([i ** 2 for i in z]) / (dp * sum([i ** 2 for i in z]) - sum(z) ** 2), 0.5)
    errfx = pow(sum([i ** 2 for i in a1]) / (dp * sum([i ** 2 for i in a1]) - sum(a1) ** 2), 0.5)
    # seb = errfz * sey = errfz * ssresid / df -> se_intercept = sey * errfx = seb / errfz * errfx
    se_intercept = sec / errfz * errfx
    rse_intercept = se_intercept / intercept * 100

    return intercept, se_intercept, rse_intercept, r2, 'mswd', [a, b, c], 'se', \
        lambda x: [_pow_func(i, a, b, c) for i in x], m_ssresid
|
|
814
|
+
|
|
815
|
+
|
|
816
|
+
def exponential(a0: list, a1: list):
    """ Fit y = a * b ^ x + c by alternating a 1-D search on b with a linear
    regression (via linest) that supplies a and c for each candidate b.

    :param a0: known_y's, y = a * b ^ x + c
    :param a1: known_x's
    :return: intercept | standard error of intercept | relative error | R2 | MSWD | [m, c, b] | [sem, sec, seb]
    :raises RuntimeError: propagated from the solver
    :raises numpy.linalg.LinAlgError: propagated from the linear algebra step
    :raises IndexError: on type/index failures inside the fit
    :raises ValueError: when the extrapolated intercept is negative or
        implausibly large (> 10x the maximum observed y)
    """

    def _exp_func(x, a, b, c):
        # Model equation y = a * b ^ x + c.
        return a * b ** x + c

    def _solve_exp(params):
        # Residuals at three anchor points (mean of first 3, overall mean,
        # mean of last 3) — used by fsolve for the initial (a, b, c) guess.
        a, b, c = params
        x, y = [0, 0, 0], [0, 0, 0]
        x[0] = sum(a1[:3]) / 3
        y[0] = sum(a0[:3]) / 3
        x[1] = sum(a1) / len(a1)
        y[1] = sum(a0) / len(a0)
        x[2] = sum(a1[-3:]) / 3
        y[2] = sum(a0[-3:]) / 3
        return np.array([
            _exp_func(x[0], a, b, c) - y[0],
            _exp_func(x[1], a, b, c) - y[1],
            _exp_func(x[2], a, b, c) - y[2],
        ])

    def _get_sum(a, b, c):
        # Sum of squared residuals of the model against the data.
        y_predicted = [_exp_func(_x, a, b, c) for _x in a1]
        return sum([(y_predicted[i] - a0[i]) ** 2 for i in range(len(a0))])

    def _get_ac(b):
        # Return a, b, c for a fixed b via linear regression on b ** x.
        f = linest(a0, [b ** _x for _x in a1])
        return f[5][0], b, f[0]

    try:
        a, b, c = fsolve(_solve_exp, np.array([1, 1, 1]))  # initial estimate
        count = 0
        step = 0.01
        # Coordinate search on b: compare residuals at b, b*(1-step) and
        # b*(1+step); slide downhill while the trend is monotone, otherwise
        # bisect toward the better side and halve the step.
        # NOTE(review): the `continue` branches do not advance `count`, so
        # termination relies on the search leaving the monotone regime —
        # confirm this cannot loop indefinitely for monotone residuals.
        while count < 100:
            a, b, c = _get_ac(b)
            s = _get_sum(a, b, c)
            b_left, b_right = b - step * b, b + step * b
            s_left = _get_sum(*_get_ac(b_left))
            s_right = _get_sum(*_get_ac(b_right))
            if s_left > s > s_right:
                b = b_right
                continue
            elif s_left < s < s_right:
                b = b_left
                continue
            elif s_left < s_right:
                b = (b + b_left) / 2
            else:
                b = (b + b_right) / 2
            count += 1
            step = step * 0.5
            if step < 0.000001:
                break

    except RuntimeError:
        raise RuntimeError
    except np.linalg.LinAlgError:
        raise np.linalg.LinAlgError
    except (TypeError, IndexError):
        # BUG FIX: the original `except TypeError or IndexError:` evaluates
        # the boolean expression first and only ever caught TypeError; a
        # tuple is required to catch both exception types.
        raise IndexError

    # Final linear fit at the converged b yields slope a and intercept c.
    f = linest(a0, [b ** _x for _x in a1])
    a, sea, c, sec = f[5][0], f[6][0], f[0], f[1]

    calculated_y = [_exp_func(i, a, b, c) for i in a1]
    resid = [(calculated_y[i] - a0[i]) ** 2 for i in range(len(a0))]
    reg = [(i - sum(calculated_y) / len(calculated_y)) ** 2 for i in calculated_y]
    ssresid = sum(resid)  # residual sum of squares
    ssreg = sum(reg)  # regression sum of squares
    sstotal = ssreg + ssresid  # total sum of squares
    dp = len(a1)  # data points
    df = dp - 1  # degrees of freedom
    m_ssresid = ssresid / df
    r2 = ssreg / sstotal if sstotal != 0 else 1

    z = [b ** i for i in a1]
    # Value of the fitted curve at x = 0: a * b^0 + c = a + c.
    intercept = a + c
    # calculate error of intercept
    errfz = pow(sum([i ** 2 for i in z]) / (dp * sum([i ** 2 for i in z]) - sum(z) ** 2), 0.5)
    errfx = pow(sum([i ** 2 for i in a1]) / (dp * sum([i ** 2 for i in a1]) - sum(a1) ** 2), 0.5)
    # seb = errfz * sey = errfz * ssresid / df -> se_intercept = sey * errfx = seb / errfz * errfx
    se_intercept = sec / errfz * errfx
    rse_intercept = se_intercept / intercept * 100

    # Sanity checks: reject fits whose intercept is implausible.
    if abs(intercept) > 10 * max(a0):
        raise ValueError
    if intercept < 0:
        raise ValueError

    return intercept, se_intercept, rse_intercept, r2, 'mswd', [a, b, c], 'se', \
        lambda x: [_exp_func(i, a, b, c) for i in x], m_ssresid
|
|
912
|
+
|
|
913
|
+
|
|
914
|
+
""" line functions """
|
|
915
|
+
|
|
916
|
+
|
|
917
|
+
def linear_eq(x: list, beta: list):
    """ y = b0 * x^0 + b1 * x^1 + ... + bn * x^n
    Parameters
    ----------
    beta : coefficients, one per power of x starting at x^0
    x : points at which to evaluate the polynomial

    Returns
    -------
    list of polynomial values, one per entry of x
    """
    order = len(beta)
    # Dot product of the coefficient vector with the power basis of each x.
    return [np.dot(beta, [xi ** p for p in range(order)]) for xi in x]
|
|
929
|
+
|
|
930
|
+
|
|
931
|
+
def exponential_eq(x: list, beta: list):
    """ y = a * b ^ x + c
    Parameters
    ----------
    beta : coefficients, [a, b, c]
    x : points at which to evaluate the curve

    Returns
    -------
    list of curve values, one per entry of x
    """
    a, b, c = beta[0], beta[1], beta[2]
    # BUG FIX: the constant term previously reused beta[0] (a) instead of
    # beta[2] (c), contradicting the documented model y = a * b ^ x + c.
    return [a * b ** _x + c for _x in x]
|
|
943
|
+
|
|
944
|
+
|
|
945
|
+
def power_eq(x: list, beta: list):
    """ y = a * x ^ b + c
    Parameters
    ----------
    beta : coefficients, [a, b, c]
    x : points at which to evaluate the curve

    Returns
    -------
    list of curve values, one per entry of x
    """
    a, b, c = beta[0], beta[1], beta[2]
    # BUG FIX: the constant term previously reused beta[0] (a) instead of
    # beta[2] (c), contradicting the documented model y = a * x ^ b + c.
    # Also fixed the duplicated "y = y =" in the docstring.
    return [a * _x ** b + c for _x in x]
|
|
957
|
+
|
|
958
|
+
|
|
959
|
+
# Library module: no command-line behavior is defined.
if __name__ == '__main__':
    pass
|
|
961
|
+
|