linregmc 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of linregmc might be problematic. Click here for more details.
- linregmc/.bashlog +15 -0
- linregmc/__init__.py +1 -0
- linregmc/linregmc.py +253 -0
- linregmc-0.0.1.dist-info/METADATA +21 -0
- linregmc-0.0.1.dist-info/RECORD +6 -0
- linregmc-0.0.1.dist-info/WHEEL +4 -0
linregmc/.bashlog
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
557 cd example_package_per/
|
|
2
|
+
558 echo -n >__init__.py
|
|
3
|
+
559 vi example.py
|
|
4
|
+
539 cd linregmc/
|
|
5
|
+
540 ls
|
|
6
|
+
541 ls -lt
|
|
7
|
+
542 rm example.py
|
|
8
|
+
543 vi __init__.py
|
|
9
|
+
544 ls
|
|
10
|
+
545 rm linregmc.py
|
|
11
|
+
546 cp ../../../linregmc.py .
|
|
12
|
+
547 ls
|
|
13
|
+
584 cd linregmc/
|
|
14
|
+
585 ls
|
|
15
|
+
586 vi __init__.py
|
linregmc/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .linregmc import *
|
linregmc/linregmc.py
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from numpy.matlib import repmat,randn
|
|
3
|
+
|
|
4
|
+
def addnoise(yinp, ysiginp, nmc=10000, distrib='normal'):
    """Create Monte Carlo copies of a data vector with added random noise.

    Parameters
    ----------
    yinp : scalar or 1-d array_like
        Data vector; its elements are the mean values of the generated data.
    ysiginp : scalar or 1-d array_like
        Standard deviation for each element of ``yinp``. A single value is
        applied to every element.
    nmc : int, optional
        Number of Monte Carlo copies, i.e. rows of the result.
    distrib : str, optional
        'norm'/'normal' gives normally distributed noise.
        'lognorm'/'lognormal' gives lognormally distributed values (useful
        for example if negative results are unphysical).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(nmc, n)`` where ``n`` is the number of elements in
        ``yinp``; every row is one noisy copy of the data vector.

    Raises
    ------
    Exception
        If an input has more than one dimension, if the lengths of y and
        ysig differ, or if ``distrib`` is not recognized.
    ValueError
        If a lognormal distribution is requested for non-positive y values.

    Notes
    -----
    The random numbers are drawn from NumPy's global generator
    (``np.random``); call ``np.random.seed`` beforehand for reproducible
    output.
    """
    yinp = np.asarray(yinp)
    ysiginp = np.asarray(ysiginp)
    if np.ndim(yinp) > 1 or np.ndim(ysiginp) > 1:
        raise Exception('y and ysig must not have higher dimension than 1.')
    if np.size(ysiginp) == 1:
        # A scalar ysig is turned into a vector with identical elements.
        ysiginp = ysiginp * np.ones(np.size(yinp))
    if np.size(yinp) != np.size(ysiginp):
        raise Exception('y and ysig must have the same length.')

    n = np.size(yinp)
    # Row vectors of shape (1, n) broadcast against the (nmc, n) noise
    # matrix, so no explicit tiling of y and ysig is needed.
    y = yinp.reshape((1, n))
    ysig = ysiginp.reshape((1, n))

    kind = distrib.lower()
    if kind in ('norm', 'normal'):
        return y + ysig * np.random.standard_normal((nmc, n))
    if kind in ('lognorm', 'lognormal'):
        if np.any(y <= 0):
            # log(y) is undefined for y <= 0; without this check the result
            # would silently be NaN or be centred on |y|.
            raise ValueError('Lognormal noise requires strictly positive y values.')
        # Parameters of the lognormal distribution that has mean y and
        # standard deviation ysig.
        mu = np.log(y**2 / np.sqrt(ysig**2 + y**2))
        sigma = np.sqrt(np.log(ysig**2 / y**2 + 1))
        return np.exp(np.random.standard_normal((nmc, n)) * sigma + mu)
    raise Exception('Distribution named "' + distrib + '" is not recognized.')
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def linreg(xinp, yinp, plot=False):
    """Weighted linear fit a*x + b = y with Monte Carlo error analysis.

    This is the weighted-fit version that only handles linear fits and does
    not compute confidence intervals.

    Parameters
    ----------
    xinp : array_like, shape (N,) or (NX, N)
        NX data sets of x values with N data points each.
    yinp : array_like, shape (N,) or (NY, N)
        NY data sets of y values with N data points each. NX and NY need
        not be equal; in particular one of them may be a single data set
        (without added noise). The number of fits equals max(NX, NY) and
        the shorter collection is reused cyclically.
    plot : bool, optional
        If true, show a histogram of the chi2 values of all fits with the
        chi2 of the single base fit marked by a red line (needs matplotlib).

    Returns
    -------
    pp : numpy.ndarray, shape (2,)
        Slope and intercept from a single weighted fit to the column-wise
        medians of x and y (can be used as the result).
    psig : numpy.ndarray, shape (2,)
        Standard deviation of slope and intercept over all fits.
    pchi2 : float
        Fraction of the fits whose chi2 exceeds the chi2 of the base fit.
    pmc : numpy.ndarray, shape (max(NX, NY), 2)
        Fitted slope and intercept for every data set.

    Raises
    ------
    Exception
        If x and y do not have the same number of columns.
    ValueError
        If some data point has no spread at all in the supplied data sets,
        because the per-point weights are then undefined.
    """
    # Accept lists as well as arrays; 1-d input is a single data set.
    x = np.asarray(xinp)
    y = np.asarray(yinp)
    if x.ndim == 1:
        x = x.reshape((1, x.size))
    if y.ndim == 1:
        y = y.reshape((1, y.size))
    if np.size(x, 1) != np.size(y, 1):
        raise Exception('Number of columns in x and y must be equal')
    N = np.size(x, 1)
    n = 1  # always a linear fit

    # Single fit to get the base chi2 value.
    xs = np.median(x, axis=0)
    ys = np.median(y, axis=0)
    # Per-point spread used as fit weight. Adding the two spreads only makes
    # sense if either x or y is a single data set.
    sig = np.std(x, axis=0) + np.std(y, axis=0)
    if not np.all(sig > 0):
        raise ValueError('Every data point needs a nonzero spread among the '
                         'data sets of x or y; the fit weights are undefined otherwise.')

    weights = np.ones(N) / sig  # loop invariant: weighted intercept column
    X = np.stack((xs / sig, weights), axis=1)
    Y = ys / sig
    pp = np.linalg.lstsq(X, Y, rcond=None)[0]
    chi2 = np.sum((Y - X @ pp)**2)
    # Unweighted residuals of the base fit; they are removed from every data
    # set before it is fitted in the loop below.
    subtract = ys - np.stack((xs, np.ones(N)), axis=1) @ pp

    xn = np.size(x, 0)
    yn = np.size(y, 0)
    nmc = max(xn, yn)
    pmc = np.zeros((nmc, n + 1))
    chi2mc = np.zeros(nmc)
    for i in range(nmc):
        X = np.stack((x[i % xn, :] / sig, weights), axis=1)
        Y = (y[i % yn, :] - subtract) / sig
        p = np.linalg.lstsq(X, Y, rcond=None)[0]
        pmc[i, :] = p
        chi2mc[i] = np.sum((Y - X @ p)**2)

    # The single-fit values (pp) are returned as the result rather than the
    # mean over all fits; only the spread is taken from the fits.
    psig = np.std(pmc, 0)
    pchi2 = np.mean(chi2mc > chi2)

    if plot:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 1, figsize=(4, 2))
        counts, *_ = ax.hist(chi2mc, bins=50)
        ycent = 0.5 * max(counts)
        ax.plot([chi2, chi2], [0, ycent], 'r-')
        ax.set_yticks([])
        ax.set_xlabel(r"$\chi^2$")
        plt.show()

    return (pp, psig, pchi2, pmc)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def confidence(X, level=0.683, plot=False):
    """Statistical analysis of the data in matrix X.

    It is assumed that the number of data points is large; all properties
    are calculated from the data itself.

    Parameters
    ----------
    X : array_like, shape (N,) or (N, n)
        Data matrix with the data in columns. For example, if X contains
        data from two measurements, measurement 1 is in column 1 and
        measurement 2 in column 2. A 1-d array is accepted for one column.
    level : float, optional
        Confidence level, 0 < level < 1.
    plot : bool, optional
        If true, plot a histogram for each column (needs matplotlib). A red
        error bar shows median +/- standard deviation, dashed black lines
        show the confidence interval and the short red markers at the bottom
        show median +/- err. If X has exactly two columns, a scatter plot
        showing possible correlation between the columns is also produced.

    Returns
    -------
    err : numpy.ndarray, shape (n,)
        For each column, the absolute deviation from the column mean that
        is not exceeded by a fraction ``level`` of the data points.
    confint : list of (low, high) tuples
        Confidence interval for each column, obtained by cutting the same
        number of points from both ends of the sorted data.

    If X was 1-d, ``err`` is a scalar and ``confint`` a single tuple.

    Raises
    ------
    Exception
        If ``level`` is not strictly between 0 and 1.
    """
    X = np.asarray(X)  # accept lists as well as arrays
    onedim = (X.ndim == 1)
    if onedim:  # convert to a matrix, converted back to scalars at the end
        X = X.reshape((X.size, 1))

    if level <= 0 or level >= 1:
        raise Exception("level must be 0 < level < 1.")

    if np.size(X, 1) > np.size(X, 0):
        print("Warning. It appears that your data is not placed column-wise.")

    N = np.size(X, 0)  # number of data points
    n = np.size(X, 1)  # number of dimensions (columns)

    med = np.median(X, 0)
    sig = np.std(X, 0)
    absdiff = np.abs(X - np.mean(X, 0))  # absolute difference to mean value

    # Number of points cut from each tail (at least one, so the interval
    # bounds are always actual data points).
    ncut = round(max(1, 0.5 * (1 - level) * N))
    # 1-based rank of the absolute deviation reported as err, kept in [1, N].
    nerr = round(max(1, min(N, level * N)))

    ordered = np.sort(X, axis=0)
    plow = np.array(ordered[ncut - 1, :], dtype=float)
    # The upper bound mirrors the lower one: the ncut-th largest value.
    phigh = np.array(ordered[N - ncut, :], dtype=float)
    err = np.array(np.sort(absdiff, axis=0)[nerr - 1, :], dtype=float)

    if plot:
        import matplotlib.pyplot as plt
        import matplotlib.gridspec as gridspec
        if n == 2:  # exactly two parameters: scatter plot and histograms
            fig = plt.figure(figsize=(8, 4.8))
            gs = gridspec.GridSpec(2, 2, width_ratios=[1.5, 1], height_ratios=[1, 1])
            # Left square spans both rows
            ax_left = fig.add_subplot(gs[:, 0])
            axes = [fig.add_subplot(gs[0, 1]), fig.add_subplot(gs[1, 1])]
            ax_left.set_aspect('equal')
            ax_left.scatter(X[:, 0], X[:, 1], s=0.1)
            ax_left.set_xlabel('a')
            ax_left.set_ylabel('b')
            ax_left.plot([plow[0], plow[0]], [np.min(X[:, 1]), np.max(X[:, 1])], 'k--')
            ax_left.plot([phigh[0], phigh[0]], [np.min(X[:, 1]), np.max(X[:, 1])], 'k--')
            ax_left.plot([np.min(X[:, 0]), np.max(X[:, 0])], [plow[1], plow[1]], 'k--')
            ax_left.plot([np.min(X[:, 0]), np.max(X[:, 0])], [phigh[1], phigh[1]], 'k--')

            ax_left.set_aspect(1.0 / ax_left.get_data_ratio(), adjustable='box')
        else:  # only produce histograms
            fig, axes = plt.subplots(nrows=n, ncols=1, figsize=(4, 2 * n))
            if n == 1:
                axes = [axes]  # plt.subplots returns a bare Axes for a single plot

        for i, ax in enumerate(axes):
            counts, *_ = ax.hist(X[:, i], bins=50)
            ycent = 0.5 * max(counts)
            ax.errorbar(med[i], ycent, xerr=sig[i], fmt='ro', capsize=5)
            ax.plot([plow[i], plow[i]], [0, 0.8 * ycent], 'k--')
            ax.plot([phigh[i], phigh[i]], [0, 0.8 * ycent], 'k--')
            ax.plot([med[i] - err[i], med[i] - err[i]], [0, 0.1 * ycent], 'r-')
            ax.plot([med[i] + err[i], med[i] + err[i]], [0, 0.1 * ycent], 'r-')
            ax.set_xlabel(chr(ord('a') + i))  # name the variables a, b, c...
            ax.set_yticks([])

        plt.tight_layout()
        plt.show()

    if onedim:
        return (err[0], (plow[0], phigh[0]))  # simply return scalars
    return (err, list(zip(plow, phigh)))
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def linconf(xinp, yinp, ysig, nmc=10000, distrib='normal', level=0.683, ytransform=None, restransform=None):
    """Full Monte Carlo linear regression with confidence calculation.

    Applies the following five steps in succession:

    1. ``addnoise`` creates ``nmc`` noisy copies of the y values.
    2. ``ytransform`` is applied to the noisy y values (skipped if None).
    3. ``linreg`` fits a*x + b = y for every copy.
    4. ``restransform(a, b)`` calculates the results from the fitted
       parameters (skipped if None, in which case the results are a and b).
    5. ``confidence`` is evaluated for each result.

    Parameters
    ----------
    xinp : array_like
        x values, passed to ``linreg`` without added noise.
    yinp, ysig, nmc, distrib
        Passed to ``addnoise``.
    level : float, optional
        Confidence level passed to ``confidence``.
    ytransform : callable, optional
        Called with the matrix of noisy y values; its return value is
        fitted instead.
    restransform : callable, optional
        Called once with the single-fit values ``(a, b)`` and once with the
        arrays of all fitted ``a`` and ``b`` values. It may return a tuple
        or list of results, or a single result.

    Returns
    -------
    (reslist, pchi2)
        ``reslist`` is a list with one ``(result, error, confidence
        interval)`` tuple per calculated result; ``pchi2`` is the value
        returned by ``linreg``.
    """
    ymc = addnoise(yinp, ysig, nmc, distrib)
    if ytransform is not None:
        ymc = ytransform(ymc)
    # Convert x here so that plain lists are accepted as well.
    pp, _, pchi2, pmc = linreg(np.asarray(xinp), ymc)
    if restransform is not None:
        results = restransform(pp[0], pp[1])
        results_mc = restransform(pmc[:, 0], pmc[:, 1])
        if not isinstance(results, (tuple, list)):
            # A single result: wrap it so the loop below sees one entry.
            results = (results,)
            results_mc = (results_mc,)
    else:
        results = (pp[0], pp[1])
        results_mc = (pmc[:, 0], pmc[:, 1])
    rlist = []
    for r, rmc in zip(results, results_mc):
        perr, confint = confidence(rmc, level)
        rlist.append((r, perr, confint))
    return (rlist, pchi2)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: linregmc
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Linear regression with Monte Carlo error analysis
|
|
5
|
+
Project-URL: Homepage, https://github.com/parsod/BFKdemos
|
|
6
|
+
Author-email: Par Soderhjelm <par.soderhjelm@bpc.lu.se>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Requires-Python: >=3.7
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# linregmc package (Linear regression with Monte Carlo error analysis)
|
|
14
|
+
|
|
15
|
+
This package is a single module which contains the following functions
|
|
16
|
+
|
|
17
|
+
addnoise - introduce noise in the data based on known experimental precision
|
|
18
|
+
linreg - perform multiple linear regressions and calculate goodness-of-fit
|
|
19
|
+
confidence - get confidence intervals, plot distributions etc.
|
|
20
|
+
|
|
21
|
+
TODO: Add detailed documentation here
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
linregmc/.bashlog,sha256=6LIuzUCAwO_ab7lx8FSkuUWWksFWkI_M6bclmDaa_Sc,297
|
|
2
|
+
linregmc/__init__.py,sha256=QP5Y7zpbieaPw5lQbGZt4kCFwHCLNgr9O5NSt8n2mIg,24
|
|
3
|
+
linregmc/linregmc.py,sha256=SfS64Kbk-VsqHKRrJSeavLYLQNFJHgU8pBK9wZNiTYM,10422
|
|
4
|
+
linregmc-0.0.1.dist-info/METADATA,sha256=a49XWMX-LnX8MzM1_qplYEcVQWwI8AqEDSoM3Ars-ts,811
|
|
5
|
+
linregmc-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
6
|
+
linregmc-0.0.1.dist-info/RECORD,,
|