linregmc 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of linregmc might be problematic. Click here for more details.

linregmc/.bashlog ADDED
@@ -0,0 +1,15 @@
1
+ 557 cd example_package_per/
2
+ 558 echo -n >__init__.py
3
+ 559 vi example.py
4
+ 539 cd linregmc/
5
+ 540 ls
6
+ 541 ls -lt
7
+ 542 rm example.py
8
+ 543 vi __init__.py
9
+ 544 ls
10
+ 545 rm linregmc.py
11
+ 546 cp ../../../linregmc.py .
12
+ 547 ls
13
+ 584 cd linregmc/
14
+ 585 ls
15
+ 586 vi __init__.py
linregmc/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .linregmc import *
linregmc/linregmc.py ADDED
@@ -0,0 +1,253 @@
1
+ import numpy as np
2
+ from numpy.matlib import repmat,randn
3
+
4
def addnoise(yinp,ysiginp,nmc=10000,distrib='normal'):
    """Create Monte Carlo copies of a data vector with added random noise.

    Returns a matrix with nmc rows, each row being a copy of y with
    independent random noise of standard deviation ysig added.

    Parameters
    ----------
    yinp : scalar or 1-d array-like
        Data vector (the mean values of the generated data).
    ysiginp : scalar or 1-d array-like
        Standard deviation of each data point. A single value is applied
        to all points; otherwise it must have the same length as yinp.
    nmc : int
        Number of Monte Carlo copies (rows of the result).
    distrib : str
        'norm'/'normal' gives normally distributed noise.
        'lognorm'/'lognormal' gives lognormally distributed values whose
        mean is y and standard deviation is ysig (useful for example if
        negative results are unphysical). Matching is case-insensitive.

    Returns
    -------
    numpy.ndarray of shape (nmc, n), where n is the number of data points.

    Raises
    ------
    Exception
        If an input has more than one dimension, if the lengths differ,
        or if distrib is not recognized.
    ValueError
        If the lognormal distribution is requested for non-positive y.

    Notes
    -----
    The noise is drawn from NumPy's global random state (np.random.randn),
    so call np.random.seed beforehand if reproducible data is needed.
    """
    y = np.asarray(yinp)
    ysig = np.asarray(ysiginp)
    if y.ndim > 1 or ysig.ndim > 1:
        raise Exception('y and ysig must not have higher dimension than 1.')
    if ysig.size == 1:
        # A single standard deviation applies to every data point
        ysig = ysig*np.ones(y.size)
    if y.size != ysig.size:
        raise Exception('y and ysig must have the same length.')

    n = y.size
    # Row vectors broadcast against the (nmc, n) noise matrix, so no
    # explicit tiling (repmat) is needed.
    y = y.reshape((1, n))
    ysig = ysig.reshape((1, n))

    kind = distrib.lower()
    if kind in ('norm', 'normal'):
        return y + ysig*np.random.randn(nmc, n)
    if kind in ('lognorm', 'lognormal'):
        if np.any(y <= 0):
            # The formulas below only see y**2, so a negative mean would
            # silently be treated as |y| and a zero mean would give NaN.
            raise ValueError('Lognormal distribution requires strictly positive y values.')
        mu = np.log(y**2/np.sqrt(ysig**2 + y**2))    # mu of lognormal dist
        sigma = np.sqrt(np.log(ysig**2/y**2 + 1))    # sigma of lognormal dist
        return np.exp(np.random.randn(nmc, n)*sigma + mu)
    raise Exception('Distribution named "' + distrib + '" is not recognized.')
43
+
44
+
45
def linreg(xinp, yinp, plot = False):
    """Perform the weighted linear fit a*x+b=y with Monte Carlo error analysis.

    This is the weighted-fit version (similar to MATLAB linregmc) that only
    handles linear fits and does NOT compute confidence intervals.

    Parameters
    ----------
    xinp : array-like, N values or an NX x N matrix
        The NX data sets of x values (N data points each).
    yinp : array-like, N values or an NY x N matrix
        The NY data sets of y values (N data points each).
        NX and NY need not be the same. In particular one may use a single
        data set (without added noise) for one of them. The number of fits
        equals max(NX,NY) and if there are fewer data sets for one of x or
        y, they are cyclically reused.
    plot : bool
        If True, show a histogram of the chi2 values of all fits with the
        chi2 of the single fit marked by a red line.

    Returns
    -------
    (pp, psig, pchi2, pmc) where
    pp    : (2 elements) single-fit value of a and b, obtained from the
            column-wise medians of x and y
    psig  : (2 elements) standard deviation of each parameter over all fits
    pchi2 : fraction of the fits whose chi2 exceeds the single-fit chi2
    pmc   : a max(NX,NY) x 2 matrix, the fitted parameters for all data sets

    Raises
    ------
    Exception
        If x or y has more than two dimensions or if the number of columns
        in x and y differ.
    ValueError
        If some, but not all, data points have zero spread (the weights
        1/sig would be infinite for those points).

    Notes
    -----
    The weight of each point is 1/sig with sig = std(x) + std(y) taken over
    the data sets, which only makes sense if either x or y is a single set.
    If neither x nor y has any spread (e.g. both are single data sets), an
    unweighted fit is performed; psig and pchi2 then carry no statistical
    meaning.
    """
    # Accept lists as well as arrays; a single data set becomes a 1 x N matrix
    x = np.atleast_2d(np.asarray(xinp, dtype=float))
    y = np.atleast_2d(np.asarray(yinp, dtype=float))
    if x.ndim > 2 or y.ndim > 2:
        raise Exception('x and y must not have higher dimension than 2.')
    if x.shape[1] != y.shape[1]:
        raise Exception('Number of columns in x and y must be equal')
    N = x.shape[1]
    n = 1  # always linear fit

    # Perform single fit to get the base chi2 value
    xs = np.median(x, axis=0)
    ys = np.median(y, axis=0)  # Reproduces original data points independent of distribution
    sig = np.std(x, axis=0) + np.std(y, axis=0)
    if not np.any(sig):
        # No spread anywhere: fall back to equal weights instead of dividing by zero
        sig = np.ones(N)
    elif np.any(sig == 0):
        raise ValueError('Some data points have zero standard deviation; cannot compute weights.')
    inv_sig = np.ones(N)/sig  # weighted column of ones, shared by all fits

    Xt = np.stack((xs, np.ones(N)), axis=1)
    X = np.stack((xs/sig, inv_sig), axis=1)
    Y = ys/sig
    pp = np.linalg.lstsq(X, Y, rcond=None)[0]
    chi2 = np.sum((Y - np.matmul(X, pp))**2)
    # Residual of the single fit; removed from every data set below so that
    # the chi2 distribution of the fits reflects the added noise only
    subtract = ys - np.matmul(Xt, pp)

    xn = x.shape[0]
    yn = y.shape[0]
    nmc = max(xn, yn)
    pmc = np.zeros((nmc, n+1))
    chi2mc = np.zeros(nmc)
    for i in range(nmc):
        # Data sets are reused cyclically if x and y have different counts
        X = np.stack((x[i % xn, :]/sig, inv_sig), axis=1)
        Y = (y[i % yn, :] - subtract)/sig
        p = np.linalg.lstsq(X, Y, rcond=None)[0]
        pmc[i, :] = p
        chi2mc[i] = np.sum((Y - np.matmul(X, p))**2)

    # The single fit (pp) rather than the mean of pmc is returned, for
    # compatibility with the MATLAB script
    psig = np.std(pmc, 0)

    # Compute pchi2
    pchi2 = np.sum(chi2mc > chi2)/nmc

    if plot:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 1, figsize=(4, 2))
        counts, *_ = ax.hist(chi2mc, bins=50)
        ycent = 0.5*max(counts)
        ax.plot([chi2, chi2], [0, ycent], 'r-')
        ax.set_yticks([])
        ax.set_xlabel(r"$\chi^2$")
        plt.show()

    return (pp, psig, pchi2, pmc)
121
+
122
+
123
+
124
def _rank_index(rank, count):
    """Convert a 1-based, possibly fractional, rank into a valid 0-based index."""
    return min(max(int(round(rank)), 1), count) - 1


def _plot_confidence(X, median, sig, plow, phigh, err):
    """Plot a histogram per column (plus a scatter plot for exactly two columns)."""
    import matplotlib.pyplot as plt
    import matplotlib.gridspec as gridspec
    nvar = np.size(X, 1)
    if nvar == 2:  # Exactly two parameters so produce a scatter plot and histograms
        fig = plt.figure(figsize=(8, 4.8))
        gs = gridspec.GridSpec(2, 2, width_ratios=[1.5, 1], height_ratios=[1, 1])
        # Left square spans both rows
        ax_left = fig.add_subplot(gs[:, 0])
        axes = [fig.add_subplot(gs[0, 1]), fig.add_subplot(gs[1, 1])]
        ax_left.set_aspect('equal')
        ax_left.scatter(X[:, 0], X[:, 1], s=0.1)
        ax_left.set_xlabel('a')
        ax_left.set_ylabel('b')
        ax_left.plot([plow[0], plow[0]], [np.min(X[:, 1]), np.max(X[:, 1])], 'k--')
        ax_left.plot([phigh[0], phigh[0]], [np.min(X[:, 1]), np.max(X[:, 1])], 'k--')
        ax_left.plot([np.min(X[:, 0]), np.max(X[:, 0])], [plow[1], plow[1]], 'k--')
        ax_left.plot([np.min(X[:, 0]), np.max(X[:, 0])], [phigh[1], phigh[1]], 'k--')

        ax_left.set_aspect(1.0/ax_left.get_data_ratio(), adjustable='box')
    else:  # only produce histograms
        fig, axes = plt.subplots(nrows=nvar, ncols=1, figsize=(4, 2*nvar))
        if nvar == 1:
            axes = [axes]  # plt.subplots returns a bare Axes for a single plot

    for i, ax in enumerate(axes):
        counts, *_ = ax.hist(X[:, i], bins=50)
        ycent = 0.5*max(counts)
        ax.errorbar(median[i], ycent, xerr=sig[i], fmt='ro', capsize=5)
        ax.plot([plow[i], plow[i]], [0, 0.8*ycent], 'k--')
        ax.plot([phigh[i], phigh[i]], [0, 0.8*ycent], 'k--')
        ax.plot([median[i]-err[i], median[i]-err[i]], [0, 0.1*ycent], 'r-')
        ax.plot([median[i]+err[i], median[i]+err[i]], [0, 0.1*ycent], 'r-')
        ax.set_xlabel(chr(ord('a')+i))  # Name the variables a,b,c...
        ax.set_yticks([])

    plt.tight_layout()
    plt.show()


def confidence(X, level=0.683, plot=False):
    """Statistical analysis of the data in matrix X.

    It is assumed that the number of data points is large; all properties
    are calculated from the data itself.

    Parameters
    ----------
    X : array-like
        Data matrix with data in columns. For example, if X contains data
        from two measurements, data for measurement 1 is in column 1 and
        measurement 2 in column 2. If there is only one column, a 1d-array
        (or list) is also acceptable.
    level : float
        Confidence level, 0 < level < 1. Default is 0.683.
    plot : bool
        If True, plot a histogram for each column where a general statistic
        is shown as a red errorbar (median +/- stdev) and the confidence
        interval is shown with black dashed lines. The red markers at the
        bottom show the simpler (median +/- err) interval. If X has exactly
        two columns, a scatter plot showing possible correlation between
        the two columns is also produced.

    Returns
    -------
    (err, confint) where
    err     : for each column, the absolute deviation from the column mean
              that is not exceeded by a fraction `level` of the points
    confint : a list of tuples (low, high), one per column, taken from the
              sorted data at the ranks 0.5*(1-level)*N and
              (1-0.5*(1-level))*N
    If the input X was one-dimensional, err is a scalar and confint a
    single tuple.

    Raises
    ------
    Exception
        If level is outside (0, 1) or X has an unsupported dimension.
    ValueError
        If X contains no data points.
    """
    X = np.asarray(X, dtype=float)
    onedim = (X.ndim == 1)

    if onedim:  # convert to matrix, then convert back to onedim at the end
        X = X.reshape((X.size, 1))

    if level <= 0 or level >= 1:
        raise Exception("level must be 0 < level < 1.")

    if X.ndim != 2:
        raise Exception("X must be a 1d-array or a 2d-matrix.")

    N, n = X.shape  # number of data points, number of columns
    if N == 0:
        raise ValueError("X must contain at least one data point.")

    if n > N:
        print("Warning. It appears that your data is not placed column-wise.")

    # Ranks (1-based) in the sorted data, clamped to the valid range 1..N.
    # Note the parentheses in the upper rank: it is the fraction
    # 1-0.5*(1-level) of N, not 1 minus a number of points.
    tail = 0.5*(1 - level)
    low_idx = _rank_index(tail*N, N)
    high_idx = _rank_index((1 - tail)*N, N)
    err_idx = _rank_index(level*N, N)

    ordered = np.sort(X, axis=0)
    plow = ordered[low_idx, :].copy()
    phigh = ordered[high_idx, :].copy()

    absdiff = np.abs(X - np.mean(X, 0))  # Absolute difference to mean value
    err = np.sort(absdiff, axis=0)[err_idx, :].copy()

    if plot:
        _plot_confidence(X, np.median(X, 0), np.std(X, 0), plow, phigh, err)

    if onedim:
        return (err[0], (plow[0], phigh[0]))  # simply return scalars
    return (err, list(zip(plow, phigh)))
224
+
225
+
226
def linconf(xinp, yinp, ysig, nmc=10000, distrib='normal', level=0.683, ytransform=None, restransform=None):
    """Perform the full Monte Carlo linear regression with confidence calculation.

    The following 5 steps are applied in succession:
      1. addnoise to the y values (nmc copies, distribution distrib)
      2. transform the y values (skipped if ytransform is None)
      3. linreg (x,y)
      4. calculate a tuple of results from a,b (skipped if restransform is
         None, in which case the results are a and b themselves)
      5. confidence for each result

    Parameters
    ----------
    xinp : array-like
        The x values, passed on to linreg.
    yinp, ysig, nmc, distrib
        Passed on to addnoise.
    level : float
        Confidence level passed on to confidence.
    ytransform : callable or None
        Called with the whole matrix of noisy y values; its return value is
        used as the y data of the fit.
    restransform : callable or None
        Called as restransform(a, b), once with the single-fit parameters
        and once with the column vectors of all Monte Carlo parameters. It
        should return a tuple of results; a single scalar result is also
        accepted.

    Returns
    -------
    (reslist, pchi2) where reslist is a list of
    (result, error, confidenceinterval), one entry per calculated result,
    and pchi2 is the value returned by linreg.
    """
    ymc = addnoise(yinp, ysig, nmc, distrib)
    if ytransform is not None:
        ymc = ytransform(ymc)

    # Convert x up front so that plain lists are accepted
    pp, psig, pchi2, pmc = linreg(np.asarray(xinp), ymc)

    if restransform is not None:
        results = restransform(pp[0], pp[1])
        results_mc = restransform(pmc[:, 0], pmc[:, 1])
        if np.isscalar(results):
            # A single result was returned instead of a tuple: wrap both
            # so that the pairing below still works
            results = (results,)
            results_mc = (results_mc,)
    else:
        results = (pp[0], pp[1])
        results_mc = (pmc[:, 0], pmc[:, 1])

    rlist = []
    for r, rmc in zip(results, results_mc):
        perr, confint = confidence(rmc, level)
        rlist.append((r, perr, confint))
    return (rlist, pchi2)
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.4
2
+ Name: linregmc
3
+ Version: 0.0.1
4
+ Summary: Linear regression with Monte Carlo error analysis
5
+ Project-URL: Homepage, https://github.com/parsod/BFKdemos
6
+ Author-email: Par Soderhjelm <par.soderhjelm@bpc.lu.se>
7
+ License-Expression: MIT
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Programming Language :: Python :: 3
10
+ Requires-Python: >=3.7
11
+ Description-Content-Type: text/markdown
12
+
13
+ # linregmc package (Linear regression with Monte Carlo error analysis)
14
+
15
+ This package is a single module which contains three functions
16
+
17
+ addnoise - introduce noise in the data based on known experimental precision
18
+ linreg - perform multiple linear regressions and calculate goodness-of-fit
19
+ confidence - get confidence intervals, plot distributions etc.
20
+
21
+ TODO: Add detailed documentation here
@@ -0,0 +1,6 @@
1
+ linregmc/.bashlog,sha256=6LIuzUCAwO_ab7lx8FSkuUWWksFWkI_M6bclmDaa_Sc,297
2
+ linregmc/__init__.py,sha256=QP5Y7zpbieaPw5lQbGZt4kCFwHCLNgr9O5NSt8n2mIg,24
3
+ linregmc/linregmc.py,sha256=SfS64Kbk-VsqHKRrJSeavLYLQNFJHgU8pBK9wZNiTYM,10422
4
+ linregmc-0.0.1.dist-info/METADATA,sha256=a49XWMX-LnX8MzM1_qplYEcVQWwI8AqEDSoM3Ars-ts,811
5
+ linregmc-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
+ linregmc-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any