linregmc 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of linregmc might be problematic. Click here for more details.

@@ -0,0 +1,42 @@
1
+ 522 cd packaging_mclinreg/
2
+ 523 ls
3
+ 527 cd packaging_mclinreg/
4
+ 528 vi README.MD
5
+ 529 ls
6
+ 530 vi pyproject.toml
7
+ 531 rm -r dist/
8
+ 549 cd ..
9
+ 550 ls
10
+ 551 vi pyproject.toml
11
+ 552 vi README.MD
12
+ 553 vi README.MD
13
+ 554 ls
14
+ 555 python -m pip install --upgrade build
15
+ 556 python -m build
16
+ 557 python -m pip install --upgrade twine
17
+ 558 python -m twine upload --repository testpypi dist/*
18
+ 568 cd ../packaging_linregmc/
19
+ 569 ls
20
+ 570 vi src
21
+ 576 cd ..
22
+ 577 ls
23
+ 578 cat pyproject.toml
24
+ 579 python -m build
25
+ 580 ls
26
+ 588 cd ..
27
+ 589 python -m build
28
+ 590 python -m twine upload --repository testpypi dist/*
29
+ 591 vi pyproject.toml
30
+ 592 python -m build
31
+ 593 ls
32
+ 594 ls dist/
33
+ 595 python -m twine upload --repository testpypi dist/*0.0.2*
34
+ 596 vi src/linregmc/__init__.py
35
+ 597 vi pyproject.toml
36
+ 598 python -m build
37
+ 599 python -m twine upload --repository testpypi dist/*0.0.3*
38
+ 603 cd packaging_linregmc/
39
+ 604 rm -r dist
40
+ 605 vi pyproject.toml
41
+ 606 ls -lt ../linregmc.py
42
+ 607 cp ../linregmc.py src/linregmc/
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.4
2
+ Name: linregmc
3
+ Version: 0.0.1
4
+ Summary: Linear regression with Monte Carlo error analysis
5
+ Project-URL: Homepage, https://github.com/parsod/BFKdemos
6
+ Author-email: Par Soderhjelm <par.soderhjelm@bpc.lu.se>
7
+ License-Expression: MIT
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Programming Language :: Python :: 3
10
+ Requires-Python: >=3.7
11
+ Description-Content-Type: text/markdown
12
+
13
+ # linregmc package (Linear regression with Monte Carlo error analysis)
14
+
15
+ This package is a single module with three building-block functions (plus linconf, which runs all three in sequence)
16
+
17
+ addnoise - introduce noise in the data based on known experimental precision
18
+ linreg - perform multiple linear regressions and calculate goodness-of-fit
19
+ confidence - get confidence intervals, plot distributions etc.
20
+
21
+ TODO: Add detailed documentation here
@@ -0,0 +1,9 @@
1
+ # linregmc package (Linear regression with Monte Carlo error analysis)
2
+
3
+ This package is a single module with three building-block functions (plus linconf, which runs all three in sequence)
4
+
5
+ addnoise - introduce noise in the data based on known experimental precision
6
+ linreg - perform multiple linear regressions and calculate goodness-of-fit
7
+ confidence - get confidence intervals, plot distributions etc.
8
+
9
+ TODO: Add detailed documentation here
@@ -0,0 +1,9 @@
1
+ # linregmc package (Linear regression with Monte Carlo error analysis)
2
+
3
+ This package is a single module with three building-block functions (plus linconf, which runs all three in sequence)
4
+
5
+ addnoise - introduce noise in the data based on known experimental precision
6
+ linreg - perform multiple linear regressions and calculate goodness-of-fit
7
+ confidence - get confidence intervals, plot distributions etc.
8
+
9
+ TODO: Add detailed documentation here
@@ -0,0 +1,22 @@
1
+ [build-system]
2
+ requires = ["hatchling >= 1.26"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "linregmc"
7
+ version = "0.0.1"
8
+ authors = [
9
+ { name="Par Soderhjelm", email="par.soderhjelm@bpc.lu.se" },
10
+ ]
11
+ description = "Linear regression with Monte Carlo error analysis"
12
+ readme = "README.md"
13
+ requires-python = ">=3.7"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "Operating System :: OS Independent",
17
+ ]
18
+ license = "MIT"
19
+ license-files = ["LICEN[CS]E*"]
20
+
21
+ [project.urls]
22
+ Homepage = "https://github.com/parsod/BFKdemos"
@@ -0,0 +1,20 @@
1
+ 555 cd src
2
+ 556 mkdir example_package_per
3
+ 560 cd ..
4
+ 532 cd src
5
+ 533 ls
6
+ 534 ls example_package_parsod/
7
+ 535 ln -sf ../../linregmc.py .
8
+ 536 ls
9
+ 537 mv example_package_parsod/ linregmc
10
+ 538 mv linregmc.py linregmc
11
+ 548 cd ..
12
+ 571 cd src/
13
+ 572 ls
14
+ 573 mv linregmc/linregmc.py .
15
+ 574 ls
16
+ 575 mv linregmc ../linregmc_package
17
+ 581 cd src/
18
+ 582 mv ../linregmc_package/ linregmc
19
+ 583 mv linregmc.py linregmc
20
+ 587 cd ..
@@ -0,0 +1,15 @@
1
+ 557 cd example_package_per/
2
+ 558 echo -n >__init__.py
3
+ 559 vi example.py
4
+ 539 cd linregmc/
5
+ 540 ls
6
+ 541 ls -lt
7
+ 542 rm example.py
8
+ 543 vi __init__.py
9
+ 544 ls
10
+ 545 rm linregmc.py
11
+ 546 cp ../../../linregmc.py .
12
+ 547 ls
13
+ 584 cd linregmc/
14
+ 585 ls
15
+ 586 vi __init__.py
@@ -0,0 +1 @@
1
+ from .linregmc import *
@@ -0,0 +1,253 @@
1
+ import numpy as np
2
+ from numpy.matlib import repmat,randn
3
+
4
def addnoise(yinp, ysiginp, nmc=10000, distrib='normal'):
    """Create Monte Carlo copies of a data vector with added random noise.

    Parameters
    ----------
    yinp : scalar or 1-d array-like
        Data vector (the mean values of the generated copies).
    ysiginp : scalar or 1-d array-like
        Standard deviation of each data point. A single value is applied
        to every element of ``yinp``; otherwise it must have the same
        length as ``yinp``.
    nmc : int, optional
        Number of Monte Carlo copies (rows of the result).
    distrib : str, optional
        ``'norm'``/``'normal'`` for normally distributed noise,
        ``'lognorm'``/``'lognormal'`` for lognormally distributed values
        (useful for example if negative results are unphysical).
        Case-insensitive.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(nmc, n)`` where ``n`` is the length of ``yinp``.

    Raises
    ------
    ValueError
        If an input has more than one dimension, the lengths differ, the
        distribution name is unknown, or a lognormal distribution is
        requested for data that is not strictly positive.

    Notes
    -----
    Random numbers are drawn from NumPy's global generator, so call
    ``np.random.seed`` beforehand if reproducible output is needed.
    """
    y = np.asarray(yinp)
    ysig = np.asarray(ysiginp)
    if y.ndim > 1 or ysig.ndim > 1:
        raise ValueError('y and ysig must not have higher dimension than 1.')

    n = y.size
    if ysig.size == 1:
        # A single standard deviation applies to every data point.
        ysig = np.full(n, ysig.reshape(-1)[0], dtype=float)
    if ysig.size != n:
        raise ValueError('y and ysig must have the same length.')

    # Row vectors broadcast against the (nmc, n) block of random numbers.
    y = y.reshape((1, n))
    ysig = ysig.reshape((1, n))

    kind = distrib.lower()
    if kind in ('norm', 'normal'):
        return y + ysig * np.random.standard_normal((nmc, n))

    if kind in ('lognorm', 'lognormal'):
        if np.any(y <= 0):
            # The formulas below only use y**2, so a negative mean would
            # silently produce positive samples and a zero mean gives NaN.
            raise ValueError('Lognormal noise requires strictly positive y values.')
        mu = np.log(y**2 / np.sqrt(ysig**2 + y**2))       # mu of lognormal dist
        sigma = np.sqrt(np.log(ysig**2 / y**2 + 1))       # sigma of lognormal dist
        return np.exp(np.random.standard_normal((nmc, n)) * sigma + mu)

    raise ValueError('Distribution named "' + distrib + '" is not recognized.')
43
+
44
+
45
def linreg(xinp, yinp, plot=False):
    """Weighted linear fit ``a*x + b = y`` with Monte Carlo error analysis.

    This is the weighted-fit version that only handles linear fits and
    does not compute confidence intervals.

    Parameters
    ----------
    xinp : array-like, shape (N,) or (NX, N)
        The NX data sets of x values (N data points each).
    yinp : array-like, shape (N,) or (NY, N)
        The NY data sets of y values (N data points each). NX and NY need
        not be equal; in particular a single data set (without added
        noise) may be used for one of them. The number of fits equals
        ``max(NX, NY)`` and the shorter input is reused cyclically.
    plot : bool, optional
        If true, show a histogram of the Monte Carlo chi-square values
        with the single-fit value marked by a red line.

    Returns
    -------
    pp : numpy.ndarray, shape (2,)
        Single-fit parameters ``(a, b)``, fitted to the column-wise
        medians of the input data sets.
    psig : numpy.ndarray, shape (2,)
        Standard deviation of each parameter over all Monte Carlo fits.
    pchi2 : float
        Fraction of Monte Carlo fits whose chi-square exceeds that of the
        single fit.
    pmc : numpy.ndarray, shape (max(NX, NY), 2)
        The fitted parameters ``(a, b)`` for every data set.

    Raises
    ------
    ValueError
        If the inputs have more than two dimensions or a different number
        of columns.
    """
    # Accept lists as well as arrays; a 1-d input becomes a single data set.
    x = np.atleast_2d(np.asarray(xinp))
    y = np.atleast_2d(np.asarray(yinp))
    if x.ndim != 2 or y.ndim != 2:
        raise ValueError('x and y must not have higher dimension than 2.')
    if x.shape[1] != y.shape[1]:
        raise ValueError('Number of columns in x and y must be equal')
    npoints = x.shape[1]
    nparams = 2  # always a linear fit: slope and intercept

    # Single fit to the medians gives the base chi2 value. The median
    # reproduces the original data points independent of distribution.
    xs = np.median(x, axis=0)
    ys = np.median(y, axis=0)
    # Per-point weight; this only makes sense if either x or y is a single set.
    sig = np.std(x, axis=0) + np.std(y, axis=0)
    if not np.any(sig):
        # Both inputs are single data sets, so there is no spread to weight
        # by; fall back to an unweighted fit instead of dividing by zero.
        sig = np.ones(npoints)
    inv_sig = 1.0 / sig

    design_plain = np.stack((xs, np.ones(npoints)), axis=1)
    design = np.stack((xs * inv_sig, inv_sig), axis=1)
    target = ys * inv_sig
    pp = np.linalg.lstsq(design, target, rcond=None)[0]
    chi2 = np.sum((target - design @ pp) ** 2)
    # Residuals of the single fit; they are removed from every Monte Carlo
    # y set before it is fitted.
    subtract = ys - design_plain @ pp

    xn = x.shape[0]
    yn = y.shape[0]
    nmc = max(xn, yn)
    pmc = np.zeros((nmc, nparams))
    chi2mc = np.zeros(nmc)
    for i in range(nmc):
        design_i = np.stack((x[i % xn, :] * inv_sig, inv_sig), axis=1)
        target_i = (y[i % yn, :] - subtract) * inv_sig
        p = np.linalg.lstsq(design_i, target_i, rcond=None)[0]
        pmc[i, :] = p
        chi2mc[i] = np.sum((target_i - design_i @ p) ** 2)

    # The single fit (pp) rather than the mean of pmc is returned as the result.
    psig = np.std(pmc, 0)
    pchi2 = np.mean(chi2mc > chi2)

    if plot:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 1, figsize=(4, 2))
        counts, *_ = ax.hist(chi2mc, bins=50)
        ycent = 0.5 * max(counts)
        ax.plot([chi2, chi2], [0, ycent], 'r-')
        ax.set_yticks([])
        ax.set_xlabel(r"$\chi^2$")
        plt.show()

    return (pp, psig, pchi2, pmc)
121
+
122
+
123
+
124
def confidence(X, level=0.683, plot=False):
    """Statistical analysis of the data in matrix X.

    It is assumed that the number of data points is large; all properties
    are calculated from the data itself.

    Parameters
    ----------
    X : array-like, shape (N,) or (N, n)
        Data in columns. For example, if X contains data from two
        measurements, measurement 1 is in column 1 and measurement 2 in
        column 2. A 1-d input is treated as a single column.
    level : float, optional
        Confidence level, ``0 < level < 1``.
    plot : bool, optional
        If true, plot a histogram for each column where a general
        statistic is shown as a red errorbar (median +/- stdev) and the
        confidence interval with black dashed lines. The red markers at
        the bottom show the simpler (median +/- err) interval, which
        should normally coincide with the confidence interval unless the
        distribution is skew (in which case the confidence interval is
        more reliable). If X has exactly two columns, a scatter plot
        showing possible correlation between them is also produced.

    Returns
    -------
    err : float or numpy.ndarray
        For each column, the absolute deviation from the column mean that
        is not exceeded by a fraction ``level`` of the data points.
    confint : tuple or list of tuple
        ``(low, high)`` for each column, chosen so that equally many data
        points fall below ``low`` as above ``high``. For 1-d input a
        single tuple (and a scalar ``err``) is returned instead of a list.

    Raises
    ------
    ValueError
        If ``level`` is outside ``(0, 1)``, or X is empty or has more than
        two dimensions.
    """
    data = np.asarray(X, dtype=float)
    onedim = (data.ndim == 1)
    if onedim:  # convert to matrix, then convert back to onedim at the end
        data = data.reshape((data.size, 1))

    if level <= 0 or level >= 1:
        raise ValueError("level must be 0 < level < 1.")
    if data.ndim != 2:
        raise ValueError("X must be a 1-d or 2-d array.")

    npoints, nvar = data.shape
    if npoints == 0:
        raise ValueError("X must contain at least one data point.")
    if nvar > npoints:
        print("Warning. It appears that your data is not placed column-wise.")

    median = np.median(data, 0)
    sig = np.std(data, 0)
    absdiff = np.abs(data - np.mean(data, 0))  # absolute difference to mean value

    # 1-based rank of the lower bound: the number of points in each tail,
    # but at least the first point.
    k_tail = round(max(1, 0.5 * (1 - level) * npoints))
    # 1-based rank of the deviation that encloses `level` of the points.
    k_err = max(1, round(min(npoints, level * npoints)))

    ordered = np.sort(data, axis=0)
    plow = ordered[k_tail - 1]
    # Mirror the lower rank from the top. The previous expression,
    # round(min(N, 1 - 0.5*(1-level)*N)) - 1, only reached the upper tail
    # through negative indexing and returned the smallest sample whenever
    # 0.5*(1-level)*N was below 0.5.
    phigh = ordered[npoints - k_tail]
    err = np.sort(absdiff, axis=0)[k_err - 1]

    if plot:
        _plot_confidence(data, median, sig, err, plow, phigh)

    if onedim:
        return (err[0], (plow[0], phigh[0]))  # simply return scalars
    return (err, list(zip(plow, phigh)))


def _plot_confidence(data, median, sig, err, plow, phigh):
    """Show per-column histograms (and a scatter plot for two columns)."""
    import matplotlib.pyplot as plt
    import matplotlib.gridspec as gridspec

    nvar = data.shape[1]
    if nvar == 2:  # exactly two parameters: scatter plot and histograms
        fig = plt.figure(figsize=(8, 4.8))
        gs = gridspec.GridSpec(2, 2, width_ratios=[1.5, 1], height_ratios=[1, 1])
        # Left square spans both rows
        ax_left = fig.add_subplot(gs[:, 0])
        axes = [fig.add_subplot(gs[0, 1]), fig.add_subplot(gs[1, 1])]
        ax_left.set_aspect('equal')
        ax_left.scatter(data[:, 0], data[:, 1], s=0.1)
        ax_left.set_xlabel('a')
        ax_left.set_ylabel('b')
        y_span = [np.min(data[:, 1]), np.max(data[:, 1])]
        x_span = [np.min(data[:, 0]), np.max(data[:, 0])]
        ax_left.plot([plow[0], plow[0]], y_span, 'k--')
        ax_left.plot([phigh[0], phigh[0]], y_span, 'k--')
        ax_left.plot(x_span, [plow[1], plow[1]], 'k--')
        ax_left.plot(x_span, [phigh[1], phigh[1]], 'k--')

        ax_left.set_aspect(1.0 / ax_left.get_data_ratio(), adjustable='box')
    else:  # only produce histograms
        fig, axes = plt.subplots(nrows=nvar, ncols=1, figsize=(4, 2 * nvar))
        if nvar == 1:
            axes = [axes]  # plt.subplots returns a bare Axes for a single plot

    for i, ax in enumerate(axes):
        counts, *_ = ax.hist(data[:, i], bins=50)
        ycent = 0.5 * max(counts)
        ax.errorbar(median[i], ycent, xerr=sig[i], fmt='ro', capsize=5)
        ax.plot([plow[i], plow[i]], [0, 0.8 * ycent], 'k--')
        ax.plot([phigh[i], phigh[i]], [0, 0.8 * ycent], 'k--')
        ax.plot([median[i] - err[i], median[i] - err[i]], [0, 0.1 * ycent], 'r-')
        ax.plot([median[i] + err[i], median[i] + err[i]], [0, 0.1 * ycent], 'r-')
        ax.set_xlabel(chr(ord('a') + i))  # name the variables a, b, c...
        ax.set_yticks([])

    plt.tight_layout()
    plt.show()
224
+
225
+
226
def linconf(xinp, yinp, ysig, nmc=10000, distrib='normal', level=0.683, ytransform=None, restransform=None):
    """Run the full Monte Carlo linear regression with confidence calculation.

    The following steps are applied in succession:

    1. ``addnoise`` to the y values
    2. transform the y values (skipped if ``ytransform`` is None)
    3. ``linreg(x, y)``
    4. calculate results from a, b (skipped if ``restransform`` is None,
       in which case the results are a and b themselves)
    5. ``confidence`` for each result

    Parameters
    ----------
    xinp, yinp, ysig, nmc, distrib
        Passed to ``addnoise`` / ``linreg``; see those functions.
    level : float, optional
        Confidence level passed to ``confidence``.
    ytransform : callable, optional
        Called with the noisy y matrix; its return value replaces it.
    restransform : callable, optional
        Called as ``restransform(a, b)``, once with the single-fit
        parameters and once with the arrays of Monte Carlo parameters.
        May return a tuple of results or a single result.

    Returns
    -------
    (reslist, pchi2)
        ``reslist`` is a list of ``(result, error, confidenceinterval)``
        for each calculated result; ``pchi2`` is the value returned by
        ``linreg``.
    """
    ymc = addnoise(yinp, ysig, nmc, distrib)
    if ytransform is not None:
        ymc = ytransform(ymc)

    pp, _psig, pchi2, pmc = linreg(xinp, ymc)
    a_mc = pmc[:, 0]
    b_mc = pmc[:, 1]

    if restransform is not None:
        results = restransform(pp[0], pp[1])
        results_mc = restransform(a_mc, b_mc)
        if np.ndim(results) == 0:
            # A transform that returns one value instead of a tuple: wrap
            # it so the pairing below treats it as a single result.
            results = (results,)
            results_mc = (results_mc,)
    else:
        results = (pp[0], pp[1])
        results_mc = (a_mc, b_mc)

    rlist = []
    for r, rmc in zip(results, results_mc):
        perr, confint = confidence(rmc, level)
        rlist.append((r, perr, confint))
    return (rlist, pchi2)