linregmc 0.0.1__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of linregmc might be problematic. Click here for more details.

linregmc-0.0.2/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025, Pär Söderhjelm
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,22 @@
1
+ Metadata-Version: 2.3
2
+ Name: linregmc
3
+ Version: 0.0.2
4
+ Summary: Linear regression with Monte Carlo error analysis
5
+ License: MIT
6
+ Author: Pär Söderhjelm
7
+ Requires-Python: >=3.12,<4.0
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Requires-Dist: numpy (>=1.23,<2.0)
13
+ Description-Content-Type: text/markdown
14
+
15
+ # linregmc package (Linear regression with Monte Carlo error analysis)
16
+
17
+ This package is a single module which contains four functions:<br>
18
+ addnoise - introduce noise in the data based on known experimental precision<br>
19
+ linreg - perform multiple linear regressions and calculate goodness-of-fit<br>
20
+ confidence - get confidence intervals, plot distributions etc.<br>
21
+ linconf - do all the steps in succession, including possible transformation of the data<br>
22
+
@@ -0,0 +1,7 @@
1
+ # linregmc package (Linear regression with Monte Carlo error analysis)
2
+
3
+ This package is a single module which contains four functions:<br>
4
+ addnoise - introduce noise in the data based on known experimental precision<br>
5
+ linreg - perform multiple linear regressions and calculate goodness-of-fit<br>
6
+ confidence - get confidence intervals, plot distributions etc.<br>
7
+ linconf - do all the steps in succession, including possible transformation of the data<br>
@@ -0,0 +1,27 @@
1
+ [tool.poetry]
2
+ name = "linregmc"
3
+ version = "0.0.2"
4
+ description = "Linear regression with Monte Carlo error analysis"
5
+ authors = ["Pär Söderhjelm"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+ packages = [
9
+ { include = "linregmc", from = "src" }
10
+ ]
11
+
12
+
13
+ [tool.poetry.dependencies]
14
+ python = "^3.12"
15
+ numpy = "^1.23"
16
+
17
+ [tool.poetry.group.dev.dependencies]
18
+ myst-nb = "^1.3.0"
19
+ sphinx-autoapi = "^3.6.0"
20
+
21
+ [tool.poetry.group.docs.dependencies]
22
+ sphinx = "^7.0"
23
+ sphinx-rtd-theme = "^3.0.2"
24
+
25
+ [build-system]
26
+ requires = ["poetry-core>=1.0.0"]
27
+ build-backend = "poetry.core.masonry.api"
@@ -0,0 +1,313 @@
1
+ import numpy as np
2
+ from numpy.matlib import repmat,randn
3
+
4
def addnoise(yinp,ysiginp,nmc=10000,distrib='normal'):
    """Adds noise to an array of data points (or a single value).

    Creates a matrix of nmc vectors with the mean values of yinp but with
    added random noise of standard deviation ysiginp.
    The noise is drawn from NumPy's global random state (``np.random``), so
    call ``np.random.seed`` beforehand if reproducible results are needed.

    Parameters
    ----------
    yinp : array
        data vector (or a single value)
    ysiginp : array
        standard deviation vector (same length as yinp, or a single value)
    nmc : int, default 10000
        number of Monte Carlo copies
    distrib : str, default 'normal'
        'norm'/'normal' gives normal distribution
        'lognorm'/'lognormal' give lognormal distribution (useful for example if negative results are unphysical)
        The name is matched case-insensitively.

    Returns
    -------
    array
        the data vector with added noise, shape (nmc, n) where n is the number of data points
        (always two-dimensional, but only one column if yinp was a single value)

    Raises
    ------
    ValueError
        if yinp or ysiginp has more than one dimension, if their lengths
        differ, or if distrib is not a recognized distribution name.

    Examples
    --------
    >>> y = np.array([1.2, 2.3, 3.7])
    >>> ysig = np.array([0.1, 0.3, 0.2])
    >>> y1 = addnoise(y, ysig)   # different stdev for the three points, normal distribution
    >>> y1.shape
    (10000, 3)
    >>> y2 = addnoise(y, 0.2, 1000, distrib='lognorm')   # same stdev for the three points, lognormal distribution
    >>> y2.shape
    (1000, 3)
    >>> y3 = addnoise(1.5, 0.2)   # only one point
    >>> y3.shape
    (10000, 1)
    """
    yinp = np.asarray(yinp)
    ysiginp = np.asarray(ysiginp)
    if np.ndim(yinp) > 1 or np.ndim(ysiginp) > 1:
        raise ValueError('y and ysig must not have higher dimension than 1.')
    if np.size(ysiginp) == 1:
        # If ysiginp is a scalar, turn it into a vector with identical elements
        ysiginp = ysiginp*np.ones(np.size(yinp))
    if np.size(yinp) != np.size(ysiginp):
        raise ValueError('y and ysig must have the same length.')

    n = np.size(yinp)
    # Row vectors of shape (1, n): broadcasting against the (nmc, n) noise
    # matrix replicates them over all Monte Carlo copies, which replaces the
    # explicit repmat() calls from the deprecated numpy.matlib module.
    y = yinp.reshape((1, n))
    ysig = ysiginp.reshape((1, n))

    kind = distrib.lower()
    if kind in ('norm', 'normal'):
        return y + ysig*np.random.standard_normal((nmc, n))
    if kind in ('lognorm', 'lognormal'):
        # Parameters of the underlying normal distribution, chosen so that the
        # lognormal variable has mean y and standard deviation ysig.
        mu = np.log(y**2/np.sqrt(ysig**2 + y**2))
        sigma = np.sqrt(np.log(ysig**2/y**2 + 1))
        return np.exp(np.random.standard_normal((nmc, n))*sigma + mu)
    raise ValueError('Distribution named "' + distrib + '" is not recognized.')
64
+
65
+
66
def linreg(xinp, yinp, plot = False):
    """Performs linear fitting ax+b=y with error analysis using a Monte Carlo approach.

    Parameters
    ----------
    xinp : array
        an NX x N matrix: the NX data sets of x values (N data points).
        A 1-D input is treated as a single data set.
    yinp : array
        an NY x N matrix: the NY data sets of y values (N data points).
        A 1-D input is treated as a single data set.
        NX and NY need not be the same. In particular one may use a
        single data set (without added noise) for one of them.
        The number of fits equals NM = max(NX,NY) and if there are less data
        sets for one of x or y, they are just cyclically reused.
    plot : boolean, default False
        an optional argument that specifies whether to plot the chi2 distribution
        to visualize the "goodness-of-fit".

    Returns
    -------
    pp : array (2 elements)
        single-fit value of each parameter (a, b) (can be used as the result)
    psig : array (2 elements)
        standard deviation of each parameter over the Monte Carlo fits
    pchi2 : float
        goodness-of-fit, i.e. fraction of Monte Carlo fits whose chi2 exceeds
        the chi2 of the single fit
    pmc : array
        an NM x 2 matrix, the fitted parameters for all data sets

    Raises
    ------
    ValueError
        if x and y do not have the same number of data points (columns).

    Notes
    -----
    The per-point weights are taken from the spread of the input data sets
    (std of x plus std of y, column-wise). This only makes sense if either x
    or y is a single set; if both are single sets the weights are zero and
    the weighted fit divides by zero.

    Examples
    --------
    >>> x = np.array([0.1, 0.2, 0.3])
    >>> y = np.array([1.2, 2.3, 3.7])
    >>> y_mc=addnoise(y, 0.1)
    >>> pp,psig,pchi2,pmc = linreg(x, y_mc)
    >>> print(pp)      # approximately [12.5 -0.1]
    >>> pmc.shape      # (10000, 2)
    """
    # Accept lists as well as arrays; a 1-D input becomes a single row.
    x = np.atleast_2d(np.asarray(xinp))
    y = np.atleast_2d(np.asarray(yinp))
    if np.size(x, 1) != np.size(y, 1):
        raise ValueError('Number of columns in x and y must be equal')
    N = np.size(x, 1)
    n = 1   # always linear fit

    # Perform single fit to get the base chi2 value
    xs = np.median(x, axis=0)
    ys = np.median(y, axis=0)   # Reproduces original data points independent of distribution
    sig = np.std(x, axis=0) + np.std(y, axis=0)   # This only makes sense if either x or y is a single set

    Xt = np.stack((xs, np.ones(N)), axis=1)             # unweighted design matrix
    X = np.stack((xs/sig, np.ones(N)/sig), axis=1)      # weighted design matrix
    Y = ys/sig
    pp = np.linalg.lstsq(X, Y, rcond=None)[0]
    chi2 = np.sum((Y - np.matmul(X, pp))**2)
    # Residuals of the single fit. They are removed from every Monte Carlo
    # y set below, so the Monte Carlo chi2 values only reflect the added
    # noise and can be compared against the chi2 of the single fit.
    subtract = ys - np.matmul(Xt, pp)

    xn = np.size(x, 0)
    yn = np.size(y, 0)
    nmc = max(xn, yn)
    pmc = np.zeros((nmc, n+1))
    chi2mc = np.zeros(nmc)
    for i in range(nmc):
        # Data sets are reused cyclically if x and y have different counts.
        X = np.stack((x[i % xn, :]/sig, np.ones(N)/sig), axis=1)
        Y = (y[i % yn, :] - subtract)/sig
        p = np.linalg.lstsq(X, Y, rcond=None)[0]
        pmc[i, :] = p
        chi2mc[i] = np.sum((Y - np.matmul(X, p))**2)

    # The single fit (pp) rather than the mean of pmc is returned, for
    # compatibility with the MATLAB script.
    psig = np.std(pmc, 0)

    # Compute pchi2: fraction of Monte Carlo fits with chi2 above the single-fit value
    pchi2 = np.mean(chi2mc > chi2)

    if plot:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 1, figsize=(4, 2))
        counts, *_ = ax.hist(chi2mc, bins=50)
        ycent = 0.5*max(counts)
        ax.plot([chi2, chi2], [0, ycent], 'r-')   # marks the single-fit chi2
        ax.set_yticks([])
        ax.set_xlabel(r"$\chi^2$")
        plt.show()

    return (pp, psig, pchi2, pmc)
159
+
160
+
161
+ #
162
+
163
+
164
def confidence(X, level=0.683, plot=False):
    """Statistical analysis of the data in matrix X.

    It is assumed that the number of data points are large; all properties
    are calculated from the data itself.

    Parameters
    ----------
    X : array
        data matrix. Data in columns. For example, if X contains data
        from two measurements, data for measurement 1 is in column 1
        and measurement 2 in column 2.
        If only one column, a 1d-array is also acceptable
    level : float, default 0.683
        desired confidence level (0 < level < 1)
    plot : boolean, default False
        an optional boolean specifying whether to plot histograms for each column
        where a general statistic is shown as a red errorbar (median +/- stdev)
        and the confidence intervals are shown with black lines.
        The red markers at the bottom show the simpler (median +/- err)
        interval which should normally coincide with the confidence interval
        unless the distribution is skew (in which case the confidence interval is more reliable).
        If X has exactly two columns, a scatter plot showing possible correlation between
        the two columns is also produced.

    Returns
    -------
    err : array
        estimated error in each column based on the selected confidence level:
        the absolute deviation from the column mean that a fraction ``level``
        of the points stay within.
        If the input X was a 1d-array, a single value is returned instead.
    confint : list
        a list of tuples (low, high), the confidence interval for each input column
        (a fraction ``level`` of the values lie between the lower and upper
        tail cut-offs of the sorted data)
        If the input X was a 1d-array, a single tuple is returned instead of a list

    Raises
    ------
    ValueError
        if level is not strictly between 0 and 1.

    Examples
    --------
    >>> x = np.array([0.1, 0.2, 0.3])
    >>> y = np.array([1.2, 2.3, 3.7])
    >>> y_mc=addnoise(y, 0.1)
    >>> pp,psig,pchi2,pmc = linreg(x, y_mc)
    >>> err,confint = confidence(pmc, 0.95)
    >>> print(err)      # gives approximately [1.364 0.296]
    >>> print(confint)  # gives approximately [(11.16, 13.88), (-0.401, 0.190)]

    """
    X = np.asarray(X)
    onedim = (np.ndim(X) == 1)

    if onedim:   # convert to matrix, then convert back to onedim at the end
        X = X.reshape((np.size(X), 1))

    if level <= 0 or level >= 1:
        raise ValueError("level must be 0 < level < 1.")

    if np.size(X, 1) > np.size(X, 0):
        print("Warning. It appears that your data is not placed column-wise.")

    N = np.size(X, 0)   # number of data points
    n = np.size(X, 1)   # number of dimensions (columns)

    # 1-based ranks into the sorted data, clamped to [1, N] so that the
    # "-1" conversion to a 0-based index can never wrap around to the other
    # end of the array through negative indexing.
    tail = 0.5*(1 - level)   # fraction of points cut off in each tail
    ilow = round(min(N, max(1, tail*N))) - 1
    # The upper rank is (1 - tail)*N. The previous expression, 1 - tail*N,
    # went negative for large N and returned the minimum for small N.
    ihigh = round(min(N, max(1, (1 - tail)*N))) - 1
    ierr = round(min(N, max(1, level*N))) - 1

    median = np.median(X, 0)
    sig = np.std(X, 0)
    absdiff = abs(X - np.mean(X, 0))   # Absolute difference to mean value

    sorted_X = np.sort(X, axis=0)
    plow = sorted_X[ilow, :].astype(float)
    phigh = sorted_X[ihigh, :].astype(float)
    err = np.sort(absdiff, axis=0)[ierr, :].astype(float)

    if plot:
        import matplotlib.pyplot as plt
        import matplotlib.gridspec as gridspec
        nvar = n
        if nvar == 2:   # Exactly two parameters so produce a scatter plot and histograms
            fig = plt.figure(figsize=(8, 4.8))
            gs = gridspec.GridSpec(2, 2, width_ratios=[1.5, 1], height_ratios=[1, 1])
            # Left square spans both rows
            ax_left = fig.add_subplot(gs[:, 0])
            axes = [fig.add_subplot(gs[0, 1]), fig.add_subplot(gs[1, 1])]
            ax_left.set_aspect('equal')
            ax_left.scatter(X[:, 0], X[:, 1], s=0.1)
            ax_left.set_xlabel('a')
            ax_left.set_ylabel('b')
            # Dashed lines mark the confidence interval of each parameter
            ax_left.plot([plow[0], plow[0]], [np.min(X[:, 1]), np.max(X[:, 1])], 'k--')
            ax_left.plot([phigh[0], phigh[0]], [np.min(X[:, 1]), np.max(X[:, 1])], 'k--')
            ax_left.plot([np.min(X[:, 0]), np.max(X[:, 0])], [plow[1], plow[1]], 'k--')
            ax_left.plot([np.min(X[:, 0]), np.max(X[:, 0])], [phigh[1], phigh[1]], 'k--')

            ax_left.set_aspect(1.0/ax_left.get_data_ratio(), adjustable='box')
        else:   # only produce histograms
            fig, axes = plt.subplots(nrows=nvar, ncols=1, figsize=(4, 2*nvar))
            if nvar == 1:
                axes = [axes]   # plt.subplots returns a bare Axes for a single subplot; make it a list

        for i, ax in enumerate(axes):
            counts, *_ = ax.hist(X[:, i], bins=50)
            ycent = 0.5*max(counts)
            ax.errorbar(median[i], ycent, xerr=sig[i], fmt='ro', capsize=5)
            ax.plot([plow[i], plow[i]], [0, 0.8*ycent], 'k--')
            ax.plot([phigh[i], phigh[i]], [0, 0.8*ycent], 'k--')
            ax.plot([median[i]-err[i], median[i]-err[i]], [0, 0.1*ycent], 'r-')
            ax.plot([median[i]+err[i], median[i]+err[i]], [0, 0.1*ycent], 'r-')
            ax.set_xlabel(chr(ord('a')+i))   # Name the variables a,b,c...
            ax.set_yticks([])

        plt.tight_layout()
        plt.show()

    if onedim:
        return (err[0], (plow[0], phigh[0]))   # simply return scalars
    return (err, list(zip(plow, phigh)))
283
+
284
+
285
def linconf(xinp, yinp, ysig, nmc=10000, distrib='normal', level=0.683, ytransform=None, restransform=None):
    """Performs the full Monte Carlo linear regression with confidence calculation.

    This is done by applying the following 5 steps in succession:
    - addnoise to y values
    - transform y values (skipped if ytransform is None)
    - linreg (x,y)
    - calculate a tuple of results from a,b (skipped if restransform is None)
    - confidence for each result

    Parameters
    ----------
    xinp : array
        x values, passed unchanged to linreg
    yinp : array
        y values, passed to addnoise
    ysig : array
        standard deviation of the y values, passed to addnoise
    nmc : int, default 10000
        number of Monte Carlo copies, passed to addnoise
    distrib : str, default 'normal'
        noise distribution name, passed to addnoise
    level : float, default 0.683
        confidence level, passed to confidence
    ytransform : callable, optional
        called with the noisy y matrix; its return value is what gets fitted
    restransform : callable, optional
        called as restransform(a, b), once with the single-fit parameters and
        once with the columns of Monte Carlo parameters; must return an
        iterable of results in both cases. If omitted, the results are
        simply (a, b).

    For detailed description of the forwarded parameters, see the functions they are passed to.

    Returns
    -------
    rlist : list
        a list of (result, error, confidenceinterval) for each calculated result
    pchi2 : float
        goodness-of-fit as returned by linreg
    """
    ymc = addnoise(yinp, ysig, nmc, distrib)
    if ytransform is not None:
        ymc = ytransform(ymc)

    # The parameter standard deviations from linreg are not needed here;
    # the errors are taken from confidence() instead.
    pp, _psig, pchi2, pmc = linreg(xinp, ymc)

    if restransform is not None:
        results = restransform(pp[0], pp[1])
        results_mc = restransform(pmc[:, 0], pmc[:, 1])
    else:
        results = (pp[0], pp[1])
        results_mc = (pmc[:, 0], pmc[:, 1])

    rlist = []
    for r, rmc in zip(results, results_mc):
        perr, confint = confidence(rmc, level)
        rlist.append((r, perr, confint))
    return (rlist, pchi2)
linregmc-0.0.1/.bashlog DELETED
@@ -1,42 +0,0 @@
1
- 522 cd packaging_mclinreg/
2
- 523 ls
3
- 527 cd packaging_mclinreg/
4
- 528 vi README.MD
5
- 529 ls
6
- 530 vi pyproject.toml
7
- 531 rm -r dist/
8
- 549 cd ..
9
- 550 ls
10
- 551 vi pyproject.toml
11
- 552 vi README.MD
12
- 553 vi README.MD
13
- 554 ls
14
- 555 python -m pip install --upgrade build
15
- 556 python -m build
16
- 557 python -m pip install --upgrade twine
17
- 558 python -m twine upload --repository testpypi dist/*
18
- 568 cd ../packaging_linregmc/
19
- 569 ls
20
- 570 vi src
21
- 576 cd ..
22
- 577 ls
23
- 578 cat pyproject.toml
24
- 579 python -m build
25
- 580 ls
26
- 588 cd ..
27
- 589 python -m build
28
- 590 python -m twine upload --repository testpypi dist/*
29
- 591 vi pyproject.toml
30
- 592 python -m build
31
- 593 ls
32
- 594 ls dist/
33
- 595 python -m twine upload --repository testpypi dist/*0.0.2*
34
- 596 vi src/linregmc/__init__.py
35
- 597 vi pyproject.toml
36
- 598 python -m build
37
- 599 python -m twine upload --repository testpypi dist/*0.0.3*
38
- 603 cd packaging_linregmc/
39
- 604 rm -r dist
40
- 605 vi pyproject.toml
41
- 606 ls -lt ../linregmc.py
42
- 607 cp ../linregmc.py src/linregmc/
linregmc-0.0.1/PKG-INFO DELETED
@@ -1,21 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: linregmc
3
- Version: 0.0.1
4
- Summary: Linear regression with Monte Carlo error analysis
5
- Project-URL: Homepage, https://github.com/parsod/BFKdemos
6
- Author-email: Par Soderhjelm <par.soderhjelm@bpc.lu.se>
7
- License-Expression: MIT
8
- Classifier: Operating System :: OS Independent
9
- Classifier: Programming Language :: Python :: 3
10
- Requires-Python: >=3.7
11
- Description-Content-Type: text/markdown
12
-
13
- # linregmc package (Linear regression with Monte Carlo error analysis)
14
-
15
- This package is a single module which contains three functions
16
-
17
- createmcdata - introduce noise in the data based on known experimental precision
18
- linreg - perform multiple linear regressions and calculate goodness-of-fit
19
- errconf - get confidence intervals, plot distributions etc.
20
-
21
- TODO: Add detailed documentation here
linregmc-0.0.1/README.MD DELETED
@@ -1,9 +0,0 @@
1
- # linregmc package (Linear regression with Monte Carlo error analysis)
2
-
3
- This package is a single module which contains three functions
4
-
5
- createmcdata - introduce noise in the data based on known experimental precision
6
- linreg - perform multiple linear regressions and calculate goodness-of-fit
7
- errconf - get confidence intervals, plot distributions etc.
8
-
9
- TODO: Add detailed documentation here
linregmc-0.0.1/README.md DELETED
@@ -1,9 +0,0 @@
1
- # linregmc package (Linear regression with Monte Carlo error analysis)
2
-
3
- This package is a single module which contains three functions
4
-
5
- createmcdata - introduce noise in the data based on known experimental precision
6
- linreg - perform multiple linear regressions and calculate goodness-of-fit
7
- errconf - get confidence intervals, plot distributions etc.
8
-
9
- TODO: Add detailed documentation here
@@ -1,22 +0,0 @@
1
- [build-system]
2
- requires = ["hatchling >= 1.26"]
3
- build-backend = "hatchling.build"
4
-
5
- [project]
6
- name = "linregmc"
7
- version = "0.0.1"
8
- authors = [
9
- { name="Par Soderhjelm", email="par.soderhjelm@bpc.lu.se" },
10
- ]
11
- description = "Linear regression with Monte Carlo error analysis"
12
- readme = "README.md"
13
- requires-python = ">=3.7"
14
- classifiers = [
15
- "Programming Language :: Python :: 3",
16
- "Operating System :: OS Independent",
17
- ]
18
- license = "MIT"
19
- license-files = ["LICEN[CS]E*"]
20
-
21
- [project.urls]
22
- Homepage = "https://github.com/parsod/BFKdemos"
@@ -1,20 +0,0 @@
1
- 555 cd src
2
- 556 mkdir example_package_per
3
- 560 cd ..
4
- 532 cd src
5
- 533 ls
6
- 534 ls example_package_parsod/
7
- 535 ln -sf ../../linregmc.py .
8
- 536 ls
9
- 537 mv example_package_parsod/ linregmc
10
- 538 mv linregmc.py linregmc
11
- 548 cd ..
12
- 571 cd src/
13
- 572 ls
14
- 573 mv linregmc/linregmc.py .
15
- 574 ls
16
- 575 mv linregmc ../linregmc_package
17
- 581 cd src/
18
- 582 mv ../linregmc_package/ linregmc
19
- 583 mv linregmc.py linregmc
20
- 587 cd ..
@@ -1,15 +0,0 @@
1
- 557 cd example_package_per/
2
- 558 echo -n >__init__.py
3
- 559 vi example.py
4
- 539 cd linregmc/
5
- 540 ls
6
- 541 ls -lt
7
- 542 rm example.py
8
- 543 vi __init__.py
9
- 544 ls
10
- 545 rm linregmc.py
11
- 546 cp ../../../linregmc.py .
12
- 547 ls
13
- 584 cd linregmc/
14
- 585 ls
15
- 586 vi __init__.py
@@ -1,253 +0,0 @@
1
- import numpy as np
2
- from numpy.matlib import repmat,randn
3
-
4
- def addnoise(yinp,ysiginp,nmc=10000,distrib='normal'):
5
-
6
- # function ymc = createmcdata(y,ysig,nmc,distrib)
7
- #
8
- # Creates a matrix ymc of nmc vectors with the mean values of y but with
9
- # added random noise of standard deviation ysig.
10
- #
11
- # y data vector
12
- # ysig standard deviation vector (same length as y)
13
- # nmc number of Monte Carlo copies
14
- # distrib 'norm'/'normal' gives normal distribution
15
- # 'lognorm'/'lognormal' give lognormal distribution (useful for example
16
- # if negative results are unphysical)
17
- #
18
- #
19
- # You might want to initialize the random number generator in forehand.
20
- #
21
-
22
- yinp = np.asarray(yinp)
23
- ysiginp = np.asarray(ysiginp)
24
- if np.ndim(yinp)>1 or np.ndim(ysiginp)>1:
25
- raise Exception('y and ysig must not have higher dimension than 1.')
26
- if np.size(ysiginp) == 1:
27
- ysiginp = ysiginp*np.ones(np.size(yinp)) #If ysiginp is a scalar, turn it into a vector with identical elements
28
- if np.size(yinp) != np.size(ysiginp):
29
- raise Exception('y and ysig must have the same length.')
30
-
31
- n=np.size(yinp)
32
- y=yinp.reshape((1,n))
33
- ysig=ysiginp.reshape((1,n))
34
- if distrib.lower() in ('norm' ,'normal'):
35
-
36
- return np.array(repmat(y,nmc,1)) + np.array(repmat(ysig,nmc,1))*np.array(randn(nmc,n))
37
- elif distrib.lower() in ('lognorm','lognormal'):
38
- mu = np.log(y**2/np.sqrt(ysig**2+y**2)) # mu of lognormal dist
39
- sigma = np.sqrt(np.log(ysig**2/y**2+1)) # sigma of lognormal dist
40
- return np.exp(np.array(randn(nmc,n))*np.array(repmat(sigma,nmc,1)) + np.array(repmat(mu,nmc,1)))
41
- else:
42
- raise Exception('Distribution named "' + distrib + '" is not recognized.')
43
-
44
-
45
- def linreg(xinp, yinp, plot = False):
46
- #This is the new (2024) weighted-fit version (similar to MATLAB linregmc) that only handles linear fits
47
- #and does NOT do confidence intervals, as these can be done by mcerrconf
48
-
49
- #Performs linear fitting ax+b=y with error analysis
50
- #using a Monte Carlo approach.
51
-
52
- #Input arguments:
53
- # x : a NX x N matrix: the NX data sets of x values (N data points)
54
- # y : a NY x N matrix: the NY data sets of y values (N data points)
55
- # NX and NY need not be the same. In particular one may use a
56
- # single data set (without added noise) for one of them.
57
- # The number of fits equals max(NX,NY) and if there are less data
58
- # sets for one of x or y, they are just cyclically reused.
59
- #Return values:
60
- # pp (2 elements): single-fit value of each parameter (can be used as the result)
61
- # psig (2 elements): standard deviation of each parameter
62
- # pchi : probability of chi>chi0
63
- # pmc : a NX x (n+1) maxtrix, the fitted parameters for all data sets
64
-
65
-
66
- if np.ndim(xinp) == 1:
67
- x=xinp.reshape((1,np.size(xinp)))
68
- else:
69
- x= xinp
70
- if np.ndim(yinp) == 1:
71
- y=yinp.reshape((1,np.size(yinp)))
72
- else:
73
- y=yinp
74
- if np.size(x,1) != np.size(y,1):
75
- raise Exception('Number of columns in x and y must be equal')
76
- N=np.size(x,1)
77
- n=1 #always linear fit
78
-
79
- #Perform single fit to get the base chi2 value
80
- xs=np.median(x, axis=0)
81
- ys=np.median(y, axis=0) #Reproduces original data points independent of distribution
82
- sig=np.std(x, axis=0)+np.std(y, axis=0) #This only makes sense if either x or y is a single set
83
-
84
- Xt=np.stack((xs, np.ones(N)), axis=1)
85
- X=np.stack((xs/sig, np.ones(N)/sig), axis=1)
86
- Y=ys/sig
87
- pp=np.linalg.lstsq(X,Y, rcond=None)[0]
88
- chi2 = sum((Y - np.matmul(X,pp))**2)
89
- subtract=ys - np.matmul(Xt,pp)
90
-
91
-
92
- xn=np.size(x,0)
93
- yn=np.size(y,0)
94
- nmc = max(xn,yn)
95
- pmc = np.zeros((nmc,n+1))
96
- chi2mc = np.zeros(nmc)
97
- for i in range(nmc):
98
- X=np.stack((x[i%xn,:]/sig,np.ones(N)/sig),axis=1)
99
- Y=(y[i%yn,:]-subtract)/sig
100
- p=np.linalg.lstsq(X,Y, rcond=None)[0]
101
- pmc[i,:]=p
102
- chi2mc[i] = sum((Y - np.matmul(X,p))**2)
103
-
104
- pmean = np.mean(pmc,0) #This is not used, as the single fit (pp) is returned for compatibility with the MATLAB script
105
- psig = np.std(pmc,0)
106
-
107
- #Compute pchi2
108
- pchi2=sum(chi2mc>chi2)/nmc
109
-
110
- if plot:
111
- import matplotlib.pyplot as plt
112
- fig, ax = plt.subplots(1, 1, figsize=(4, 2))
113
- counts,*_=ax.hist(chi2mc,bins=50)
114
- ycent=0.5*max(counts)
115
- ax.plot([chi2,chi2],[0,ycent],'r-')
116
- ax.set_yticks([])
117
- ax.set_xlabel(r"$\chi^2$")
118
- plt.show()
119
-
120
- return (pp,psig,pchi2,pmc)
121
-
122
-
123
-
124
- def confidence(X, level=0.683, plot=False):
125
- #
126
- # Statistical analysis of the data in matrix X.
127
- # It is assumed that the number of data points are large; all properties
128
- # are calculated from the data itself.
129
- #
130
- # X data matrix. Data in columns. For example, if X contains data
131
- # from two measurements, data for measurement 1 is in column 1
132
- # and measurement 2 in columns 2.
133
- # If only one column, a 1d-array is also acceptable
134
- # level confidence limit of error in err. If not specified, level =
135
- # 0.683 is default.
136
- # plot an optional boolean specifying whether to plot histograms for each column
137
- # where a general statistic is shown as a red errorbar (median +/- stdev)
138
- # and the confidence intervals are shown with black lines.
139
- # The red markers at the bottom show the simpler (median +/- err)
140
- # interval which should normally coincide with the confidence interval
141
- # unless the distribution is skew (in which case the confidence interval is more reliable).
142
- # If X has exactly two columns, a scatter plot showing possible correlation between
143
- # the two columns is also produced.
144
- #
145
- # Returns a tuple (err, confint) where
146
- # err Error in the columns based on selected confidence limit.
147
- # confint A list of tuples (low, high), the confidence interval for each input column
148
- # (pconf*100% of values are found within this interval around median)
149
- # If the input X was a 1d-array, a single tuple is returned instead of a list
150
-
151
- onedim = (np.ndim(X) == 1)
152
-
153
- if onedim: #convert to matrix, then convert back to onedim at the end
154
- X=X.reshape((np.size(X),1))
155
-
156
- if level <= 0 or level >= 1:
157
- raise Exception("levvel must be 0 < level < 1.")
158
-
159
- if np.size(X,1) > np.size(X,0):
160
- print("Warning. It appears that your data is not placed column-wise.")
161
-
162
- N = np.size(X,0) #number of data points
163
- n = np.size(X,1) #number of dimensions (columns)
164
-
165
- # GUM recommendation. ncut is the complement to pconf, ie the 1-pconf
166
- # fraction of points.
167
- #ncut = floor((N - floor(level*N+0.5) + 1)/2);
168
-
169
- median = np.median(X,0)
170
- sig = np.std(X,0)
171
- absdiff = abs(X-np.mean(X,0)) #Absolute difference to mean value
172
- plow = np.zeros(n)
173
- phigh = np.zeros(n)
174
- err = np.zeros(n)
175
- for j in range(n):
176
- tmp=np.sort(X[:,j])
177
- plow[j]=tmp[round(max(1,0.5*(1-level)*N))-1]
178
- phigh[j]=tmp[round(min(N,1-0.5*(1-level)*N))-1]
179
- tmp=np.sort(absdiff[:,j])
180
- err[j]=tmp[round(min(N,level*N))-1]
181
-
182
- if plot:
183
- import matplotlib.pyplot as plt
184
- import matplotlib.gridspec as gridspec
185
- nvar=np.size(X,1)
186
- if nvar==2: #Exactly two parameters so produce a scatter plot and histograms
187
- fig = plt.figure(figsize=(8, 4.8))
188
- gs = gridspec.GridSpec(2, 2, width_ratios=[1.5, 1], height_ratios=[1, 1])
189
- # Left square spans both rows
190
- ax_left = fig.add_subplot(gs[:, 0])
191
- axes = [fig.add_subplot(gs[0, 1]), fig.add_subplot(gs[1, 1])]
192
- ax_left.set_aspect('equal')
193
- ax_left.scatter(X[:,0],X[:,1],s=0.1)
194
- ax_left.set_xlabel('a')
195
- ax_left.set_ylabel('b')
196
- ax_left.plot([plow[0],plow[0]],[np.min(X[:,1]),np.max(X[:,1])],'k--')
197
- ax_left.plot([phigh[0],phigh[0]],[np.min(X[:,1]),np.max(X[:,1])],'k--')
198
- ax_left.plot([np.min(X[:,0]),np.max(X[:,0])],[plow[1],plow[1]], 'k--')
199
- ax_left.plot([np.min(X[:,0]),np.max(X[:,0])],[phigh[1],phigh[1]], 'k--')
200
-
201
- ax_left.set_aspect(1.0/ax_left.get_data_ratio(), adjustable='box')
202
- else: #only produce histograms
203
- fig, axes = plt.subplots(nrows=nvar, ncols=1, figsize=(4, 2*nvar))
204
- if nvar==1: axes=[axes] # fix stupid inconsistency in plt.subplots so that axes is always a list
205
-
206
- for i,ax in enumerate(axes):
207
- counts,*_=ax.hist(X[:,i], bins=50)
208
- ycent=0.5*max(counts)
209
- ax.errorbar(median[i],ycent,xerr=sig[i],fmt='ro',capsize=5)
210
- ax.plot([plow[i],plow[i]] ,[0,0.8*ycent],'k--')
211
- ax.plot([phigh[i],phigh[i]],[0,0.8*ycent],'k--')
212
- ax.plot([median[i]-err[i], median[i]-err[i]], [0,0.1*ycent],'r-')
213
- ax.plot([median[i]+err[i], median[i]+err[i]], [0,0.1*ycent],'r-')
214
- ax.set_xlabel(chr(ord('a')+i)) #Name the variables a,b,c...
215
- ax.set_yticks([])
216
-
217
- plt.tight_layout()
218
- plt.show()
219
-
220
- if onedim:
221
- return (err[0], (plow[0], phigh[0])) #simply return scalars
222
- else:
223
- return (err, list(zip(plow, phigh)))
224
-
225
-
226
- def linconf(xinp, yinp, ysig, nmc=10000, distrib='normal', level=0.683, ytransform=None, restransform=None):
227
- #
228
- #Performs the full Monte Carlo linear regression with confidence calculation.
229
- #by applying the following 5 steps in succession:
230
- # addnoise to y values
231
- # transform y values (skipped if ytransform==None)
232
- # linreg (x,y)
233
- # calculates a tuple of results from a,b (skipped if restransform==None)
234
- # confidence for each result
235
-
236
- # For detailed description of parameters, see previous functions
237
- # Returns (reslist, pchi2) where reslist is a list of (result, error, confidenceinterval) for each calculated result
238
-
239
- ymc=addnoise(yinp, ysig, nmc, distrib)
240
- if ytransform!=None:
241
- ymc = ytransform(ymc)
242
- pp,psig,pchi2,pmc=linreg(xinp,ymc)
243
- if restransform!=None:
244
- results=restransform(pp[0],pp[1])
245
- results_mc=restransform(pmc[:,0],pmc[:,1])
246
- else:
247
- results=(pp[0],pp[1])
248
- results_mc=(pmc[:,0],pmc[:,1])
249
- rlist=[]
250
- for r,rmc in zip(results,results_mc):
251
- perr,confint=confidence(rmc, level)
252
- rlist.append((r,perr,confint))
253
- return (rlist,pchi2)