jupyter-analysis-tools 1.7.0 (jupyter_analysis_tools-1.7.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jupyter_analysis_tools/__init__.py +13 -0
- jupyter_analysis_tools/analysis.py +47 -0
- jupyter_analysis_tools/binning.py +443 -0
- jupyter_analysis_tools/datalocations.py +128 -0
- jupyter_analysis_tools/datastore.py +173 -0
- jupyter_analysis_tools/distrib.py +444 -0
- jupyter_analysis_tools/git.py +75 -0
- jupyter_analysis_tools/plotting.py +70 -0
- jupyter_analysis_tools/readdata.py +193 -0
- jupyter_analysis_tools/ssfz2json.py +57 -0
- jupyter_analysis_tools/ssfz_compare.py +54 -0
- jupyter_analysis_tools/utils.py +262 -0
- jupyter_analysis_tools/widgets.py +89 -0
- jupyter_analysis_tools-1.7.0.dist-info/METADATA +807 -0
- jupyter_analysis_tools-1.7.0.dist-info/RECORD +20 -0
- jupyter_analysis_tools-1.7.0.dist-info/WHEEL +5 -0
- jupyter_analysis_tools-1.7.0.dist-info/entry_points.txt +3 -0
- jupyter_analysis_tools-1.7.0.dist-info/licenses/AUTHORS.rst +6 -0
- jupyter_analysis_tools-1.7.0.dist-info/licenses/LICENSE +9 -0
- jupyter_analysis_tools-1.7.0.dist-info/top_level.txt +1 -0
jupyter_analysis_tools/datastore.py
@@ -0,0 +1,173 @@
```python
# -*- coding: utf-8 -*-
# datastore.py

import filecmp
import getpass
import tempfile
import warnings
from pathlib import Path

from pybis import Openbis


class DataStore:
    url = None
    token = None
    _availObj = None
    _userspace = None

    def __init__(self, url, username=None, tokenValidTo=None):
        self.url = url
        self.username = username
        if self.username is None:
            self.username = getpass.getuser()
        print(f"Working as user '{self.username}'.")
        # to generate a PAT you need to log in normally first
        self.ds = Openbis(url=self.url, verify_certificates=True)
        # arg. *save_token* saves the openBIS token to ~/.pybis permanently
        self.ds.login(
            self.username,
            getpass.getpass(prompt=f"Password for {self.username}: "),
            save_token=False,
        )
        # create the PAT with the given name, don't store it
        self.token = self.ds.get_or_create_personal_access_token(
            "test-session", validTo=tokenValidTo
        )

    @property
    def userspace(self):
        uspace = self._userspace
        if uspace is None:
            allspaces = self.ds.get_spaces()
            uspace = allspaces.df[
                allspaces.df.code.str.endswith(self.username.upper())
            ].code.values[0]
            self._userspace = uspace
        return uspace

    @userspace.setter
    def userspace(self, name):
        name = name.upper()
        if name in self.ds.get_spaces().df.code.values:
            self._userspace = name

    @staticmethod
    def identifier(objects, code):
        return objects[objects.code == code].identifier.tolist()[0]

    def createProject(self, projectName, space, spacePrefix=None):
        """Finds the requested project in the DataStore.
        Matching project names can be limited to a given *spacePrefix*.
        If the project is not found, a new project with the given code in the given space
        is created."""
        # get available projects, accessible by the current user
        projectsAvail = self.ds.get_projects()
        if spacePrefix:
            projectsAvail = [prj for prj in projectsAvail if f"/{spacePrefix}_" in prj.identifier]
        projects = [prj for prj in projectsAvail if prj.code == projectName]
        assert len(projects) <= 1, f"Multiple projects found for '{projectName}'"
        dsProject = None
        if len(projects):  # get the existing object
            dsProject = projects[0]
        else:  # create it, if not found
            print(f"Creating project '{projectName}'")
            dsProject = self.ds.new_project(code=projectName, space=space)
            dsProject.save()
        assert dsProject
        return dsProject

    def createCollection(self, collName, projectObj, defaultObjType=None):
        collections = self.ds.get_collections(project=projectObj)
        dsColl = [coll for coll in collections if coll.code == collName.upper()]
        if len(dsColl):
            dsColl = dsColl[0]
        else:  # create it, if not found
            print(f"Creating collection '{collName}'")
            dsColl = self.ds.new_collection(
                code=collName, type="COLLECTION", project=projectObj, props={"$name": collName}
            )
            dsColl.save()
        assert dsColl
        # update properties (name, default view and object type) if not set
        props = dsColl.props.all()  # props as dict
        propKey = "$name"
        if propKey in props and props[propKey] is None:
            props[propKey] = collName
        propKey = "$default_collection_view"
        if propKey in props.keys() and props[propKey] is None:
            propVal = [
                item
                for item in self.ds.get_vocabulary(propKey + "s").get_terms().df.code
                if "list" in item.lower()
            ]
            assert len(propVal)
            props[propKey] = propVal[0]
        if defaultObjType:
            propKey = "$default_object_type"
            if propKey in props.keys() and props[propKey] is None:
                props[propKey] = defaultObjType
        # print(f"Setting '{collName}' properties:\n    {props}")
        dsColl.set_props(props)
        dsColl.save()
        return dsColl

    def createObject(
        self,
        projectName: str,
        collectionName: str = None,
        space: str = None,
        spacePrefix: str = None,
        objType: str = None,
        props: dict = None,
    ):
        dsProject = self.createProject(projectName, space, spacePrefix=spacePrefix)
        dsColl = None
        if collectionName is None:  # collectionName is required
            return None
        dsColl = self.createCollection(collectionName, dsProject, defaultObjType=objType)
        obj = self.ds.get_objects(type=objType, where={"$name": props["$name"]}).objects
        if len(obj):
            obj = obj[0]
            prefix = objType
            msg = "'{}' exists already in {}! Updating ...".format(
                obj.props["$name"], obj.project.identifier
            )
            warnings.warn_explicit(msg, UserWarning, prefix, 0)
        else:  # does not exist yet
            objName = f" '{props['$name']}'" if len(props.get("$name", "")) else ""
            print(f"Creating new {objType}{objName} in {dsColl.identifier}")
            obj = self.ds.new_object(type=objType, props=props, collection=dsColl)
        obj.set_props(props)
        return obj

    def findObjects(self, *args, **kwargs):
        return self.ds.get_objects(**kwargs)

    def uploadDataset(self, obj, datasetType, fpaths=[]):
        def _checkFile(localPath, remoteFiles):
            remoteFile = [f for f in remoteFiles if f.name == localPath.name]
            if not len(remoteFile):  # file does not exist in the dataset
                return False
            return filecmp.cmp(localPath, remoteFile[0], shallow=False)

        if not len(fpaths):
            return  # nothing to do
        for dataset in obj.get_datasets(type=datasetType):
            with tempfile.TemporaryDirectory() as tempdir:
                dataset.download(destination=tempdir)
                dsFiles = [f for f in Path(tempdir).rglob("*") if f.is_file()]
                if len(fpaths) == len(dsFiles):
                    if all([_checkFile(fpath, dsFiles) for fpath in fpaths]):
                        print(
                            f"All local files of {datasetType} match files in dataset, "
                            "not updating."
                        )
                        continue  # skip deletion below
            print(f"Dataset {datasetType} needs update, deleting existing dataset:")
            dataset.delete("Needs update")
        if not len(obj.get_datasets(type=datasetType)):  # didn't exist yet or all deleted
            dataset = self.ds.new_dataset(
                type=datasetType, collection=obj.collection, object=obj, files=fpaths
            )
            dataset.save()
```
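For orientation, a minimal usage sketch of the `DataStore` class above. This is hypothetical: the server URL, user name, project/collection names, object and dataset types, and the file path are placeholders, not values shipped with the package.

```python
# Hypothetical usage sketch; all names below are placeholders.
from pathlib import Path

from jupyter_analysis_tools.datastore import DataStore

# connect and authenticate (prompts for the openBIS password interactively)
store = DataStore("https://openbis.example.org", username="jdoe")
print(store.userspace)  # space whose code ends with the user name, e.g. 'JDOE'

# find or create a project/collection and attach a new object
obj = store.createObject(
    "MY_PROJECT",
    collectionName="MY_COLLECTION",
    space=store.userspace,
    objType="EXPERIMENTAL_STEP",  # placeholder openBIS object type
    props={"$name": "test run 1"},
)
obj.save()  # persist the new object before attaching datasets

# upload files as a dataset; an existing dataset is replaced only if contents differ
store.uploadDataset(obj, "RAW_DATA", fpaths=[Path("measurement.dat")])
```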
jupyter_analysis_tools/distrib.py
@@ -0,0 +1,444 @@
```python
# -*- coding: utf-8 -*-
# distrib.py

import itertools
from collections.abc import Iterable
from operator import itemgetter

import matplotlib.font_manager as font_manager
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.integrate
import scipy.interpolate

from .plotting import plotVertBar
from .utils import grouper


def integrate(xvec, yvec):
    try:
        return abs(scipy.integrate.simps(yvec, x=xvec))
    except AttributeError:  # scipy.integrate.simps was removed in newer SciPy
        return abs(scipy.integrate.simpson(yvec, x=xvec))


def normalizeDistrib(x, y, u=None):
    x = x.values if isinstance(x, pd.Series) else x
    y = y.values if isinstance(y, pd.Series) else y
    # normalize the distribution to area of 1
    norm = integrate(x, y)
    # print("CONTINs norm", norm)
    y /= norm
    if u is not None:
        u /= norm
    return x, y, u


def area(xvec, yvec, showArea=True):
    """Returns a string with the area value of the given discrete curve points."""
    return r" $\int${:.3g}".format(integrate(xvec, yvec)) if showArea else ""


def findPeakRanges(x, y, tol=1e-16):
    """Returns the location of data/peak above a base line.
    Assumes it touches the baseline before and after. For distributions.
    *tol*: Multiplied by Y to produce a threshold to distinguish noise/artifacts from peaks."""
    x = x.values if isinstance(x, pd.Series) else x
    y = y.values if isinstance(y, pd.Series) else y
    # look at all data above zero, get their array indices
    indices = np.where(y > tol * y.max())[0]
    # segmentation: look where continuous groups of indices start and end
    indexGroups = np.where(np.diff(indices) > 1)[0]
    ranges = []
    istart = indices[0]

    def appendPeakRange(start, end):
        # print("appending", start, end, end-start)
        start, end = max(start - 1, 0), min(end + 1, len(x) - 1)
        monotony = np.sign(np.diff(y[start : end + 1]))
        if not all(monotony == monotony[0]):
            # avoid monotonically increasing/decreasing "peaks" -> unwanted artefacts
            ranges.append((start, end))

    for idx in indexGroups:
        appendPeakRange(istart, indices[idx])  # add the new range to the list
        istart = indices[idx + 1]  # start new range
    appendPeakRange(istart, indices[-1])
    # print("findPeakRanges", ranges)
    return ranges


def findLocalMinima(peakRanges, xarr, yarr, doPlot=False, verbose=False):
    """Identify local (non-zero) minima within given peak ranges and separate those
    bimodal ranges into monomodal ranges, thus splitting up the peak range if it contains
    maxima connected by non-zero minima. Returns a list of index tuples indicating the
    start and end of each peak. Uses 4th order spline fitting and its derivative
    for finding positions of local minima."""
    # print("findLocalMinima", peakRanges)
    newRanges = []
    if doPlot:
        plt.figure(figsize=(15, 5))
    for ip, (istart, iend) in enumerate(peakRanges):
        if verbose:
            print((istart, iend), xarr[istart], xarr[iend])
        if iend - istart < 5:  # skip this, can't be fitted and no sub-peaks are likely
            newRanges.append((istart, iend))
            continue
        while yarr[istart] <= 0.0 and istart < iend:
            istart += 1  # exclude leading zero
        while yarr[iend] <= 0.0 and istart < iend:
            iend -= 1  # exclude trailing zero
        if istart == iend:
            continue
        if verbose:
            print((istart, iend))
        x, y = xarr[istart : iend + 1], yarr[istart : iend + 1]
        try:
            spline = scipy.interpolate.InterpolatedUnivariateSpline(x, y, k=4)
        except Exception:
            print(f"Warning: Could not findLocalMinima() within {(istart, iend)}!")
            newRanges.append((istart, iend))
            continue
        # if verbose: print(spline(x))
        deriv = spline.derivative()
        # if verbose: print(deriv(x))
        roots = deriv.roots()
        # get indices of roots and ignore any duplicate indices
        rootIdx = set(np.argmin(np.abs(xarr[:, np.newaxis] - roots[np.newaxis, :]), axis=0))
        rootIdx.add(istart)
        rootIdx.add(iend)
        rootIdx = sorted(rootIdx)
        # if rootIdx[0] == istart:  # omit the first root at the beginning
        #     rootIdx = rootIdx[1:]
        if verbose:
            print((istart, iend), len(roots), roots, rootIdx)
        if doPlot:
            plt.subplot(1, len(peakRanges), ip + 1)
            radGrid = np.linspace(x[0], x[-1], 200)
            plt.plot(x, y, label="data")
            plt.plot(radGrid, spline(radGrid), label="spline")
            plt.ylabel("data & spline approx.")
            handles1, labels1 = plt.gca().get_legend_handles_labels()
            [
                plotVertBar(plt, xarr[i], spline(radGrid).max(), color="blue", ls=":")
                for i in rootIdx
            ]
            plt.gca().twinx()
            plt.plot(radGrid, deriv(radGrid), label="deriv. spline", color="green")
            plt.ylabel("1st derivative")
            handles2, labels2 = plt.gca().get_legend_handles_labels()
            plt.grid()
            plt.legend(handles1 + handles2, labels1 + labels2)
        peakBoundaries = rootIdx[::2]
        if verbose:
            print(peakBoundaries)
        newRanges += [tuple(peakBoundaries[i : i + 2]) for i in range(len(peakBoundaries) - 1)]
    if verbose:
        print(newRanges)
    return newRanges


def getLargestPeaks(peakRanges, xarr, yarr, count=1):
    def peakRangeArea(peakRange):
        return integrate(
            xarr[peakRange[0] : peakRange[1] + 1], yarr[peakRange[0] : peakRange[1] + 1]
        )

    return sorted(peakRanges, key=peakRangeArea, reverse=True)[:count]


class Moments(dict):
    @staticmethod
    def nthMoment(x, weights, n):
        """Calculates the nth moment of the given distribution weights."""
        center = 0
        if n > 0:  # calculate the mean first
            center = np.average(x, weights=weights) if sum(weights) else 0.0
            # np.sqrt(u**2)/len(u) # center uncertainty
        if n == 1:
            return center  # the mean
        var = 1.0
        if n > 1:
            var = np.sum(weights * (x - center) ** 2) / np.sum(weights)
        if n == 2:
            return var  # the variance
        return np.sum(weights * (x - center) ** n) / np.sum(weights) / var**n

    @classmethod
    def fromData(cls, x, y):
        store = cls()
        mean, var, skew, kurt = [cls.nthMoment(x, y, i) for i in range(1, 5)]
        store["area"] = integrate(x, y)
        store["mean"] = mean
        store["var"] = var
        store["skew"] = skew
        store["kurt"] = kurt
        return store

    @property
    def area(self):
        return self["area"]

    @property
    def mean(self):
        return self["mean"]

    @property
    def var(self):
        return self["var"]

    @property
    def skew(self):
        return self["skew"]

    @property
    def kurt(self):
        return self["kurt"]

    def __str__(self):
        return "\n".join(
            [
                "{: <4s}: {: 9.2g}".format(k, self[k])
                for k in ("area", "mean", "var", "skew", "kurt")
            ]
        )

    @staticmethod
    def logNormParFromMoments(mean, var, N=1.0):
        # SASfit manual, 6.4. Log-Normal distribution:
        # inverts mean = m*exp(sigma^2/2), var = m^2*exp(sigma^2)*(exp(sigma^2)-1)
        # to recover the median m and the width sigma
        median = mean**2 / np.sqrt(var + mean**2)
        sigma = np.sqrt(np.log(mean**2 / median**2))
        return {"N": N, "sigma": sigma, "median": median}

    def logNormPar(self, N=1.0):
        return self.logNormParFromMoments(self.mean, self.var, N=N)


class Distribution:
    x, y, u = None, None, None
    peaks = None  # list of peak (start, end) indices pointing into x,y,u
    color = None
    xlabel = None
    plotAxes, plotAxisIdx = None, 0

    def __init__(self, xvec, yvec, uvec, xlabel=None, maxPeakCount=None):
        self.xlabel = getattr(xvec, "name", None)
        xvec = xvec.values if isinstance(xvec, pd.Series) else xvec
        yvec = yvec.values if isinstance(yvec, pd.Series) else yvec
        uvec = uvec.values if isinstance(uvec, pd.Series) else uvec
        self.x, self.y, self.u = normalizeDistrib(xvec, yvec, uvec)
        if xlabel is not None:
            self.xlabel = xlabel
        self.peaks = findPeakRanges(self.x, self.y, tol=1e-6)
        # refine the peak ranges containing multiple maxima
        self.peaks = findLocalMinima(self.peaks, self.x, self.y)
        # For a given list of peaks (by start/end indices) return only those
        # whose ratio of amount to uncertainty is always below the given max. ratio
        # maxRatio = 1.5
        # self.peakRanges = [(istart, iend) for istart, iend in self.peakRanges
        #     if maxRatio > 1/np.median(self.y[istart:iend+1]/self.u[istart:iend+1])]
        # Sort the peaks by area and use the largest (last) only, assuming monomodal distributions
        if maxPeakCount:
            self.peaks = getLargestPeaks(self.peaks, self.x, self.y, count=maxPeakCount)

    def peakData(self, peakRange):
        return (
            self.x[peakRange[0] : peakRange[1] + 1],
            self.y[peakRange[0] : peakRange[1] + 1],
            self.u[peakRange[0] : peakRange[1] + 1],
        )

    def uncertRatioMedian(self, peakRange):
        _, y, u = self.peakData(peakRange)
        return 1.0 / np.median(y / u)

    @staticmethod
    def getBarWidth(xvec):
        return np.concatenate((np.diff(xvec)[:1], np.diff(xvec)))

    def plotPeak(self, peakRange, mom, momLo, momHi, dp, dpLo, dpHi, showFullRange=False, ax=None):
        """
        *showFullRange*: Set the x range to cover the whole distribution instead of the peak only.
        """
        x, y, u = self.peakData(peakRange)
        if not ax:
            ax = plt.gca()
        # ax.plot(x, y, 'o', color=cls.color)
        lbl, fmt = [], "{: <7s} {: 9.2g} ±{: 9.2g}"
        for k in "area", "median", "var", "skew", "kurt":
            if k == "median":
                lbl.append(
                    fmt.format(
                        "median:",
                        dp["median"],
                        max(abs(dp["median"] - dpLo["median"]), abs(dpHi["median"] - dp["median"])),
                    )
                )
            else:
                lbl.append(
                    fmt.format(k + ":", mom[k], max(abs(mom[k] - momLo[k]), abs(momHi[k] - mom[k])))
                )
        lbl.append("LogNorm: " + distrParToText(dp)[0])
        ax.bar(x, y, width=self.getBarWidth(x), color=self.color, alpha=0.5, label="\n".join(lbl))
        ax.fill_between(
            x,
            np.maximum(0, y - u),
            y + u,
            color="red",
            lw=0,
            alpha=0.1,
            label=f"uncertainties (lvl: {self.uncertRatioMedian(peakRange):.3g})",
        )
        if showFullRange:
            ax.set_xlim((self.x.min(), self.x.max()))
        ax.set_xlabel(self.xlabel)
        ax.grid(True)
        legend = ax.legend(prop=font_manager.FontProperties(family="monospace"))
        # make the legend background more transparent
        legend.get_frame().set_alpha(None)
        legend.get_frame().set_facecolor((1, 1, 1, 0.2))

    def plot(self, ax, distPar, name=""):
        """plot complete distribution as loaded from file"""
        lbl = (
            "from file, "
            + name
            + area(self.x, self.y, showArea=True)
            + "\n"
            + distrParLatex(distPar[0])
        )
        ax.fill_between(
            self.x,
            self.y,
            # width=GenericResult.getBarWidth(self.x),
            color=self.color,
            alpha=0.5,
            label=lbl,
        )
        # ax.errorbar(self.x, self.y, yerr=self.u, lw=lineWidth()*2, label=lbl)
        ax.fill_between(
            self.x,
            np.maximum(0, self.y - self.u),
            self.y + self.u,
            color="red",
            lw=0,
            alpha=0.1,
            label="uncertainties",
        )
        ax.set_xlabel(self.xlabel)
        ax.legend()
        ax.grid()
        ax.set_xscale("log")

    def moments(self):
        def momentsByPeak():
            for peakRange in self.peaks:
                x, y, u = self.peakData(peakRange)
                N = integrate(x, y)
                mom = Moments.fromData(x, y)
                momLo = Moments.fromData(x, np.maximum(0, y - u))
                momHi = Moments.fromData(x, y + u)
                lnp = mom.logNormPar(N=N)
                lnpLo = momLo.logNormPar(N=N)
                lnpHi = momHi.logNormPar(N=N)
                yield (peakRange, mom, momLo, momHi, lnp, lnpLo, lnpHi)

        # return a dict of lists, addressable by peak index
        return dict(
            zip(
                ["peakRange", "mom", "momLo", "momHi", "lnp", "lnpLo", "lnpHi"],
                zip(*[m for m in momentsByPeak()]),
            )
        )

    def peakDistrPar(self, plotAxes=None, plotAxisStart=0, **plotPeakKwargs):
        momentsAndLogNormPar = self.moments()
        if plotAxes is not None:
            for i, peakRange in enumerate(momentsAndLogNormPar["peakRange"]):
                plotPeakKwargs["ax"] = plotAxes[plotAxisStart + i]
                self.plotPeak(*[v[i] for v in momentsAndLogNormPar.values()], **plotPeakKwargs)
        return momentsAndLogNormPar["lnp"], momentsAndLogNormPar["mom"]


def distrParToText(logNormPar):
    """
    >>> distrParToText({'N':1.1, 'sigma':0.15, 'median':33.1234e-9})
    ['median=3.3e-08 sigma=0.15 N=1.1']
    >>> distrParToText({'N':1.2e13, 'sigma':0.15, 'median':3.1234})
    ['median=3.1 sigma=0.15 N=1.2e+13']
    >>> distrParToText({'N':(1.,2.), 'sigma':(.2,.4), 'median':(40e-9,7e-8)})
    ['median_0=4e-08 sigma_0=0.20 N_0=1', 'median_1=7e-08 sigma_1=0.40 N_1=2']
    """
    fmt = {"median": "{:.2g}", "sigma": "{:.2f}", "N": "{:.2g}"}
    order = {key: list(fmt.keys()).index(key) for key in fmt.keys()}
    return [
        " ".join(p)
        for p in grouper(
            list(
                zip(
                    *sorted(
                        [
                            (
                                i * 10 + order[key],
                                f"{key}"
                                + (f"_{i}" if isinstance(vals, Iterable) else "")
                                + "="
                                + fmt[key].format(v),
                            )
                            for key, vals in logNormPar.items()
                            for i, v in enumerate(vals if isinstance(vals, Iterable) else [vals])
                        ],
                        key=itemgetter(0),
                    )
                )
            )[-1],
            3,
        )
    ]


def distrParLatex(distrPar, *kwargs):
    r"""
    >>> distrParLatex({'N':1.1, 'sigma':0.15, 'median':33e-9})
    '$median=3.3e-08\\;sigma=0.15\\;N=1.1$'
    >>> distrParLatex({'N':(1.,2.), 'sigma':(.2,.4), 'median':(40e-9,7e-8)})
    '$median_0=4e-08\\;sigma_0=0.20\\;N_0=1$\n$median_1=7e-08\\;sigma_1=0.40\\;N_1=2$'
    """
    return "\n".join(["$" + txt.replace(" ", r"\;") + "$" for txt in distrParToText(distrPar)])


def distrParToFilename(distrPar, prefix=""):
    """
    >>> distrParToFilename({'N':1.1, 'sigma':0.15, 'median':33e-9})
    '_median=3.3e-08_sigma=0.15_N=1.1'
    >>> distrParToFilename({'N':(1.,2.), 'sigma':(.2,.4), 'median':(40e-9,7e-8)})
    '_median_0=4e-08_sigma_0=0.20_N_0=1_median_1=7e-08_sigma_1=0.40_N_1=2'
    """
    return "_".join([prefix] + distrParToText(distrPar)).replace(" ", "_")


def distrParFromFilename(fn):
    """
    >>> distrParFromFilename('_median=_33_sigma=0.15_N=1.1') == {'N':1.1, 'sigma':0.15, 'median':33}
    True
    >>> fn = '_median_0=4e-08_sigma_0=0.20_N_0=1_median_1=7e-08_sigma_1=0.40_N_1=2'
    >>> distrParFromFilename(fn) == {'N':(1.,2.), 'sigma':(.2,.4), 'median':(40e-9,7e-8)}
    True
    """
    fn = fn.split("=")
    fn = [elem.lstrip("_") for elem in fn]
    fn = [(elem.split("_", maxsplit=1) if elem[0].isnumeric() else [elem]) for elem in fn]
    fn = list(itertools.chain(*fn))
    result = {}
    for k, v in grouper(fn, 2):
        key = k.split("_")[0]
        value = float(v)  # all parameter values are plain floats
        result[key] = (
            value
            if key not in result
            else (
                result[key] + (value,) if isinstance(result[key], tuple) else (result[key], value)
            )
        )
    return result
```
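A minimal sketch of how `Distribution` could be driven on synthetic data. The curve, uncertainties and axis label are invented for illustration, and the exact peak splitting depends on the spline fit, so results may vary slightly:

```python
# Self-contained sketch on synthetic bimodal data; all numbers are made up.
import numpy as np

from jupyter_analysis_tools.distrib import Distribution, distrParToText

x = np.linspace(1.0, 100.0, 500)
# two well separated Gaussian bumps; the curve drops to ~0 between them
y = np.exp(-0.5 * ((x - 20.0) / 3.0) ** 2) + 0.5 * np.exp(-0.5 * ((x - 70.0) / 5.0) ** 2)
u = 0.05 * y + 1e-12  # made-up uncertainties

distr = Distribution(x, y, u, xlabel="radius (nm)", maxPeakCount=2)
print("peak index ranges:", distr.peaks)

# log-normal parameters (N, sigma, median) and raw moments per detected peak
lnp, mom = distr.peakDistrPar()
for par in lnp:
    print(distrParToText(par))
```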
jupyter_analysis_tools/git.py
@@ -0,0 +1,75 @@
```python
# -*- coding: utf-8 -*-
# git.py

import os
import subprocess
import sys


def isRepo(path):
    return os.path.exists(os.path.join(path, ".git"))


def isNBstripoutInstalled():
    out = subprocess.run(
        [sys.executable, "-m", "nbstripout", "--status"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    ).stdout.decode("utf-8")
    return len(out) and "not recognized" not in out


def isNBstripoutActivated():
    out = subprocess.run(
        [sys.executable, "-m", "nbstripout", "--status"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    ).stdout.decode("utf-8")
    return len(out) and "is installed" in out


def checkRepo():
    if not isRepo("."):
        print("Not a GIT repository.")
        return
    # is git installed?
    try:
        import git
    except ImportError:
        print("Could not load git module, is GIT installed and in PATH?")
        return
    # check the repository in detail
    from IPython.display import HTML, display

    repo = git.Repo(".")
    # currentNB = os.path.basename(currentNBpath())
    try:
        editedOn = repo.git.show(no_patch=True, format="%cd, version %h by %cn", date="iso")
    except git.GitCommandError:
        print("Not a GIT repository.")
        return
    editedOn = editedOn.split(", ")
    opacity = 0.3  # 1.0 if repo.is_dirty() else 0.5
    display(
        HTML(
            '<div style="opacity: {opacity};">'
            "<h3>Document updated on {}</h3>"
            "<h4>({})</h4></div>".format(*editedOn, opacity=opacity)
        )
    )
    if repo.is_dirty():
        edits = repo.git.diff(stat=True)
        import re

        edits = re.sub(r" (\++)", r' <span style="color: green;">\1</span>', edits)
        edits = re.sub(r"(\+)?(-+)(\s)", r'\1<span style="color: red;">\2</span>\3', edits)
        display(
            HTML(
                '<div style="border-style: solid; border-color: darkred; border-width: 1px; '
                'padding: 0em 1em 1em 1em; margin: 1em 0em;">'
                '<h4 style="color: darkred;">There are changes in this repository:</h4>'
                "<pre>" + edits + "</pre></div>"
            )
        )
```
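And a sketch of how these helpers would typically be used from a notebook cell inside a working copy (assumes GitPython and IPython are available; output rendering requires a Jupyter frontend):

```python
# Hypothetical notebook usage: report the last commit of the repository that
# contains this notebook and highlight any uncommitted changes.
from jupyter_analysis_tools.git import checkRepo, isNBstripoutActivated

if not isNBstripoutActivated():
    print("Consider activating nbstripout to keep notebook outputs out of GIT.")
checkRepo()  # renders an HTML status block via IPython.display
```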