jupyter_analysis_tools-1.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,173 @@
+ # -*- coding: utf-8 -*-
+ # datastore.py
+
+ import filecmp
+ import getpass
+ import tempfile
+ import warnings
+ from pathlib import Path
+
+ from pybis import Openbis
+
+
+ class DataStore:
+     url = None
+     token = None
+     _availObj = None
+     _userspace = None
+
+     def __init__(self, url, username=None, tokenValidTo=None):
+         self.url = url
+         self.username = username
+         if self.username is None:
+             self.username = getpass.getuser()
+         print(f"Working as user '{self.username}'.")
+         # to generate a PAT you need to log in normally first
+         self.ds = Openbis(url=self.url, verify_certificates=True)
+         # arg. *save_token* saves the openBIS token to ~/.pybis permanently
+         self.ds.login(
+             self.username,
+             getpass.getpass(prompt=f"Password for {self.username}: "),
+             save_token=False,
+         )
+         # create the PAT with the given name, don't store it
+         self.token = self.ds.get_or_create_personal_access_token(
+             "test-session", validTo=tokenValidTo
+         )
+
+     @property
+     def userspace(self):
+         uspace = self._userspace
+         if uspace is None:
+             allspaces = self.ds.get_spaces()
+             uspace = allspaces.df[
+                 allspaces.df.code.str.endswith(self.username.upper())
+             ].code.values[0]
+             self._userspace = uspace
+         return uspace
+
+     @userspace.setter
+     def userspace(self, name):
+         name = name.upper()
+         if name in self.ds.get_spaces().df.code.values:
+             self._userspace = name
+
+     @staticmethod
+     def identifier(objects, code):
+         return objects[objects.code == code].identifier.tolist()[0]
+
+     def createProject(self, projectName, space, spacePrefix=None):
+         """Finds the requested project in the DataStore.
+         Matching project names can be limited to a given *spacePrefix*.
+         If the project is not found, a new project with the given code
+         is created in the given space."""
+         # get the available projects, accessible by the current user
+         projectsAvail = self.ds.get_projects()
+         if spacePrefix:
+             projectsAvail = [prj for prj in projectsAvail if f"/{spacePrefix}_" in prj.identifier]
+         projects = [prj for prj in projectsAvail if prj.code == projectName]
+         assert len(projects) <= 1, f"Multiple projects found for '{projectName}'"
+         dsProject = None
+         if len(projects):  # get the existing object
+             dsProject = projects[0]
+         else:  # create it, if not found
+             print(f"Creating project '{projectName}'")
+             dsProject = self.ds.new_project(code=projectName, space=space)
+             dsProject.save()
+         assert dsProject
+         return dsProject
+
+     def createCollection(self, collName, projectObj, defaultObjType=None):
+         collections = self.ds.get_collections(project=projectObj)
+         dsColl = [coll for coll in collections if coll.code == collName.upper()]
+         if len(dsColl):
+             dsColl = dsColl[0]
+         else:  # create it, if not found
+             print(f"Creating collection '{collName}'")
+             dsColl = self.ds.new_collection(
+                 code=collName, type="COLLECTION", project=projectObj, props={"$name": collName}
+             )
+             dsColl.save()
+         assert dsColl
+         # update the properties (name, default view and object type) if not set
+         props = dsColl.props.all()  # props as dict
+         propKey = "$name"
+         if propKey in props and props[propKey] is None:
+             props[propKey] = collName
+         propKey = "$default_collection_view"
+         if propKey in props.keys() and props[propKey] is None:
+             propVal = [
+                 item
+                 for item in self.ds.get_vocabulary(propKey + "s").get_terms().df.code
+                 if "list" in item.lower()
+             ]
+             assert len(propVal)
+             props[propKey] = propVal[0]
+         if defaultObjType:
+             propKey = "$default_object_type"
+             if propKey in props.keys() and props[propKey] is None:
+                 props[propKey] = defaultObjType
+         # print(f"Setting '{collName}' properties:\n  {props}")
+         dsColl.set_props(props)
+         dsColl.save()
+         return dsColl
+
+     def createObject(
+         self,
+         projectName: str,
+         collectionName: str = None,
+         space: str = None,
+         spacePrefix: str = None,
+         objType: str = None,
+         props: dict = None,
+     ):
+         dsProject = self.createProject(projectName, space, spacePrefix=spacePrefix)
+         dsColl = None
+         if collectionName is None:  # collectionName is required
+             return None
+         dsColl = self.createCollection(collectionName, dsProject, defaultObjType=objType)
+         obj = self.ds.get_objects(type=objType, where={"$name": props["$name"]}).objects
+         if len(obj):
+             obj = obj[0]
+             prefix = objType
+             msg = "'{}' exists already in {}! Updating ...".format(
+                 obj.props["$name"], obj.project.identifier
+             )
+             warnings.warn_explicit(msg, UserWarning, prefix, 0)
+         else:  # does not exist yet
+             objName = f" '{props['$name']}'" if len(props.get("$name", "")) else ""
+             print(f"Creating new {objType}{objName} in {dsColl.identifier}")
+             obj = self.ds.new_object(type=objType, props=props, collection=dsColl)
+         obj.set_props(props)
+         return obj
+
+     def findObjects(self, *args, **kwargs):
+         return self.ds.get_objects(*args, **kwargs)
+
+     def uploadDataset(self, obj, datasetType, fpaths=[]):
+         def _checkFile(localPath, remoteFiles):
+             remoteFile = [f for f in remoteFiles if f.name == localPath.name]
+             if not len(remoteFile):  # the local file is not part of the dataset yet
+                 return False
+             return filecmp.cmp(localPath, remoteFile[0], shallow=False)
+
+         if not len(fpaths):
+             return  # nothing to do
+         for dataset in obj.get_datasets(type=datasetType):
+             with tempfile.TemporaryDirectory() as tempdir:
+                 dataset.download(destination=tempdir)
+                 dsFiles = [f for f in Path(tempdir).rglob("*") if f.is_file()]
+                 if len(fpaths) == len(dsFiles):
+                     if all([_checkFile(fpath, dsFiles) for fpath in fpaths]):
+                         print(
+                             f"All local files of {datasetType} match files in dataset, "
+                             "not updating."
+                         )
+                         continue  # skip the deletion below
+                 print(f"Dataset {datasetType} needs update, deleting existing dataset:")
+                 dataset.delete("Needs update")
+         if not len(obj.get_datasets(type=datasetType)):  # didn't exist yet or all were deleted
+             dataset = self.ds.new_dataset(
+                 type=datasetType, collection=obj.collection, object=obj, files=fpaths
+             )
+             dataset.save()
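For orientation, a minimal usage sketch of the class above. The import path, server URL, space, project, type and property codes are hypothetical placeholders that must exist on the openBIS instance; note that *props* must contain a "$name" entry, since createObject() looks up existing objects by that property:

    from pathlib import Path

    from jupyter_analysis_tools.datastore import DataStore  # assumed import path

    # connects interactively: asks for the password once, then works with a PAT
    store = DataStore("https://openbis.example.org", username="jdoe")
    sample = store.createObject(
        projectName="NANOPARTICLES",         # created if not found
        collectionName="SAXS_MEASUREMENTS",  # created likewise
        space=store.userspace,               # the space ending in the user name
        objType="EXPERIMENTAL_STEP",         # an object type defined on the server
        props={"$name": "sample-042"},
    )
    sample.save()  # persist the object before attaching datasets
    # attach the raw data files, replacing an outdated dataset if the files changed
    store.uploadDataset(sample, "RAW_DATA", fpaths=[Path("data/sample-042.dat")])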
@@ -0,0 +1,444 @@
+ # -*- coding: utf-8 -*-
+ # distrib.py
+
+ import itertools
+ from collections.abc import Iterable
+ from operator import itemgetter
+
+ import matplotlib.font_manager as font_manager
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import pandas as pd
+ import scipy.integrate
+ import scipy.interpolate
+
+ from .plotting import plotVertBar
+ from .utils import grouper
+
+
+ def integrate(xvec, yvec):
+     try:
+         return abs(scipy.integrate.simps(yvec, x=xvec))
+     except AttributeError:  # *simps* was removed from recent SciPy versions
+         return abs(scipy.integrate.simpson(yvec, x=xvec))
+
+
+ def normalizeDistrib(x, y, u=None):
+     x = x.values if isinstance(x, pd.Series) else x
+     y = y.values if isinstance(y, pd.Series) else y
+     # normalize the distribution to an area of 1
+     norm = integrate(x, y)
+     # print("CONTINs norm", norm)
+     y /= norm
+     if u is not None:
+         u /= norm
+     return x, y, u
+
+
+ def area(xvec, yvec, showArea=True):
+     """Returns a string with the area value of the given discrete curve points."""
+     return r" $\int${:.3g}".format(integrate(xvec, yvec)) if showArea else ""
+
+
+ def findPeakRanges(x, y, tol=1e-16):
+     """Returns the locations of data/peaks above a base line.
+     Assumes the curve touches the baseline before and after each peak. For distributions.
+     *tol*: Multiplied by the maximum of Y to produce a threshold distinguishing noise/artifacts from peaks."""
+     x = x.values if isinstance(x, pd.Series) else x
+     y = y.values if isinstance(y, pd.Series) else y
+     # look at all data above the threshold, get their array indices
+     indices = np.where(y > tol * y.max())[0]
+     # segmentation: look where continuous groups of indices start and end
+     indexGroups = np.where(np.diff(indices) > 1)[0]
+     ranges = []
+     istart = indices[0]
+
+     def appendPeakRange(start, end):
+         # print("appending", start, end, end-start)
+         start, end = max(start - 1, 0), min(end + 1, len(x) - 1)
+         monotony = np.sign(np.diff(y[start : end + 1]))
+         if not all(monotony == monotony[0]):
+             # keep real peaks only: purely monotonic ramps are unwanted artefacts
+             ranges.append((start, end))
+
+     for idx in indexGroups:
+         appendPeakRange(istart, indices[idx])  # add the new range to the list
+         istart = indices[idx + 1]  # start a new range
+     appendPeakRange(istart, indices[-1])
+     # print("findPeakRanges", ranges)
+     return ranges
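The segmentation idea above in isolation: consecutive indices above the threshold form one peak, a gap in the index list starts the next one (plain numpy, values are illustrative):

    import numpy as np

    y = np.array([0, 0, 1, 3, 1, 0, 0, 0, 2, 5, 2, 0])
    indices = np.where(y > 1e-16 * y.max())[0]  # -> [2 3 4 8 9 10]
    gaps = np.where(np.diff(indices) > 1)[0]    # -> [2]: a gap after the third index
    # so the peaks span indices 2..4 and 8..10, which findPeakRanges() widens
    # by one sample on each side to touch the baseline: (1, 5) and (7, 11)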
+
+
+ def findLocalMinima(peakRanges, xarr, yarr, doPlot=False, verbose=False):
+     """Identify local (non-zero) minima within the given peak ranges and separate
+     bimodal ranges into monomodal ones, splitting up a peak range if it contains
+     maxima connected by non-zero minima. Returns a list of index tuples indicating the
+     start and end of each peak. Uses 4th-order spline fitting and its derivative
+     to find the positions of local minima."""
+     # print("findLocalMinima", peakRanges)
+     newRanges = []
+     if doPlot:
+         plt.figure(figsize=(15, 5))
+     for ip, (istart, iend) in enumerate(peakRanges):
+         if verbose:
+             print((istart, iend), xarr[istart], xarr[iend])
+         if iend - istart < 5:  # skip this, can't be fitted and no sub-peaks are likely
+             newRanges.append((istart, iend))
+             continue
+         while yarr[istart] <= 0.0 and istart < iend:
+             istart += 1  # exclude leading zeros
+         while yarr[iend] <= 0.0 and istart < iend:
+             iend -= 1  # exclude trailing zeros
+         if istart == iend:
+             continue
+         if verbose:
+             print((istart, iend))
+         x, y = xarr[istart : iend + 1], yarr[istart : iend + 1]
+         try:
+             spline = scipy.interpolate.InterpolatedUnivariateSpline(x, y, k=4)
+         except Exception:
+             print(f"Warning: Could not findLocalMinima() within {(istart, iend)}!")
+             newRanges.append((istart, iend))
+             continue
+         # if verbose: print(spline(x))
+         deriv = spline.derivative()
+         # if verbose: print(deriv(x))
+         roots = deriv.roots()
+         # get the indices of the roots, ignoring any duplicates
+         rootIdx = set(np.argmin(np.abs(xarr[:, np.newaxis] - roots[np.newaxis, :]), axis=0))
+         rootIdx.add(istart)
+         rootIdx.add(iend)
+         rootIdx = sorted(rootIdx)
+         # if rootIdx[0] == istart:  # omit the first root at the beginning
+         #     rootIdx = rootIdx[1:]
+         if verbose:
+             print((istart, iend), len(roots), roots, rootIdx)
+         if doPlot:
+             plt.subplot(1, len(peakRanges), ip + 1)
+             radGrid = np.linspace(x[0], x[-1], 200)
+             plt.plot(x, y, label="data")
+             plt.plot(radGrid, spline(radGrid), label="spline")
+             plt.ylabel("data & spline approx.")
+             handles1, labels1 = plt.gca().get_legend_handles_labels()
+             [
+                 plotVertBar(plt, xarr[i], spline(radGrid).max(), color="blue", ls=":")
+                 for i in rootIdx
+             ]
+             plt.gca().twinx()
+             plt.plot(radGrid, deriv(radGrid), label="deriv. spline", color="green")
+             plt.ylabel("1st derivative")
+             handles2, labels2 = plt.gca().get_legend_handles_labels()
+             plt.grid()
+             plt.legend(handles1 + handles2, labels1 + labels2)
+         peakBoundaries = rootIdx[::2]
+         if verbose:
+             print(peakBoundaries)
+         newRanges += [tuple(peakBoundaries[i : i + 2]) for i in range(len(peakBoundaries) - 1)]
+     if verbose:
+         print(newRanges)
+     return newRanges
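Why k=4 in the spline fit above: SciPy implements roots() only for cubic splines, and the derivative of a 4th-order spline is exactly cubic, so the extrema of the data come out in closed form. A toy illustration (plain numpy/scipy):

    import numpy as np
    import scipy.interpolate

    x = np.linspace(0, 1, 101)
    y = np.exp(-((x - 0.3) ** 2) / 0.005) + 0.5 * np.exp(-((x - 0.7) ** 2) / 0.005)
    spline = scipy.interpolate.InterpolatedUnivariateSpline(x, y, k=4)
    extrema = spline.derivative().roots()  # cubic derivative -> closed-form roots
    # expect values near 0.3 and 0.7 (the maxima) and near 0.5 (the minimum
    # separating them); flat tails may add spurious near-zero-slope roots
    print(extrema)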
+
+
+ def getLargestPeaks(peakRanges, xarr, yarr, count=1):
+     def peakRangeArea(peakRange):
+         return integrate(
+             xarr[peakRange[0] : peakRange[1] + 1], yarr[peakRange[0] : peakRange[1] + 1]
+         )
+
+     return sorted(peakRanges, key=peakRangeArea, reverse=True)[:count]
+
+
+ class Moments(dict):
+     @staticmethod
+     def nthMoment(x, weights, n):
+         """Calculates the nth moment of the given distribution weights (standardized for n > 2)."""
+         center = 0
+         if n > 0:  # calculate the mean first
+             center = np.average(x, weights=weights) if sum(weights) else 0.0
+             # np.sqrt(u**2)/len(u)  # center uncertainty
+         if n == 1:
+             return center  # the mean
+         var = 1.0
+         if n > 1:
+             var = np.sum(weights * (x - center) ** 2) / np.sum(weights)
+         if n == 2:
+             return var  # the variance
+         return np.sum(weights * (x - center) ** n) / np.sum(weights) / var ** (n / 2)  # sigma**n
+
+     @classmethod
+     def fromData(cls, x, y):
+         store = cls()
+         mean, var, skew, kurt = [cls.nthMoment(x, y, i) for i in range(1, 5)]
+         store["area"] = integrate(x, y)
+         store["mean"] = mean
+         store["var"] = var
+         store["skew"] = skew
+         store["kurt"] = kurt
+         return store
+
+     @property
+     def area(self):
+         return self["area"]
+
+     @property
+     def mean(self):
+         return self["mean"]
+
+     @property
+     def var(self):
+         return self["var"]
+
+     @property
+     def skew(self):
+         return self["skew"]
+
+     @property
+     def kurt(self):
+         return self["kurt"]
+
+     def __str__(self):
+         return "\n".join(
+             [
+                 "{: <4s}: {: 9.2g}".format(k, self[k])
+                 for k in ("area", "mean", "var", "skew", "kurt")
+             ]
+         )
+
+     @staticmethod
+     def logNormParFromMoments(mean, var, N=1.0):
+         # SASfit manual, 6.4. Log-Normal distribution
+         median = mean**2 / np.sqrt(var + mean**2)
+         sigma = np.sqrt(np.log(mean**2 / median**2))
+         return {"N": N, "sigma": sigma, "median": median}
+
+     def logNormPar(self, N=1.0):
+         return self.logNormParFromMoments(self.mean, self.var, N=N)
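The moment relations used in logNormParFromMoments() can be sanity-checked by inverting them: for a log-normal distribution, mean = median·exp(sigma²/2) and var = median²·exp(sigma²)·(exp(sigma²) − 1). A quick round trip (the import path is assumed):

    import numpy as np
    from jupyter_analysis_tools.distrib import Moments  # assumed import path

    median, sigma = 50e-9, 0.2
    mean = median * np.exp(sigma**2 / 2)
    var = median**2 * np.exp(sigma**2) * (np.exp(sigma**2) - 1)
    par = Moments.logNormParFromMoments(mean, var)
    # the original parameters are recovered
    print(np.isclose(par["median"], median), np.isclose(par["sigma"], sigma))  # True True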
+
+
+ class Distribution:
+     x, y, u = None, None, None
+     peaks = None  # list of peak (start, end) indices pointing into x,y,u
+     color = None
+     xlabel = None
+     plotAxes, plotAxisIdx = None, 0
+
+     def __init__(self, xvec, yvec, uvec, xlabel=None, maxPeakCount=None):
+         self.xlabel = getattr(xvec, "name", None)
+         xvec = xvec.values if isinstance(xvec, pd.Series) else xvec
+         yvec = yvec.values if isinstance(yvec, pd.Series) else yvec
+         uvec = uvec.values if isinstance(uvec, pd.Series) else uvec
+         self.x, self.y, self.u = normalizeDistrib(xvec, yvec, uvec)
+         if xlabel is not None:
+             self.xlabel = xlabel
+         self.peaks = findPeakRanges(self.x, self.y, tol=1e-6)
+         # refine the peak ranges containing multiple maxima
+         self.peaks = findLocalMinima(self.peaks, self.x, self.y)
+         # For a given list of peaks (by start/end indices), one could keep only those
+         # whose median uncertainty-to-signal ratio stays below a given maximum:
+         # maxRatio = 1.5
+         # self.peaks = [(istart, iend) for istart, iend in self.peaks
+         #     if maxRatio > 1/np.median(self.y[istart:iend+1]/self.u[istart:iend+1])]
+         # Sort the peaks by area and keep the largest ones only, assuming monomodal distributions
+         if maxPeakCount:
+             self.peaks = getLargestPeaks(self.peaks, self.x, self.y, count=maxPeakCount)
+
+     def peakData(self, peakRange):
+         return (
+             self.x[peakRange[0] : peakRange[1] + 1],
+             self.y[peakRange[0] : peakRange[1] + 1],
+             self.u[peakRange[0] : peakRange[1] + 1],
+         )
+
+     def uncertRatioMedian(self, peakRange):
+         _, y, u = self.peakData(peakRange)
+         return 1.0 / np.median(y / u)
+
+     @staticmethod
+     def getBarWidth(xvec):
+         return np.concatenate((np.diff(xvec)[:1], np.diff(xvec)))
+
+     def plotPeak(self, peakRange, mom, momLo, momHi, dp, dpLo, dpHi, showFullRange=False, ax=None):
+         """
+         *showFullRange*: Set the x range to cover the whole distribution instead of the peak only.
+         """
+         x, y, u = self.peakData(peakRange)
+         if not ax:
+             ax = plt.gca()
+         # ax.plot(x, y, 'o', color=self.color)
+         lbl, fmt = [], "{: <7s} {: 9.2g} ±{: 9.2g}"
+         for k in "area", "median", "var", "skew", "kurt":
+             if k == "median":
+                 lbl.append(
+                     fmt.format(
+                         "median:",
+                         dp["median"],
+                         max(abs(dp["median"] - dpLo["median"]), abs(dpHi["median"] - dp["median"])),
+                     )
+                 )
+             else:
+                 lbl.append(
+                     fmt.format(k + ":", mom[k], max(abs(mom[k] - momLo[k]), abs(momHi[k] - mom[k])))
+                 )
+         lbl.append("LogNorm: " + distrParToText(dp)[0])
+         ax.bar(x, y, width=self.getBarWidth(x), color=self.color, alpha=0.5, label="\n".join(lbl))
+         ax.fill_between(
+             x,
+             np.maximum(0, y - u),
+             y + u,
+             color="red",
+             lw=0,
+             alpha=0.1,
+             label=f"uncertainties (lvl: {self.uncertRatioMedian(peakRange):.3g})",
+         )
+         if showFullRange:
+             ax.set_xlim((self.x.min(), self.x.max()))
+         ax.set_xlabel(self.xlabel)
+         ax.grid(True)
+         legend = ax.legend(prop=font_manager.FontProperties(family="monospace"))
+         # make the legend background more transparent
+         legend.get_frame().set_alpha(None)
+         legend.get_frame().set_facecolor((1, 1, 1, 0.2))
+
+     def plot(self, ax, distPar, name=""):
+         """Plots the complete distribution as loaded from file."""
+         lbl = (
+             "from file, "
+             + name
+             + area(self.x, self.y, showArea=True)
+             + "\n"
+             + distrParLatex(distPar[0])
+         )
+         ax.fill_between(
+             self.x,
+             self.y,
+             # width=GenericResult.getBarWidth(self.x),
+             color=self.color,
+             alpha=0.5,
+             label=lbl,
+         )
+         # ax.errorbar(self.x, self.y, yerr=self.u, lw=lineWidth()*2, label=lbl)
+         ax.fill_between(
+             self.x,
+             np.maximum(0, self.y - self.u),
+             self.y + self.u,
+             color="red",
+             lw=0,
+             alpha=0.1,
+             label="uncertainties",
+         )
+         ax.set_xlabel(self.xlabel)
+         ax.legend()
+         ax.grid()
+         ax.set_xscale("log")
+
+     def moments(self):
+         def momentsByPeak():
+             for peakRange in self.peaks:
+                 x, y, u = self.peakData(peakRange)
+                 N = integrate(x, y)
+                 mom = Moments.fromData(x, y)
+                 momLo = Moments.fromData(x, np.maximum(0, y - u))
+                 momHi = Moments.fromData(x, y + u)
+                 lnp = mom.logNormPar(N=N)
+                 lnpLo = momLo.logNormPar(N=N)
+                 lnpHi = momHi.logNormPar(N=N)
+                 yield (peakRange, mom, momLo, momHi, lnp, lnpLo, lnpHi)
+
+         # return a dict of tuples, addressable by peak index
+         return dict(
+             zip(
+                 ["peakRange", "mom", "momLo", "momHi", "lnp", "lnpLo", "lnpHi"],
+                 zip(*momentsByPeak()),
+             )
+         )
+
+     def peakDistrPar(self, plotAxes=None, plotAxisStart=0, **plotPeakKwargs):
+         momentsAndLogNormPar = self.moments()
+         if plotAxes is not None:
+             for i, peakRange in enumerate(momentsAndLogNormPar["peakRange"]):
+                 plotPeakKwargs["ax"] = plotAxes[plotAxisStart + i]
+                 self.plotPeak(*[v[i] for v in momentsAndLogNormPar.values()], **plotPeakKwargs)
+         return momentsAndLogNormPar["lnp"], momentsAndLogNormPar["mom"]
+
+
+ def distrParToText(logNormPar):
+     """
+     >>> distrParToText({'N':1.1, 'sigma':0.15, 'median':33.1234e-9})
+     ['median=3.3e-08 sigma=0.15 N=1.1']
+     >>> distrParToText({'N':1.2e13, 'sigma':0.15, 'median':3.1234})
+     ['median=3.1 sigma=0.15 N=1.2e+13']
+     >>> distrParToText({'N':(1.,2.), 'sigma':(.2,.4), 'median':(40e-9,7e-8)})
+     ['median_0=4e-08 sigma_0=0.20 N_0=1', 'median_1=7e-08 sigma_1=0.40 N_1=2']
+     """
+     fmt = {"median": "{:.2g}", "sigma": "{:.2f}", "N": "{:.2g}"}
+     order = {key: list(fmt.keys()).index(key) for key in fmt.keys()}
+     return [
+         " ".join(p)
+         for p in grouper(
+             list(
+                 zip(
+                     *sorted(
+                         [
+                             (
+                                 i * 10 + order[key],
+                                 f"{key}"
+                                 + (f"_{i}" if isinstance(vals, Iterable) else "")
+                                 + "="
+                                 + fmt[key].format(v),
+                             )
+                             for key, vals in logNormPar.items()
+                             for i, v in enumerate(vals if isinstance(vals, Iterable) else [vals])
+                         ],
+                         key=itemgetter(0),
+                     )
+                 )
+             )[-1],
+             3,
+         )
+     ]
+
+
+ def distrParLatex(distrPar, *args):
+     r"""
+     >>> distrParLatex({'N':1.1, 'sigma':0.15, 'median':33e-9})
+     '$median=3.3e-08\\;sigma=0.15\\;N=1.1$'
+     >>> distrParLatex({'N':(1.,2.), 'sigma':(.2,.4), 'median':(40e-9,7e-8)})
+     '$median_0=4e-08\\;sigma_0=0.20\\;N_0=1$\n$median_1=7e-08\\;sigma_1=0.40\\;N_1=2$'
+     """
+     return "\n".join(["$" + txt.replace(" ", r"\;") + "$" for txt in distrParToText(distrPar)])
+
+
+ def distrParToFilename(distrPar, prefix=""):
+     """
+     >>> distrParToFilename({'N':1.1, 'sigma':0.15, 'median':33e-9})
+     '_median=3.3e-08_sigma=0.15_N=1.1'
+     >>> distrParToFilename({'N':(1.,2.), 'sigma':(.2,.4), 'median':(40e-9,7e-8)})
+     '_median_0=4e-08_sigma_0=0.20_N_0=1_median_1=7e-08_sigma_1=0.40_N_1=2'
+     """
+     return "_".join([prefix] + distrParToText(distrPar)).replace(" ", "_")
+
+
+ def distrParFromFilename(fn):
+     """
+     >>> distrParFromFilename('_median=_33_sigma=0.15_N=1.1') == {'N':1.1, 'sigma':0.15, 'median':33}
+     True
+     >>> fn = '_median_0=4e-08_sigma_0=0.20_N_0=1_median_1=7e-08_sigma_1=0.40_N_1=2'
+     >>> distrParFromFilename(fn) == {'N':(1.,2.), 'sigma':(.2,.4), 'median':(40e-9,7e-8)}
+     True
+     """
+     fn = fn.split("=")
+     fn = [elem.lstrip("_") for elem in fn]
+     fn = [(elem.split("_", maxsplit=1) if elem[0].isnumeric() else [elem]) for elem in fn]
+     fn = list(itertools.chain(*fn))
+     result = {}
+     for k, v in grouper(fn, 2):
+         key = k.split("_")[0]
+         value = float(v)
+         result[key] = (
+             value
+             if key not in result
+             else (
+                 result[key] + (value,) if isinstance(result[key], tuple) else (result[key], value)
+             )
+         )
+     return result
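Putting the pieces together, a self-contained sketch of the peak-analysis pipeline on synthetic bimodal data (the import path is assumed; values are illustrative):

    import numpy as np
    from jupyter_analysis_tools.distrib import Distribution  # assumed import path

    def logNorm(x, median, sigma):  # a normalized log-normal mode
        return np.exp(-np.log(x / median) ** 2 / (2 * sigma**2)) / (x * sigma * np.sqrt(2 * np.pi))

    x = np.linspace(1e-9, 200e-9, 500)
    y = logNorm(x, 20e-9, 0.2) + 0.5 * logNorm(x, 80e-9, 0.15)
    u = 0.05 * y + 1e-3 * y.max()  # made-up uncertainties
    dist = Distribution(x, y, u, xlabel="radius (m)", maxPeakCount=2)  # keep the 2 largest peaks
    lnp, mom = dist.peakDistrPar()  # log-normal parameters and moments per peak
    for par in lnp:
        print(par)  # the medians should come out near 20e-9 and 80e-9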
@@ -0,0 +1,75 @@
+ # -*- coding: utf-8 -*-
+ # git.py
+
+ import os
+ import subprocess
+ import sys
+
+
+ def isRepo(path):
+     return os.path.exists(os.path.join(path, ".git"))
+
+
+ def isNBstripoutInstalled():
+     out = subprocess.run(
+         [sys.executable, "-m", "nbstripout", "--status"],
+         stdout=subprocess.PIPE,
+         stderr=subprocess.PIPE,
+     ).stdout.decode("utf-8")
+     return bool(out) and "not recognized" not in out
+
+
+ def isNBstripoutActivated():
+     out = subprocess.run(
+         [sys.executable, "-m", "nbstripout", "--status"],
+         stdout=subprocess.PIPE,
+         stderr=subprocess.PIPE,
+     ).stdout.decode("utf-8")
+     return bool(out) and "is installed" in out
+
+
+ def checkRepo():
+     if not isRepo("."):
+         print("Not a GIT repository.")
+         return
+     # can git be used? needs GitPython and the GIT executable in PATH
+     try:
+         import git
+     except ImportError:
+         print("Could not load the git module, are GIT (in PATH) and GitPython installed?")
+         return
+     # check the repository in detail
+     from IPython.display import HTML, display
+
+     repo = git.Repo(".")
+     # currentNB = os.path.basename(currentNBpath())
+     try:
+         editedOn = repo.git.show(no_patch=True, format="%cd, version %h by %cn", date="iso")
+     except git.GitCommandError:
+         print("Not a GIT repository.")
+         return
+     editedOn = editedOn.split(", ")
+     opacity = 0.3  # 1.0 if repo.is_dirty() else 0.5
+     display(
+         HTML(
+             '<div style="opacity: {opacity};">'
+             "<h3>Document updated on {}</h3>"
+             "<h4>({})</h4></div>".format(*editedOn, opacity=opacity)
+         )
+     )
+     if repo.is_dirty():
+         edits = repo.git.diff(stat=True)
+         import re
+
+         edits = re.sub(r" (\++)", r' <span style="color: green;">\1</span>', edits)
+         edits = re.sub(r"(\+)?(-+)(\s)", r'\1<span style="color: red;">\2</span>\3', edits)
+         display(
+             HTML(
+                 '<div style="border-style: solid; border-color: darkred; border-width: 1px; '
+                 'padding: 0em 1em 1em 1em; margin: 1em 0em;">'
+                 '<h4 style="color: darkred;">There are changes in this repository:</h4>'
+                 "<pre>"
+                 + edits
+                 + "</pre></div>"
+             )
+         )
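In a notebook, the helpers above are typically called from the first cell to document provenance — a minimal sketch (the import path is assumed):

    # first cell of a notebook that lives inside a GIT working copy
    from jupyter_analysis_tools.git import checkRepo, isNBstripoutActivated  # assumed import path

    checkRepo()  # shows a last-commit banner and lists uncommitted changes, if any
    if not isNBstripoutActivated():
        print("Consider activating nbstripout to keep notebook output out of GIT.")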