jupyter_analysis_tools-1.7.0-py3-none-any.whl
- jupyter_analysis_tools/__init__.py +13 -0
- jupyter_analysis_tools/analysis.py +47 -0
- jupyter_analysis_tools/binning.py +443 -0
- jupyter_analysis_tools/datalocations.py +128 -0
- jupyter_analysis_tools/datastore.py +173 -0
- jupyter_analysis_tools/distrib.py +444 -0
- jupyter_analysis_tools/git.py +75 -0
- jupyter_analysis_tools/plotting.py +70 -0
- jupyter_analysis_tools/readdata.py +193 -0
- jupyter_analysis_tools/ssfz2json.py +57 -0
- jupyter_analysis_tools/ssfz_compare.py +54 -0
- jupyter_analysis_tools/utils.py +262 -0
- jupyter_analysis_tools/widgets.py +89 -0
- jupyter_analysis_tools-1.7.0.dist-info/METADATA +807 -0
- jupyter_analysis_tools-1.7.0.dist-info/RECORD +20 -0
- jupyter_analysis_tools-1.7.0.dist-info/WHEEL +5 -0
- jupyter_analysis_tools-1.7.0.dist-info/entry_points.txt +3 -0
- jupyter_analysis_tools-1.7.0.dist-info/licenses/AUTHORS.rst +6 -0
- jupyter_analysis_tools-1.7.0.dist-info/licenses/LICENSE +9 -0
- jupyter_analysis_tools-1.7.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,70 @@ jupyter_analysis_tools/plotting.py
# -*- coding: utf-8 -*-
# plotting.py

import matplotlib
import matplotlib.pyplot as plt

from .readdata import readPDH

try:
    # increase the limit for the warning to pop up
    matplotlib.rcParams["figure.max_open_warning"] = 50
except TypeError:  # ignore the error with Sphinx
    pass


def initFigure(fig, width=80, aspectRatio=4.0 / 3.0, quiet=False):
    mmInch = 25.4
    fig.set_size_inches(width / mmInch, width / aspectRatio / mmInch)
    w, h = fig.get_size_inches()
    if not quiet:
        print("initFigure() with ({w:.1f}x{h:.1f}) mm".format(w=w * mmInch, h=h * mmInch))
    return fig


def createFigure(width=80, aspectRatio=4.0 / 3.0, quiet=False, **kwargs):
    """output figure width in mm"""
    fig = plt.figure(
        # tight_layout=dict(pad=0.05),
        **kwargs
    )
    initFigure(fig, width, aspectRatio, quiet)
    return fig


def plotVertBar(ax, xpos, ymax, **kwargs):
    ax.plot((xpos, xpos), (0, ymax), **kwargs)


def plotColor(idx):
    pltcol = plt.rcParams["axes.prop_cycle"].by_key()["color"]
    # print(pltcol)
    pltcol = ["gray", "lightskyblue", "steelblue", "red", "salmon"]
    return pltcol[idx]


def lineWidth():
    return plt.rcParams["lines.linewidth"]


def plotPDH(filename, label, **kwargs):
    """Plot a given .PDH file with the given label (shown in legend) using pandas and readPDH()."""
    q_range = kwargs.pop("q_range", None)
    print_filename = kwargs.pop("print_filename", True)  # default value from readdata()
    df, _ = readPDH(filename, q_range=q_range, print_filename=print_filename)
    df["e"] = df["e"].clip(lower=0)
    defaults = dict(
        yerr="e",
        logx=True,
        logy=True,
        label=label,
        grid=True,
        figsize=(10, 5),
        xlabel=r"$q$ (nm$^{{-1}}$)",
        ylabel="Intensity",
        ecolor="lightgray",
    )
    for k, v in defaults.items():
        if k not in kwargs:
            kwargs[k] = v
    df.plot("q", "I", **kwargs)
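
Editor's note: a minimal usage sketch of the helpers above, not part of the package; the data file name is hypothetical.

import matplotlib.pyplot as plt
from jupyter_analysis_tools.plotting import createFigure, plotColor, plotPDH, plotVertBar

fig = createFigure(width=120, aspectRatio=16.0 / 9.0)  # 120 mm wide, 16:9, size is printed
plotVertBar(fig.gca(), xpos=1.0, ymax=100.0, color=plotColor(0))  # mark a position of interest

# plotPDH() draws via pandas; extra keywords are forwarded to DataFrame.plot()
plotPDH("sample.pdh", "my sample", q_range=(0.1, 5.0))
plt.show()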
@@ -0,0 +1,193 @@ jupyter_analysis_tools/readdata.py
# -*- coding: utf-8 -*-
# readdata.py

import tempfile
import warnings
import xml.etree.ElementTree as et
import zipfile
from pathlib import Path

import pandas as pd

def readdata(fpath, q_range=None, read_csv_args=None, print_filename=True):
    """Read a data file into a pandas DataFrame,
    extract a short file name from its path and
    select the given q-range: q_min < q < q_max.
    """
    fpath = Path(fpath)
    if print_filename:
        print(f"Reading file '{str(fpath)}'")
    if read_csv_args is None:
        read_csv_args = dict()
    if "sep" not in read_csv_args:
        read_csv_args.update(sep=r"\s+")
    if "names" not in read_csv_args:
        read_csv_args.update(names=("q", "I", "e"))
    if "index_col" not in read_csv_args:
        read_csv_args.update(index_col=False)
    # print("f_read_data, read_csv_args:", read_csv_args)  # for debugging

    file_ext = fpath.suffix
    if file_ext.lower() == ".pdh":  # for PDH files
        nrows = pd.read_csv(
            fpath,
            skiprows=2,
            nrows=1,
            usecols=[
                0,
            ],
            sep=r"\s+",
            header=None,
        ).values[0, 0]
        read_csv_args.update(skiprows=5, nrows=nrows)
    df = pd.read_csv(fpath, **read_csv_args)

    # select q-range
    if q_range is not None:
        q_min, q_max = q_range
        df = df[(df.q > q_min) & (df.q < q_max)]

    filename = fpath.stem.split("[")[0]
    return df, filename


readPDH = readdata


def convertValue(val):
    val = val.strip()
    try:
        return int(val)
    except ValueError:
        try:
            return float(val)
        except ValueError:
            pass
    return val


def xmlPDHToDict(root):
    result = {}
    stack = [(root, result)]
    while stack:
        elem, parentCont = stack.pop()
        elemCont = {}
        idx = -1
        key = elem.attrib.pop("key", None)
        if (  # get a unique key, the key can occur in multiple groups in PDH
            key is not None and elem.tag == "group" and elem.attrib.get("id", None) is not None
        ):
            key = elem.attrib.pop("id")
        if (  # skip empty elements with a key only early
            not len(list(elem))
            and not len(elem.attrib)
            and not (elem.text and len(elem.text.strip()))
        ):
            continue
        if elem.tag == "list":
            elemCont = []
        else:  # add attributes & values to dict
            # Attach text, if any
            if elem.text and len(elem.text.strip()):
                if elem.tag in ("value", "reference"):
                    elemCont["value"] = convertValue(elem.text)
                else:
                    elemCont["#text"] = convertValue(elem.text)
            # Attach attributes, if any
            if elem.attrib:
                elemCont.update(
                    {k: convertValue(v) for k, v in elem.attrib.items() if len(v.strip())}
                )
            if key == "unit" and "value" in elemCont:  # fix some units
                elemCont["value"] = elemCont["value"].replace("_", "")
            if "unit" in elemCont:
                elemCont["unit"] = elemCont["unit"].replace("_", "")
            # reduce the extracted dict&attributes
            idx = elemCont.get("index", -1)  # insert last/append if no index given
            value = elemCont.get("value", None)
            if value is not None and (
                len(elemCont) == 1 or (len(elemCont) == 2 and "index" in elemCont)
            ):
                elemCont = value  # contains value only
        parentKey = elem.tag
        if key is not None and parentKey in ("list", "value", "group"):
            # skip one level in hierarchy for these generic containers
            parentKey = key
            key = None
        try:
            if isinstance(parentCont, list):
                parentCont.insert(idx, elemCont)
            elif parentKey not in parentCont:  # add as new list
                if key is None:  # make a list
                    parentCont[parentKey] = elemCont
                else:  # have a key
                    parentCont[parentKey] = {key: elemCont}
            else:  # parentKey exists already
                if not isinstance(parentCont[parentKey], list) and not isinstance(
                    parentCont[parentKey], dict
                ):
                    # if it's a plain value before, make a list out of it and append in next step
                    parentCont[parentKey] = [parentCont[parentKey]]
                if isinstance(parentCont[parentKey], list):
                    parentCont[parentKey].append(elemCont)
                elif key is not None:
                    parentCont[parentKey].update({key: elemCont})
                else:  # key is None
                    parentCont[parentKey].update(elemCont)
        except AttributeError:
            raise
        # reversed for correct order
        stack += [(child, elemCont) for child in reversed(list(elem))]
    # fix some entry values, weird Anton Paar PDH format
    try:
        oldts = result["fileinfo"]["parameter"]["DateTime"]["value"]
        # timestamp seems to be based on around 2009-01-01 (a day give or take)
        delta = (39 * 365 + 10) * 24 * 3600
        # make it compatible to datetime.datetime routines
        result["fileinfo"]["parameter"]["DateTime"]["value"] = oldts + delta
    except KeyError:
        pass
    return result

def readPDHmeta(pathPDH):
    """Reads the XML metadata at the end of a .PDH file to a Python dict."""
    pathPDH = Path(pathPDH)
    if pathPDH.suffix.lower() != ".pdh":
        warnings.warn("readPDHmeta() supports .pdh files only!")
        return  # nothing to do
    lines = ""
    with open(pathPDH) as fd:
        lines = fd.readlines()
    nrows = int(lines[2].split()[0])
    xml = "".join(lines[nrows + 5 :])
    return xmlPDHToDict(et.fromstring(xml))


def readSSF(pathSSF):
    """Reads the SAXSquant session file *pathSSF* (.SSF) to a Python dict."""
    pathSSF = Path(pathSSF)
    if pathSSF.suffix.lower() != ".ssf":
        warnings.warn("readSSF() supports .ssf files only!")
        return  # nothing to do
    data = ""
    with open(pathSSF, encoding="utf-8-sig") as fd:
        data = fd.read()
    return xmlPDHToDict(et.fromstring(data))


def readSSFZ(pathSSFZ):
    """Extracts and reads the SAXSquant session file (.SSF) to a Python dict.

    The .SSF is embedded in the .SSFZ provided by *pathSSFZ*."""
    pathSSFZ = Path(pathSSFZ)  # accept plain string paths as well, like the other readers
    assert pathSSFZ.is_file()
    # unpack the SSFZ to a temporary dir
    data = None
    with tempfile.TemporaryDirectory() as tempdir:
        with zipfile.ZipFile(pathSSFZ, "r") as zipfd:
            zipfd.extractall(tempdir)
        # read the session metadata from the extracted SSF file
        pathSSF = next(Path(tempdir).glob("*.ssf"))
        assert pathSSF.is_file()
        data = readSSF(pathSSF)
    return data
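
Editor's note: a short sketch of how the readers above fit together, not part of the package; the file paths are hypothetical.

from jupyter_analysis_tools.readdata import readPDH, readPDHmeta, readSSFZ

# tabular SAXS data: columns q, I, e, restricted to the given q-range
df, name = readPDH("sample.pdh", q_range=(0.1, 5.0))

# the XML metadata block appended to the same .PDH file, as a nested dict
meta = readPDHmeta("sample.pdh")

# a SAXSquant session archive: the embedded .SSF is extracted and parsed
session = readSSFZ("session.ssfz")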
@@ -0,0 +1,57 @@ jupyter_analysis_tools/ssfz2json.py
# -*- coding: utf-8 -*-
# ssfz2json.py

import argparse
import json
import sys
from pathlib import Path

from jupyter_analysis_tools.readdata import readSSFZ

def main():
    parser = argparse.ArgumentParser(
        description="""
        Reads and parses the embedded metadata of a .SSFZ file created by the Anton Paar SAXSquant
        software, converts it to JSON format and outputs it to <stdout>.
        An output file path for the JSON data can be provided via an optional argument.
        """
    )
    parser.add_argument(
        "ssfzPath",
        type=lambda p: Path(p).absolute(),
        help="Path of the input .SSFZ file to read.",
    )
    parser.add_argument(
        "-o",
        "--out",
        nargs="?",
        default="stdout",
        help=(
            "Output file path to write the JSON data to. If the filename is omitted, "
            "it is derived from the input file name by adding the .json suffix."
        ),
    )
    args = parser.parse_args()
    # print(args)
    if not args.ssfzPath.is_file():
        print(f"Provided file '{args.ssfzPath}' not found!")
        return 1
    data = readSSFZ(args.ssfzPath)
    json_args = dict(sort_keys=True, indent=2)
    if args.out == "stdout":
        print(json.dumps(data, **json_args))
    else:
        if args.out is None:
            args.out = args.ssfzPath.with_suffix(args.ssfzPath.suffix + ".json")
        if not Path(args.out).parent.is_dir():
            print(f"Directory of provided output file '{args.out}' does not exist!")
            return 1
        with open(args.out, "w") as fd:
            json.dump(data, fd, **json_args)
        print(f"Wrote '{args.out}'.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
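
Editor's note: since the module guards main() behind __name__, it can be invoked directly; a usage sketch with a hypothetical input file (the wheel's entry_points.txt may also install a console command for this):

python -m jupyter_analysis_tools.ssfz2json session.ssfz             (JSON to stdout)
python -m jupyter_analysis_tools.ssfz2json session.ssfz -o          (writes session.ssfz.json)
python -m jupyter_analysis_tools.ssfz2json session.ssfz -o meta.json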
@@ -0,0 +1,54 @@ jupyter_analysis_tools/ssfz_compare.py
# -*- coding: utf-8 -*-
# ssfz_compare.py

import argparse
import difflib
import json
import sys
from pathlib import Path

from jupyter_analysis_tools.readdata import readSSFZ

def main():
    parser = argparse.ArgumentParser(
        description="""
        Reads and parses the embedded metadata of two .SSFZ files created by the Anton Paar
        SAXSquant software, converts them to JSON format and performs a diff-like comparison
        which is written to <stdout>.
        """
    )
    parser.add_argument(
        "fromfile",
        type=lambda p: Path(p).absolute(),
        help="Path of the first .SSFZ file to compare.",
    )
    parser.add_argument(
        "tofile",
        type=lambda p: Path(p).absolute(),
        help="Path of the second .SSFZ file to compare to.",
    )
    json_args = dict(sort_keys=True, indent=2)
    args = parser.parse_args()
    # print(args)
    if not args.fromfile.is_file():
        print(f"Provided file '{args.fromfile}' not found!")
        return 1
    if not args.tofile.is_file():
        print(f"Provided file '{args.tofile}' not found!")
        return 1
    olddata = readSSFZ(args.fromfile)
    newdata = readSSFZ(args.tofile)
    diff = difflib.unified_diff(
        json.dumps(olddata, **json_args).splitlines(keepends=True),
        json.dumps(newdata, **json_args).splitlines(keepends=True),
        fromfile=str(args.fromfile),
        tofile=str(args.tofile),
    )
    for line in diff:
        print(line, end="")
    return 0


if __name__ == "__main__":
    sys.exit(main())
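
Editor's note: likewise runnable as a module; comparing two hypothetical session archives prints a unified diff of their pretty-printed JSON metadata:

python -m jupyter_analysis_tools.ssfz_compare old.ssfz new.ssfz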
@@ -0,0 +1,262 @@ jupyter_analysis_tools/utils.py
# -*- coding: utf-8 -*-
# utils.py

import contextlib
import copy
import itertools
import locale
import os
import platform
import re
import subprocess
import sys
from pathlib import Path

import numpy as np

indent = " "


def setLocaleUTF8():
    """Fix the Jupyter locale which is not UTF-8 by default on Windows."""
    locOld = locale.getpreferredencoding(False).lower()

    def getpreferredencoding(do_setlocale=True):
        return "utf-8"

    locale.getpreferredencoding = getpreferredencoding
    locNew = locale.getpreferredencoding(False)
    if locOld != locNew:
        print(f"Updated locale from {locOld} -> {locNew}.")

def isLinux():
    return platform.system().lower() == "linux"


def isMac():
    return platform.system().lower() == "darwin"


def isWindows():
    return platform.system().lower() == "windows"


def isList(obj):
    """Return true if the provided object is list-like including a numpy array but not a string.

    >>> isList([1, 2, 'a'])
    True
    >>> isList(tuple((1, 2, 'a')))
    True
    >>> import numpy
    >>> isList(numpy.arange(5))
    True
    >>> isList("dummy")
    False
    >>> isList(None)
    False
    """
    return isinstance(obj, (list, tuple, np.ndarray))


def shortenWinPath(path):
    if not isWindows():
        return path
    import win32api

    return win32api.GetShortPathName(path)


def appendToPATH(parentPath, subdirs=None, verbose=False):
    """Adds the given path with each subdirectory to the PATH environment variable."""
    parentPath = Path(parentPath)
    if not parentPath.is_dir():
        return  # nothing to do
    if subdirs is None:
        subdirs = ["."]
    sep = ";" if isWindows() else ":"
    PATH = os.environ["PATH"].split(sep)
    for path in subdirs:
        path = parentPath / path
        if verbose:
            print(indent, path, "[exists: {}]".format(path.is_dir()))
        if str(path) not in PATH:  # compare as str, a Path never equals the str entries
            PATH.append(str(path))
    os.environ["PATH"] = sep.join(PATH)

def addEnvScriptsToPATH():
    """Prepends the *Scripts* directory of the current Python environment base directory to the
    system's PATH variable.

    It is intended for Conda (Miniforge) environments on Windows that do not have this directory
    in their PATH environment variable, causing many commands from this location to be missed.
    """
    envPath = [p for p in sys.path if p.endswith("Lib")]
    if not envPath:
        return  # probably not a Miniforge environment
    envPath = envPath[0]
    envPath = Path(envPath).parent / "Scripts"
    sep = ";" if isWindows() else ":"
    environPATH = os.environ["PATH"].split(sep)
    # print(environPATH)
    if envPath.exists() and str(envPath) not in environPATH:
        environPATH = [str(envPath)] + environPATH
        os.environ["PATH"] = sep.join(environPATH)

def networkdriveMapping(cmdOutput: str = None, resolveNames: bool = True):
    """Returns a dict mapping drive letters (on Windows) or mount points (on Linux/macOS)
    to network paths."""
    if isWindows():
        if cmdOutput is None:
            proc = subprocess.run(["net", "use"], capture_output=True, text=True, encoding="cp850")
            cmdOutput = proc.stdout

        def resolveFQDN(uncPath):
            if not resolveNames:
                return uncPath
            parts = uncPath.split("\\")
            idx = [i for i, part in enumerate(parts) if len(part)][0]
            proc = subprocess.run(
                ["nslookup", parts[idx]], capture_output=True, text=True, encoding="cp850"
            )
            res = [line.split() for line in proc.stdout.splitlines() if line.startswith("Name:")]
            if len(res) and len(res[0]) == 2:
                parts[idx] = res[0][1]
            return "\\".join(parts)

        rows = [line.split() for line in cmdOutput.splitlines() if "Windows Network" in line]
        rows = {
            row[1]: resolveFQDN(row[2])
            for row in rows
            if row[1].endswith(":") and row[2].startswith(r"\\")
        }
        return rows
    else:  # Linux (tested) or macOS (untested)
        if cmdOutput is None:
            proc = subprocess.run(["mount"], capture_output=True, text=True)
            cmdOutput = proc.stdout

        def parse(line):
            # position of last opening parenthesis, start of options list
            lastParen = list(i for i, c in enumerate(line) if "(" == c)[-1]
            line = line[:lastParen].strip()
            spaces = list(i for i, c in enumerate(line) if " " == c)
            fstype = line[spaces[-1] :].strip()  # last remaining word is the filesystem type
            line = line[: spaces[-2]].strip()  # strip the 'type' indicator as well
            sepIdx = line.find(" on /")  # separates destination from mount point
            dest = line[:sepIdx].strip()
            mountpoint = line[sepIdx + 4 :].strip()
            yield (mountpoint, dest, fstype)

        return {
            mp: dst
            for line in cmdOutput.strip().splitlines()
            for (mp, dst, fstype) in parse(line)
            if fstype in ("nfs", "cifs", "sshfs", "afs", "ext4")
        }
    return {}


def makeNetworkdriveAbsolute(filepath, cmdOutput: str = None, resolveNames: bool = True):
    """Replaces the drive letter of the given path by the respective network path, if possible."""
    if filepath.drive.startswith(r"\\"):
        return filepath  # it's a UNC path already
    if isWindows():
        drivemap = networkdriveMapping(cmdOutput=cmdOutput, resolveNames=resolveNames)
        prefix = drivemap.get(filepath.drive, None)
        if prefix is not None:
            filepath = Path(prefix).joinpath(*filepath.parts[1:])
    else:  # Linux or macOS
        drivemap = networkdriveMapping(cmdOutput=cmdOutput, resolveNames=resolveNames)
        # search for the mountpoint, starting with the longest, most specific, first
        for mp, target in sorted(drivemap.items(), key=lambda tup: len(tup[0]), reverse=True):
            if filepath.is_relative_to(mp):
                return Path(target).joinpath(filepath.relative_to(mp))
    return filepath

def checkWinFor7z():
    """Extend the PATH environment variable for access to the 7-zip executable."""
    if not isWindows():
        return  # tests below are intended for Windows
    sevenzippath = r"C:\Program Files\7-Zip"
    if not os.path.isdir(sevenzippath):
        print(
            "7-Zip not found in '{}'.\n".format(sevenzippath)
            + "7-Zip is required for managing data files and results!"
        )
        return
    print("Adding the following directory to $PATH:")
    appendToPATH(sevenzippath)
    print("\nUpdated PATH:")
    for path in os.environ["PATH"].split(";"):
        print(indent, path)
def extract7z(fn, workdir=None):
    workdir = workdir or "."  # avoid os.path.join(None, fn) failing for the default cwd
    assert os.path.isfile(os.path.join(workdir, fn)), "Provided 7z archive '{}' not found!".format(
        fn
    )
    print(f"Extracting '{fn}': ")
    proc = subprocess.run(
        ["7z", "x", fn],
        cwd=workdir,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    print(proc.stdout.decode(errors="ignore"))
    if len(proc.stderr):
        print("## stderr:\n", proc.stderr.decode(errors="ignore"))


# https://stackoverflow.com/a/13847807
@contextlib.contextmanager
def pushd(new_dir):
    previous_dir = os.getcwd()
    os.chdir(new_dir)
    try:
        yield
    finally:  # restore the previous working directory even if the body raises
        os.chdir(previous_dir)
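
Editor's note: a minimal sketch of the context manager above, not part of the package; the directory name is hypothetical.

import os
from jupyter_analysis_tools.utils import pushd

with pushd("/tmp"):
    print(os.getcwd())  # inside /tmp (or its resolved path)
# back in the original working directory here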
def setPackage(globalsdict):
    """Sets the current directory of the notebook as a Python package to make relative module
    imports work.

    Usage: `setPackage(globals())`
    """
    path = Path().resolve()
    searchpath = str(path.parent)
    if searchpath not in sys.path:
        sys.path.insert(0, searchpath)
    globalsdict["__package__"] = path.name
    globalsdict["__name__"] = path.name
    print(f"Setting the current directory as package '{path.name}': \n {path}.")


def grouper(iterable, n, fillvalue=None):
    """Returns an iterator over a list of tuples (grouping) for a given flat iterable."""
    args = [iter(iterable)] * n
    return itertools.zip_longest(*args, fillvalue=fillvalue)

def fmtErr(val, std, precision=2, width=None):
    """Formats a given value and its standard deviation in physics notation, e.g. '1.23(4)'."""
    if width is None:
        width = ""
    fmt = "{:" + str(width) + "." + str(precision) + "f}({:.0f})"
    # print("fmtErr val:", val, "std:", std)
    return fmt.format(val, std * 10 ** (precision))

def updatedDict(d, key, value):
    """Implements the `|=` operator for dict in Python versions <3.9."""
    dd = copy.copy(d)
    dd[key] = value
    return dd


def naturalKey(name):
    """Split string into list of strings and integers. Use as *key* function for sorting files."""
    return [int(text) if text.isdigit() else text.lower() for text in re.split(r"(\d+)", name)]
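
Editor's note: a few of the small helpers above in action, not part of the package; the values are chosen for illustration only.

from jupyter_analysis_tools.utils import fmtErr, grouper, naturalKey, updatedDict

fmtErr(1.234, 0.056)                         # '1.23(6)': uncertainty folded into the last digits
list(grouper("ABCDEFG", 3, fillvalue="x"))   # [('A', 'B', 'C'), ('D', 'E', 'F'), ('G', 'x', 'x')]
sorted(["f10", "f2", "f1"], key=naturalKey)  # ['f1', 'f2', 'f10']
updatedDict({"a": 1}, "b", 2)                # {'a': 1, 'b': 2}, the input dict stays unchanged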