PyPI - anemoi-utils - Versions diffs - 0.1.6__py3-none-any.whl - Mend

anemoi-utils 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of anemoi-utils might be problematic. Click here for more details.

Files changed (14) hide show

anemoi/utils/__init__.py +9 -0
anemoi/utils/_version.py +16 -0
anemoi/utils/checkpoints.py +76 -0
anemoi/utils/config.py +94 -0
anemoi/utils/dates.py +248 -0
anemoi/utils/grib.py +73 -0
anemoi/utils/humanize.py +474 -0
anemoi/utils/provenance.py +353 -0
anemoi/utils/text.py +345 -0
anemoi_utils-0.1.6.dist-info/LICENSE +201 -0
anemoi_utils-0.1.6.dist-info/METADATA +253 -0
anemoi_utils-0.1.6.dist-info/RECORD +14 -0
anemoi_utils-0.1.6.dist-info/WHEEL +5 -0
anemoi_utils-0.1.6.dist-info/top_level.txt +1 -0

anemoi/utils/provenance.py ADDED Viewed

@@ -0,0 +1,353 @@
+# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+"""
+Collect information about the current environment, like:
+ - The Python version
+ - The versions of the modules which are currently loaded
+ - The git information for the modules which are currently loaded from a git repository
+ - ...
+"""
+import datetime
+import json
+import logging
+import os
+import subprocess
+import sys
+import sysconfig
+LOG = logging.getLogger(__name__)
def lookup_git_repo(path):
    """Find the git repository that contains ``path``.

    ``path`` is tried first, then each of its parent directories in turn.

    Parameters
    ----------
    path : str
        A file or directory path to start the search from.

    Returns
    -------
    git.Repo or None
        The first repository found while walking up, or None if the walk
        reaches the top of the path without finding one. The POSIX root
        ``"/"`` itself is never probed.
    """
    from git import InvalidGitRepositoryError
    from git import Repo

    while path != "/":
        try:
            return Repo(path)
        except InvalidGitRepositoryError:
            parent = os.path.dirname(path)
            # dirname() reaches a fixed point ("" for relative paths, "C:\\"
            # on Windows) that is not "/"; stop there instead of looping
            # forever on the same value.
            if parent == path:
                break
            path = parent

    return None
def _check_for_git(paths, full):
    """Collect git status for every path that lives inside a git repository.

    Parameters
    ----------
    paths : iterable of (str, str)
        Pairs of ``(name, path)``.
    full : bool
        If true, report the path, the remote URLs and the sorted file lists;
        otherwise report only the commit hash and file counts.

    Returns
    -------
    dict
        Maps each name to its git information. Paths outside any repository
        are skipped; a ``ValueError`` while inspecting one is logged and that
        entry is omitted.
    """
    report = {}

    for name, path in paths:
        repo = lookup_git_repo(path)
        if repo is None:
            continue

        try:
            sha1 = repo.head.commit.hexsha
            if full:
                remotes = [remote.url for remote in repo.remotes]
            # diff(None) compares the index against the working tree
            modified = [item.a_path for item in repo.index.diff(None)]
            untracked = repo.untracked_files

            if full:
                entry = {
                    "path": path,
                    "git": {
                        "sha1": sha1,
                        "remotes": remotes,
                        "modified_files": sorted(modified),
                        "untracked_files": sorted(untracked),
                    },
                }
            else:
                entry = {
                    "git": {
                        "sha1": sha1,
                        "modified_files": len(modified),
                        "untracked_files": len(untracked),
                    },
                }
            report[name] = entry
        except ValueError as e:
            LOG.error(f"Error checking git repo {path}: {e}")

    return report
def version(versions, name, module, roots, namespaces, paths, full):
    """Record version (or location) information for one loaded module.

    Parameters
    ----------
    versions : dict
        Updated in place: ``name`` maps to the module's ``__version__`` if it
        has one, otherwise to a description of where it was loaded from.
    name : str
        The module name.
    module : object
        The module object.
    roots : dict
        Maps path prefixes to labels; every occurrence of a prefix in the
        module's file path is replaced by ``<label>``.
    namespaces : set
        Updated in place with ``name`` when the module has neither a version
        nor a file.
    paths : set
        Updated in place with ``(name, path)`` when the path still starts
        with ``/`` after the substitutions above.
    full : bool
        If true, record the whole path; otherwise only ``.../<basename>``,
        and only for paths that were not rewritten to start with ``<``.
    """
    location = getattr(module, "__file__", None)
    if location is not None:
        for prefix, label in roots.items():
            location = location.replace(prefix, f"<{label}>")
        if location.startswith("/"):
            paths.add((name, location))

    # An explicit version always wins over any path-based description.
    missing = object()
    declared = getattr(module, "__version__", missing)
    if declared is not missing:
        versions[name] = declared
        return

    try:
        if location is None:
            namespaces.add(name)
        elif location.startswith("<stdlib>"):
            # For now, don't report on stdlib modules
            pass
        elif full:
            versions[name] = location
        elif not location.startswith("<"):
            versions[name] = os.path.join("...", os.path.basename(location))
        return
    except AttributeError:
        pass

    if name not in sys.builtin_module_names:
        versions[name] = str(module)
def _module_versions(full):
    """Gather version information for the modules currently in ``sys.modules``.

    Parameters
    ----------
    full : bool
        Passed on to ``version``.

    Returns
    -------
    tuple
        ``(versions, paths)``: a dict of version information keyed by module
        name, and a set of ``(name, path)`` pairs collected by ``version``.
    """
    # https://docs.python.org/3/library/sysconfig.html
    first_label = {}
    for label, location in sysconfig.get_paths().items():
        first_label.setdefault(location, label)

    # Longest paths first, so that the most specific prefix is substituted first
    by_length = sorted(first_label.items(), key=lambda item: len(item[0]), reverse=True)
    roots = dict(by_length)

    versions = {}
    paths = set()
    namespaces = set()

    # Top-level modules only
    for modname, module in sorted(sys.modules.items()):
        if "." in modname:
            continue
        version(versions, modname, module, roots, namespaces, paths, full)

    # Cater for modules like "earthkit.meteo": one level below a namespace package
    for modname, module in sorted(sys.modules.items()):
        head, dot, tail = modname.partition(".")
        if dot and "." not in tail and head in namespaces:
            version(versions, modname, module, roots, namespaces, paths, full)

    return versions, paths
def module_versions(full):
    """Return version and git information for the loaded modules.

    Parameters
    ----------
    full : bool
        Passed on to both the version and the git collectors.

    Returns
    -------
    tuple
        ``(versions, git_versions)``, both dicts keyed by module name.
    """
    found = _module_versions(full)
    versions = found[0]
    git_versions = _check_for_git(found[1], full)
    return versions, git_versions
+def _name(obj):
+    if hasattr(obj, "__name__"):
+        if hasattr(obj, "__module__"):
+            return f"{obj.__module__}.{obj.__name__}"
+        return obj.__name__
+    if hasattr(obj, "__class__"):
+        return _name(obj.__class__)
+    return str(obj)
+def _paths(path_or_object):
+    if path_or_object is None:
+        _, paths = _module_versions(full=False)
+        return paths
+    if isinstance(path_or_object, (list, tuple, set)):
+        paths = []
+        for p in path_or_object:
+            paths.extend(_paths(p))
+        return paths
+    if isinstance(path_or_object, str):
+        module = sys.modules.get(path_or_object)
+        if module is not None:
+            return _paths(module)
+        return [(path_or_object, path_or_object)]
+    if hasattr(path_or_object, "__module__"):
+        module = sys.modules.get(path_or_object.__module__)
+        return [(path_or_object.__module__, module.__file__)]
+    name = _name(path_or_object)
+    paths = []
+    if hasattr(path_or_object, "__file__"):
+        paths.append((name, path_or_object.__file__))
+    if hasattr(path_or_object, "__code__"):
+        paths.append((name, path_or_object.__code__.co_filename))
+    if hasattr(path_or_object, "__module__"):
+        module = sys.modules.get(path_or_object.__module__)
+        paths.append((name, module.__file__))
+    if not paths:
+        raise ValueError(f"Could not find path for {name} {path_or_object} {type(path_or_object)}")
+    return paths
def git_check(*args):
    """Return the git information for the given arguments.

    Arguments can be:
        - nothing at all, in which case all loaded modules are checked
        - a module name
        - a module object
        - an object or a class
        - a path to a directory

    Returns
    -------
    dict
        The git information for each argument found inside a git repository,
        for example::

            {
                "anemoi.utils": {
                    "sha1": "c999d83ae283bcbb99f68d92c42d24315922129f",
                    "remotes": [
                        "git@github.com:ecmwf/anemoi-utils.git"
                    ],
                    "modified_files": [
                        "anemoi/utils/checkpoints.py"
                    ],
                    "untracked_files": []
                }
            }
    """
    targets = args if len(args) > 0 else None
    found = _check_for_git(_paths(targets), full=True)

    # Keep only the "git" part of each entry
    return {name: info["git"] for name, info in found.items()}
def platform_info():
    """Collect whatever the standard ``platform`` module reports.

    Every public attribute of ``platform`` is called without arguments; the
    ones that cannot be called that way are skipped.

    Returns
    -------
    dict
        Maps attribute names to the values returned. Lists and tuples made
        only of empty strings (at any nesting depth) are left out.
    """
    import platform

    def _is_blank(values):
        for item in values:
            if isinstance(item, (list, tuple)):
                if not _is_blank(item):
                    return False
            elif item != "":
                return False
        return True

    collected = {}
    for attr in dir(platform):
        if attr.startswith("_"):
            continue
        try:
            collected[attr] = getattr(platform, attr)()
        except Exception:
            # Best effort: not callable, needs arguments, or not supported here
            pass

    return {
        key: value
        for key, value in collected.items()
        if not (isinstance(value, (list, tuple)) and _is_blank(value))
    }
def gpu_info():
    """Describe the NVIDIA GPUs reported by ``nvsmi``.

    Returns
    -------
    list or str
        One decoded JSON object per GPU. A message string is returned
        instead when ``nvidia-smi`` is not on the path, and the captured
        command output when the command fails.
    """
    import nvsmi

    if nvsmi.is_nvidia_smi_on_path():
        try:
            gpus = []
            for gpu in nvsmi.get_gpus():
                gpus.append(json.loads(gpu.to_json()))
            return gpus
        except subprocess.CalledProcessError as e:
            return e.output.decode("utf-8").strip()

    return "nvdia-smi not found"
def path_md5(path):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is read in chunks of 1 MiB, so it is never held in memory as a
    whole.
    """
    import hashlib

    chunk_size = 1024 * 1024
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        while True:
            chunk = stream.read(chunk_size)
            if chunk == b"":
                break
            digest.update(chunk)
    return digest.hexdigest()
def assets_info(paths):
    """Describe a list of files: size, timestamps and MD5 sum.

    Parameters
    ----------
    paths : iterable of str
        The files to describe.

    Returns
    -------
    dict
        Maps each path to a dict with ``size``, ``atime``, ``mtime``,
        ``ctime`` (ISO format, local time, whole seconds) and ``md5``, plus
        ``peek`` when the file could be inspected as a checkpoint. If the
        file cannot be read, the path maps to the error message instead.
    """
    result = {}

    for path in paths:
        try:
            # Tuple-style access yields whole-second timestamps
            size, atime, mtime, ctime = os.stat(path)[6:10]
            md5 = path_md5(path)
        except Exception as e:
            result[path] = str(e)
            continue

        result[path] = dict(
            size=size,
            atime=datetime.datetime.fromtimestamp(atime).isoformat(),
            mtime=datetime.datetime.fromtimestamp(mtime).isoformat(),
            ctime=datetime.datetime.fromtimestamp(ctime).isoformat(),
            md5=md5,
        )

        try:
            # The package ships ``checkpoints.py``; there is no ``checkpoint``
            # module, so the previous import could never succeed.
            # NOTE(review): confirm that ``checkpoints`` exposes ``peek``.
            from .checkpoints import peek

            result[path]["peek"] = peek(path)
        except Exception:
            # Best effort: the asset may simply not be a checkpoint
            LOG.debug("Could not peek into %s", path, exc_info=True)

    return result
def gather_provenance_info(assets=None, full=False) -> dict:
    """Gather information about the current environment

    Parameters
    ----------
    assets : list, optional
        A list of file paths for which to collect the MD5 sum, the size and
        time attributes, by default None (no assets). Only used when ``full``
        is true.
    full : bool, optional
        If true, will also collect various paths, the command line, platform
        and GPU information and the assets, by default False

    Returns
    -------
    dict
        A dictionary with the collected information
    """
    # None rather than a shared mutable ``[]`` default
    if assets is None:
        assets = []

    versions, git_versions = module_versions(full)

    # ``utcnow()`` is deprecated since Python 3.12. Take an aware UTC time and
    # drop the tzinfo so that the ISO string keeps its previous format.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()

    if not full:
        return dict(
            time=now,
            python=f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
            module_versions=versions,
            git_versions=git_versions,
        )

    return dict(
        time=now,
        executable=sys.executable,
        args=sys.argv,
        python_path=sys.path,
        config_paths=sysconfig.get_paths(),
        module_versions=versions,
        git_versions=git_versions,
        platform=platform_info(),
        gpus=gpu_info(),
        assets=assets_info(assets),
    )

anemoi/utils/text.py ADDED Viewed

@@ -0,0 +1,345 @@
+# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+"""
+Text utilities
+"""
+import sys
+from collections import defaultdict
+# https://en.wikipedia.org/wiki/Box-drawing_character
def dotted_line(width=84) -> str:
    """Build a horizontal dotted rule made of '┈' characters

    >>> dotted_line(40)
    ┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈

    Parameters
    ----------
    width : int, optional
        How many characters the rule contains, by default 84

    Returns
    -------
    str
        The dotted rule
    """
    dot = "┈"
    return width * dot
def boxed(text, min_width=80, max_width=80) -> str:
    """Draw a box around a (possibly multi-line) text

    >>> boxed("Hello,\\nWorld!", max_width=40)
    ┌──────────────────────────────────────────┐
    │ Hello,                                   │
    │ World!                                   │
    └──────────────────────────────────────────┘

    Parameters
    ----------
    text : str
        The text to put in the box
    min_width : int, optional
        Lower bound for the inner width of the box, by default 80.
        None disables it.
    max_width : int, optional
        Upper bound for the inner width of the box, by default 80. Longer
        lines are cut and end with '…'. None disables it. It wins over
        ``min_width`` when the two disagree.

    Returns
    -------
    str
        The text surrounded by a box
    """
    rows = text.split("\n")
    inner = max(len(row) for row in rows)

    if min_width is not None and min_width > inner:
        inner = min_width

    if max_width is not None:
        if max_width < inner:
            inner = max_width
        # Shorten the rows that do not fit, marking the cut with an ellipsis
        rows = [row if len(row) <= max_width else row[: max_width - 1] + "…" for row in rows]

    horizontal = "─" * (inner + 2)
    framed = ["┌" + horizontal + "┐"]
    framed.extend(f"│ {row:{inner}} │" for row in rows)
    framed.append("└" + horizontal + "┘")
    return "\n".join(framed)
def bold(text):
    """Return ``text`` marked up as bold, using ``termcolor``."""
    import termcolor

    return termcolor.colored(text, attrs=["bold"])
def red(text):
    """Return ``text`` coloured red, using ``termcolor``."""
    import termcolor

    return termcolor.colored(text, "red")
def green(text):
    """Return ``text`` coloured green, using ``termcolor``."""
    import termcolor

    return termcolor.colored(text, "green")
class Tree:
    """A tree of "actors" that can be printed with box-drawing characters.

    An actor is any object with a ``summary`` attribute (the text shown for
    the node) and an ``as_dict()`` method (used to compare nodes). A ``Tree``
    provides both, so a node can itself serve as an actor.

    Printing or exporting the tree first "factorises" it in place: see
    ``_factorise``.
    """

    def __init__(self, actor, parent=None):
        self._parent = parent
        self._actor = actor
        self._kids = []

    def adopt(self, kid):
        """Move ``kid`` from its current parent to this node."""
        previous = kid._parent
        previous._kids.remove(kid)
        self._kids.append(kid)
        kid._parent = self

    def forget(self):
        """Detach this node from its parent."""
        siblings = self._parent._kids
        siblings.remove(self)
        self._parent = None

    @property
    def is_leaf(self):
        """True when the node has no children."""
        return not self._kids

    @property
    def key(self):
        """A hashable identity built from the actor's ``as_dict()`` items."""
        items = self._actor.as_dict().items()
        return tuple(sorted(items))

    @property
    def _text(self):
        return self._actor.summary

    @property
    def summary(self):
        return self._actor.summary

    def as_dict(self):
        return self._actor.as_dict()

    def node(self, actor, insert=False):
        """Create a child for ``actor`` and return it.

        The child goes first when ``insert`` is true, last otherwise.
        """
        child = Tree(actor, self)
        position = 0 if insert else len(self._kids)
        self._kids.insert(position, child)
        return child

    def print(self, file=sys.stdout):
        """Factorise the tree, then write it to ``file``."""
        while self._factorise():
            pass
        self._print([], file=file)

    def _leaves(self, result):
        """Append all the leaves below (or at) this node to ``result``."""
        if self.is_leaf:
            result.append(self)
            return
        for kid in self._kids:
            kid._leaves(result)

    def _factorise(self):
        """Apply one round of simplification; return True if anything moved.

        Three steps, stopping at the first one that changes something:
        1. factorise every child;
        2. when there are several children and a grandchild with the same
           key is found under each of them, pull it up to this node;
        3. when there is a single child, pull its leaf children up to
           this node.
        """
        if not self._kids:
            return False

        # Step 1: every child is visited, even once one has reported a change
        outcomes = [kid._factorise() for kid in self._kids]
        if any(outcomes):
            return True

        # Step 2: group the grandchildren by key
        groups = defaultdict(list)
        for kid in self._kids:
            for grand_kid in kid._kids:
                groups[grand_kid.key].append((kid, grand_kid))

        sibling_count = len(self._kids)
        shared = []
        if sibling_count > 1:
            for pairs in groups.values():
                if len(pairs) != sibling_count:
                    continue
                for owner, duplicate in pairs:
                    owner._kids.remove(duplicate)
                # One of the removed nodes becomes the actor of the new node
                shared.append(pairs[1][1])

        for promoted in reversed(shared):
            self.node(promoted, True)

        if shared:
            return True

        # Step 3: only applies to an only child
        if len(self._kids) != 1:
            return False

        only = self._kids[0]
        leaves = [grand_kid for grand_kid in only._kids if not grand_kid._kids]
        for leaf in leaves:
            only._kids.remove(leaf)
        for leaf in reversed(leaves):
            self.node(leaf, True)

        return bool(leaves)

    def _print(self, padding, file=sys.stdout):
        """Write this node and its subtree; ``padding`` holds the prefixes."""
        # Ancestors' connectors turn into continuation lines or blanks
        continuation = {" └": "  ", " ├": " │"}
        for index, piece in enumerate(padding[:-1]):
            padding[index] = continuation.get(piece, piece)

        if padding:
            prefix = "".join(padding)
            print(f"{prefix}─{self._text}", file=file)
        else:
            print(self._text, file=file)

        padding.append(" ")
        last = len(self._kids) - 1
        for index, kid in enumerate(self._kids):
            padding[-1] = " ├" if index < last else " └"
            kid._print(padding, file=file)
        padding.pop()

    def to_json(self, depth=0):
        """Factorise the tree, then return it as nested dicts."""
        while self._factorise():
            pass

        kids = [kid.to_json(depth + 1) for kid in self._kids]
        return {
            "actor": self._actor.as_dict(),
            "kids": kids,
            "depth": depth,
        }
def table(rows, header, align, margin=0):
    """Render rows of values as a text table

    >>> table([['Aa', 12, 5],
               ['B', 120, 1],
               ['C', 9, 123]],
               ['C1', 'C2', 'C3'],
               ['<', '>', '>'])
        C1 │  C2 │  C3
        ───┼─────┼────
        Aa │  12 │   5
        B  │ 120 │   1
        C  │   9 │ 123
        ───┴─────┴────

    Parameters
    ----------
    rows : list of lists (or tuples)
        The rows of the table. Values that can be converted to a float are
        shown with the ``g`` format; the others as strings.
    header : A list or tuple of strings
        The header of the table
    align : A list of '<', '>', or '^'
        To align the columns to the left, right, or center
    margin : int, optional
        Extra spaces on the left side of the table, by default 0

    Returns
    -------
    str
        A table as a string
    """

    def _cell(value):
        try:
            value = float(value)
        except Exception:
            pass
        if isinstance(value, float):
            return f"{value:g}"
        if isinstance(value, str):
            return value
        return str(value)

    def _pad(cell, width, how):
        if how == "<":
            return cell.ljust(width)
        if how == ">":
            return cell.rjust(width)
        return cell.center(width)

    body = [[_cell(value) for value in row] for row in rows]
    all_rows = [header] + body

    # Each column is as wide as its longest cell, header included
    widths = [max(len(cell) for cell in column) for column in zip(*all_rows)]
    rules = ["─" * width for width in widths]

    lines = []
    for index, row in enumerate(all_rows):
        cells = [_pad(cell, width, align[col]) for col, (cell, width) in enumerate(zip(row, widths))]
        lines.append(" │ ".join(cells))
        if index == 0:
            # Rule between the header and the body
            lines.append("─┼─".join(rules))

    lines.append("─┴─".join(rules))

    if margin:
        indent = margin * " "
        lines = [indent + line for line in lines]

    return "\n".join(lines)
def progress(done, todo, width=80) -> str:
    """Draw a progress bar: a green part for what is done, a red one for the rest

    >>> print(progress(10, 100, width=50))
    ██████████████████████████████████████████████████

    (the first 5 blocks are green and the other 45 are red)

    Parameters
    ----------
    done : number
        The amount of work completed
    todo : number
        The total amount of work; must not be zero
    width : int, optional
        The number of characters in the bar, by default 80

    Returns
    -------
    str
        The bar, with the colour markup added by ``green`` and ``red``
    """
    # Round to the nearest whole character, never exceeding the bar width
    filled = int(done / todo * width + 0.5)
    if filled > width:
        filled = width
    remaining = width - filled
    return green("█" * filled) + red("█" * remaining)