PyPI - cogzen - Versions diffs - 0.0.1__py2.py3-none-any.whl - Mend

cogzen 0.0.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

cogzen/__init__.py +43 -0
cogzen/_version.py +21 -0
cogzen/aux_log/__init__.py +4 -0
cogzen/aux_log/aux_log.py +160 -0
cogzen/aux_pandas/__init__.py +5 -0
cogzen/aux_pandas/aux_pandas.py +104 -0
cogzen/aux_srsly/__init__.py +5 -0
cogzen/aux_srsly/aux_srsly.py +137 -0
cogzen/aux_str/__init__.py +18 -0
cogzen/aux_str/aux_str.py +236 -0
cogzen/aux_str/clean_str_mappings.py +116 -0
cogzen/aux_str/now.py +45 -0
cogzen/aux_str/regex.py +18 -0
cogzen/aux_sys/__init__.py +4 -0
cogzen/aux_sys/aux_sys.py +93 -0
cogzen/cli/.cogzen.py.~undo-tree~.zst +0 -0
cogzen/cli/.gitkeep +0 -0
cogzen/cli/cogzen.py +21 -0
cogzen/cogzen.py +4 -0
cogzen/data/.gitkeep +0 -0
cogzen/data/emacs-logo/emacs-128x128.png +0 -0
cogzen/data/emacs-logo/emacs.svg +286 -0
cogzen/tests/__init__.py +1 -0
cogzen/tests/test_cogzen.py +23 -0
cogzen-0.0.1.dist-info/AUTHORS.rst +13 -0
cogzen-0.0.1.dist-info/LICENSE +33 -0
cogzen-0.0.1.dist-info/METADATA +133 -0
cogzen-0.0.1.dist-info/RECORD +31 -0
cogzen-0.0.1.dist-info/WHEEL +6 -0
cogzen-0.0.1.dist-info/entry_points.txt +2 -0
cogzen-0.0.1.dist-info/top_level.txt +1 -0

cogzen/__init__.py ADDED Viewed

@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Top-level package for CogZen."""
+# fmt: off
+from . import _version
+# Parse the embedded version metadata once and derive both names from it.
+__version_dict__ = _version.get_versions()
+__version__ = __version_dict__['version']
+# fmt: on
+__author__ = """cogsys.io"""
+__email__ = "cogsys@cogsys.io"
+from . import aux_sys
+from .aux_sys import chdir
+from .aux_sys import pushdir
+from .aux_sys import pushdir as pdir
+from . import aux_log  # noqa: F401
+from .aux_log import Log0
+from . import aux_str
+from .aux_str import clean_str
+from .aux_str.now import now
+from . import aux_pandas
+from .aux_pandas import disp_df
+from .aux_pandas import repr_df
+from .aux_pandas import disp_df as ddf
+from .aux_pandas import repr_df as rdf
+from . import aux_srsly
+from .aux_srsly import jsonable
+from .aux_srsly import yamlstr
+def get_module_version():
+    """Return the package version stored in the module-level ``__version__``."""
+    return __version__
+# end

cogzen/_version.py ADDED Viewed

@@ -0,0 +1,21 @@
+# This file was generated by 'versioneer.py' (0.29) from
+# revision-control system data, or from the parent directory name of an
+# unpacked source archive. Distribution tarballs contain a pre-generated copy
+# of this file.
+import json
+version_json = '''
+{
+ "date": "2025-03-11T23:30:54+0100",
+ "dirty": false,
+ "error": null,
+ "full-revisionid": "21fab8c9056505786e2745a3165cd47d6e062b8e",
+ "version": "0.0.1"
+}
+'''  # END VERSION_JSON
+def get_versions():
+    """Return the metadata embedded in ``version_json`` as a parsed dict.
+    The JSON document is re-parsed on every call, so each caller receives
+    its own dict with the keys ``date``, ``dirty``, ``error``,
+    ``full-revisionid`` and ``version``.
+    """
+    return json.loads(version_json)

cogzen/aux_log/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+#!/usr/bin/env python3
+from .aux_log import Log0

cogzen/aux_log/aux_log.py ADDED Viewed

@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+"""Logger that handles two outputs (stdout and file)."""
+import logging
+import pathlib
+from datetime import datetime as dt
+from pytz import timezone as tz
+# Timezone used for the timestamp in auto-generated log file names.
+tz0 = tz("Europe/Berlin")
+# LOGGING_LEVELS = ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]
+class Log0:
+    """Log0: logger that handles two outputs (console stream and file).
+    The console handler is a plain ``logging.StreamHandler()``, which writes
+    to ``sys.stderr`` by default. A ``logging.FileHandler`` is attached only
+    when ``write=True``.
+    Attributes
+    ----------
+    logger : logging.Logger
+        ``logging.getLogger(__name__)``; every ``Log0`` instance reconfigures
+        this same logger object.
+    logging : module
+        The standard ``logging`` module, accessible from the instance.
+    handler0 : logging.StreamHandler
+        Console handler.
+    handler1 : logging.FileHandler or None
+        File handler; ``None`` when ``write=False``.
+    dir0 : pathlib.Path or None
+        Log directory; ``None`` when ``write=False``.
+    fn0 : str or None
+        Log file name; ``None`` when ``write=False``.
+    of0 : pathlib.Path or None
+        Full path of the log file; ``None`` when ``write=False``.
+    stream_lvl, file_lvl
+        Levels requested for the console and the file handler.
+    """
+    def __init__(
+        self,
+        dir0="logs",
+        fn0=None,
+        write=False,
+        stream_lvl="INFO",
+        file_lvl="DEBUG",
+    ):
+        """
+        Initialize Log0 class.
+        Parameters
+        ----------
+        dir0 : str or path-like
+            Directory for the log file, created (mode ``0o700``) if missing.
+            Only used when ``write=True``.
+        fn0 : str, optional
+            Log file name. Defaults to ``<YYYYmmddTHHMMSS>.log`` using the
+            current time in the ``tz0`` timezone.
+        write : bool
+            Attach a file handler in addition to the console handler.
+        stream_lvl : str or int
+            Level of the console handler.
+        file_lvl : str or int
+            Level of the file handler.
+        Examples
+        --------
+        Without writing to log file.
+        >>> import cogzen
+        >>> logZ = cogzen.Log0(
+        ...     write=False,
+        ...     stream_lvl="INFO",
+        ...     file_lvl="DEBUG",
+        ... )
+        >>> log0 = logZ.logger
+        >>> # Check log file location (None, nothing is written)
+        >>> log0.info(f"{logZ.of0 = }")
+        Or simply
+        >>> import cogzen
+        >>> import pathlib
+        >>> logZ = cogzen.Log0()
+        >>> log0 = logZ.logger
+        >>> log0.info(f"{pathlib.Path.cwd() = }")
+        With writing to log file.
+        >>> import cogzen
+        >>> logZ = cogzen.Log0(
+        ...     write=True,
+        ...     stream_lvl="INFO",
+        ...     file_lvl="DEBUG",
+        ... )
+        >>> log0 = logZ.logger
+        >>> # Check log file location
+        >>> log0.info(f"{logZ.of0 = }")
+        Change logging levels (``handler1`` is ``None`` unless ``write=True``).
+        >>> # Check levels
+        >>> log0.info(f"handler0: {logZ.logging.getLevelName(logZ.handler0.level)}")
+        >>> log0.info(f"handler1: {logZ.logging.getLevelName(logZ.handler1.level)}")
+        >>> log0.info(f"logger: {logZ.logging.getLevelName(log0.level)}")
+        >>> # Set levels
+        >>> logZ.handler0.setLevel("DEBUG")
+        >>> # Check levels again
+        >>> log0.info(f"handler0: {logZ.logging.getLevelName(logZ.handler0.level)}")
+        >>> # Set overall logging level
+        >>> log0.setLevel("CRITICAL")
+        >>> log0.info(f"logger: {logZ.logging.getLevelName(log0.level)}")
+        >>> # no output expected from log0.info after setting "CRITICAL" logging level
+        """
+        # Logging levels for reference:
+        #   CRITICAL 50, ERROR 40, WARNING 30, INFO 20, DEBUG 10, NOTSET 0
+        self.logging = logging  # module accessible from instance
+        self.file_lvl = file_lvl
+        self.stream_lvl = stream_lvl
+        # Setup logging stream handler: one-letter level name plus message
+        self.handler0 = logging.StreamHandler()
+        self.handler0.setFormatter(
+            self._make_formatter("%(levelname).1s:", "%(message)s")
+        )
+        self.handler0.setLevel(self.stream_lvl)
+        self.logger = logging.getLogger(__name__)
+        self.logger.setLevel(self.handler0.level)
+        # Detach any old handlers and close them, so a file handler left over
+        # from a previous instance does not keep its file open
+        for handler in self.logger.handlers[:]:
+            self.logger.removeHandler(handler)
+            handler.close()
+        # Attach new handler
+        self.logger.addHandler(self.handler0)
+        # File-related attributes always exist; they stay None without a file
+        self.dir0 = None
+        self.fn0 = None
+        self.of0 = None
+        self.handler1 = None
+        if not write:
+            return
+        self.dir0 = pathlib.Path(dir0)
+        self.fn0 = (
+            str(fn0)
+            if fn0 is not None
+            else f"{dt.now(tz0).strftime('%Y%m%dT%H%M%S')}.log"
+        )
+        self.of0 = self.dir0 / self.fn0
+        self.dir0.mkdir(mode=0o700, parents=True, exist_ok=True)
+        # Setup logging file handler: timestamp, level, function name, message
+        self.handler1 = logging.FileHandler(self.of0)
+        self.handler1.setFormatter(
+            self._make_formatter(
+                "%(asctime)s",
+                "%(levelname).1s:",
+                "%(funcName)-16s ",
+                "%(message)s",
+            )
+        )
+        # The logger must let through whatever the more verbose handler wants
+        self.handler1.setLevel(self.file_lvl)
+        self.logger.setLevel(min(self.handler0.level, self.handler1.level))
+        # Attach new handler
+        self.logger.addHandler(self.handler1)
+    @staticmethod
+    def _make_formatter(*fields):
+        """Join format ``fields`` with spaces into a ``logging.Formatter``."""
+        return logging.Formatter(" ".join(fields), datefmt="%Y%m%dT%H%M%S")

cogzen/aux_pandas/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .aux_pandas import wine_df
+from .aux_pandas import disp_df
+from .aux_pandas import repr_df
+from .aux_pandas import fix_column_names
+from .aux_pandas import split_dataframe

cogzen/aux_pandas/aux_pandas.py ADDED Viewed

@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+import numpy as np
+import pandas as pd
+from contextlib import ExitStack
+from IPython.core.display import display
+from sklearn.datasets import load_wine
+# Wine dataset from scikit-learn, loaded once when this module is imported.
+wine_ds = load_wine()
+# Feature columns followed by a trailing "target" column.
+wine_df = pd.DataFrame(
+    data=np.column_stack((wine_ds["data"], wine_ds["target"])),
+    columns=[*wine_ds["feature_names"], "target"],
+)
+def fix_column_names(df0, lowercase=False):
+    """Normalize the column labels of ``df0`` in place and return ``df0``.
+    Labels are stripped of surrounding whitespace, then every space, dash
+    and dot is replaced with an underscore. With ``lowercase=True`` the
+    labels are lower-cased as a final step.
+    """
+    df0.columns = df0.columns.str.strip()
+    # One pass per separator, reassigning the labels after each pass.
+    for separator in (" ", "-", "."):
+        df0.columns = df0.columns.map(
+            lambda label, separator=separator: label.replace(separator, "_")
+        )
+    if lowercase:
+        df0.columns = df0.columns.map(str.lower)
+    return df0
+def _context_pandas(
+    max_columns=222,
+    max_colwidth=44,
+    width=2222,
+    max_rows=44,
+    min_rows=33,
+):
+    """Build the pandas display-option contexts for dataframe rendering.
+    Returns a list with one ``pd.option_context`` per display option; the
+    contexts are created but not entered here (callers push them onto an
+    ``ExitStack``).
+    """
+    display_options = {
+        "display.max_columns": max_columns,
+        "display.max_colwidth": max_colwidth,
+        "display.width": width,
+        "display.max_rows": max_rows,
+        "display.min_rows": min_rows,
+    }
+    return [
+        pd.option_context(option, value)
+        for option, value in display_options.items()
+    ]
+def disp_df(df0, **opt):
+    """Display a dataframe under the custom pandas display options.
+    Keyword arguments are forwarded to ``_context_pandas``; the resulting
+    option contexts are active only while ``display(df0)`` runs.
+    Examples
+    --------
+    >>> from cogzen import disp_df
+    >>> from cogzen.aux_pandas import wine_df
+    >>> disp_df(wine_df)
+    """
+    with ExitStack() as stack:
+        for option_ctx in _context_pandas(**opt):
+            stack.enter_context(option_ctx)
+        display(df0)
+def repr_df(df0, **opt):
+    """Return ``str(df0)`` rendered under the custom pandas display options.
+    Keyword arguments are forwarded to ``_context_pandas``; the resulting
+    option contexts are active only while the string is built.
+    Examples
+    --------
+    >>> from cogzen import repr_df
+    >>> from cogzen.aux_pandas import wine_df
+    >>> print(repr_df(wine_df))
+    """
+    with ExitStack() as stack:
+        for option_ctx in _context_pandas(**opt):
+            stack.enter_context(option_ctx)
+        return str(df0)
+def split_dataframe(dframe, max_rows):
+    """Split pandas dataframe into consecutive chunks of at most ``max_rows`` rows.
+    Parameters
+    ----------
+    dframe : pandas.DataFrame
+        Dataframe to split; rows are taken by position, in order.
+    max_rows : int or float
+        Maximum number of rows per chunk. Converted with ``int()`` (so
+        ``25e4`` is accepted) and must be at least 1 after conversion.
+    Returns
+    -------
+    dict
+        Chunks keyed ``chunk_0001``, ``chunk_0002``, ... in row order. Only
+        the last chunk may be shorter than ``max_rows``; an empty dataframe
+        gives an empty dict.
+    Raises
+    ------
+    ValueError
+        If ``max_rows`` is smaller than 1.
+    Examples
+    --------
+    >>> import pathlib
+    >>> import pandas as pd
+    >>> from cogzen.aux_pandas import split_dataframe
+    >>> df0 = pd.DataFrame({'A': range(1, 21), 'B': range(21, 41)})
+    >>> max_rows = 5e0 # 25e4
+    >>> chunks_dict = split_dataframe(df0, max_rows)
+    >>>
+    >>> dir0 = "../../data/i0000-data-chunks/"
+    >>> dir0 = pathlib.Path(dir0)
+    >>> dir0.mkdir(mode=0o700, parents=True, exist_ok=True)
+    >>>
+    >>> for key, chunk in chunks_dict.items():
+    ...     print(f"{key}: {chunk.shape}")
+    ...     chunk.to_pickle((dir0/key).with_suffix(".pkl"))
+    """
+    max_rows = int(max_rows)
+    if max_rows < 1:
+        # Zero used to fail with ZeroDivisionError and negative values
+        # silently produced no chunks at all; reject both explicitly.
+        raise ValueError(f"max_rows must be at least 1, got {max_rows}")
+    # .iloc keeps the slicing positional whatever the index type is.
+    return {
+        f"chunk_{number:04d}": dframe.iloc[start:start + max_rows]
+        for number, start in enumerate(range(0, len(dframe), max_rows), start=1)
+    }

cogzen/aux_srsly/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+from .aux_srsly import json_serializable_or_repr
+from .aux_srsly import json_serializable_or_repr as jsonable
+from .aux_srsly import yamlstr

cogzen/aux_srsly/aux_srsly.py ADDED Viewed

@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+import json
+import srsly
+import textwrap
+from typing import (
+    Dict,
+    Mapping,
+    Optional,
+)
+def yamlstr(
+    obj: Mapping,
+    prefix: str = "got:\n",
+    indent: int = 5,
+    kwargs: Optional[Mapping] = None,
+):
+    """Render a mapping as an indented YAML string.
+    Handy for logging (or just printing) more complex mappings such as
+    dictionaries or dict-like objects. The mapping is first passed through
+    ``json_serializable_or_repr`` and then dumped with ``srsly.yaml_dumps``.
+    Parameters
+    ----------
+    obj : Mapping
+        Mapping (e.g., dictionary or dict-like object) to be rendered.
+    prefix : str
+        Text placed in front of the YAML (defaults to ``"got:\\n"``).
+    indent : int
+        Number of spaces prepended to every YAML line (defaults to ``5``).
+    kwargs : Mapping
+        Extra keyword arguments for ``srsly.yaml_dumps``, for example
+        ``indent_mapping``, ``indent_sequence``, ``indent_offset`` and
+        ``sort_keys``.
+    Returns
+    -------
+    str
+        ``prefix`` followed by the indented YAML text.
+    Examples
+    --------
+    >>> from cogzen.aux_srsly import yamlstr
+    >>> dict0 = dict(a=1, b=2, c=dict(d=4, e=5))
+    >>> print(yamlstr(dict0))
+    got:
+         a: 1
+         b: 2
+         c:
+           d: 4
+           e: 5
+    """
+    dump_options = {} if kwargs is None else kwargs
+    serializable = json_serializable_or_repr(dict(obj))
+    yaml_text = srsly.yaml_dumps(serializable, **dump_options)
+    return prefix + textwrap.indent(yaml_text, " " * indent)
+def json_serializable_or_repr(obj: Mapping, content=True) -> Dict:
+    """Return a copy of ``obj`` in which every value is JSON-serializable.
+    The mapping is round-tripped through ``json.dumps`` / ``json.loads``.
+    Values the JSON encoder cannot handle are replaced by a string.
+    Parameters
+    ----------
+    obj : Mapping
+        Mapping (e.g., dictionary or dict-like object) to be parsed.
+    content : bool
+        If ``True`` the replacement is the value formatted as a string.
+        If ``False`` it is a ``<<non-serializable: TYPE>>`` placeholder
+        built from the value's type name.
+    Returns
+    -------
+    Dict
+        Parsed dictionary.
+    Examples
+    --------
+    >>> from cogzen.aux_srsly import json_serializable_or_repr as jsonable
+    >>> dict0 = dict(check="yes", items=[1, "a"], test=complex(1, 2))
+    >>> jsonable(dict0)
+    {'check': 'yes', 'items': [1, 'a'], 'test': '(1+2j)'}
+    >>> jsonable(dict0, content=False)
+    {'check': 'yes', 'items': [1, 'a'], 'test': '<<non-serializable: complex>>'}
+    """
+    def _fallback(value):
+        # Called by json.dumps only for values it cannot encode itself.
+        if content:
+            return f"{value}"
+        return f"<<non-serializable: {type(value).__qualname__}>>"
+    encoded = json.dumps(obj, default=_fallback)
+    return json.loads(encoded)

cogzen/aux_str/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+from .aux_str import is_ascii
+from .aux_str import is_ascii_alt
+from .aux_str import clean_str
+from .clean_str_mappings import (
+    CLEAN_STR_MAPPINGS_TINY,
+    CLEAN_STR_MAPPINGS_LARGE,
+    CLEAN_STR_MAPPINGS_HUGE,
+    CLEAN_STR_MAPPINGS_SPACE,
+    CLEAN_STR_MAPPINGS_DROP_HASHTAGS,
+)
+from .regex import (
+    REGEX_ABC_DASH_XYZ_ASTERISK,
+)