cogzen 0.0.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cogzen/__init__.py ADDED
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """Top-level package for CogZen."""
5
+
6
+ # fmt: off
7
+ from . import _version
8
+ __version__ = _version.get_versions()['version']
9
+ __version_dict__ = _version.get_versions()
10
+ # fmt: on
11
+
12
+ __author__ = """cogsys.io"""
13
+ __email__ = "cogsys@cogsys.io"
14
+
15
+
16
+ from . import aux_sys
17
+ from .aux_sys import chdir
18
+ from .aux_sys import pushdir
19
+ from .aux_sys import pushdir as pdir
20
+
21
+ from . import aux_log # noqa: F401
22
+ from .aux_log import Log0
23
+
24
+ from . import aux_str
25
+ from .aux_str import clean_str
26
+ from .aux_str.now import now
27
+
28
+ from . import aux_pandas
29
+ from .aux_pandas import disp_df
30
+ from .aux_pandas import repr_df
31
+ from .aux_pandas import disp_df as ddf
32
+ from .aux_pandas import repr_df as rdf
33
+
34
+ from . import aux_srsly
35
+ from .aux_srsly import jsonable
36
+ from .aux_srsly import yamlstr
37
+
38
+
39
def get_module_version():
    """Return the package version string held in ``__version__``."""
    version = __version__
    return version
41
+
42
+
43
+ # end
cogzen/_version.py ADDED
@@ -0,0 +1,21 @@
1
+
2
+ # This file was generated by 'versioneer.py' (0.29) from
3
+ # revision-control system data, or from the parent directory name of an
4
+ # unpacked source archive. Distribution tarballs contain a pre-generated copy
5
+ # of this file.
6
+
7
+ import json
8
+
9
# Version metadata embedded as a JSON document; the trailing marker comment
# is kept verbatim next to the closing quotes.
version_json = '''
{
"date": "2025-03-11T23:30:54+0100",
"dirty": false,
"error": null,
"full-revisionid": "21fab8c9056505786e2745a3165cd47d6e062b8e",
"version": "0.0.1"
}
'''  # END VERSION_JSON


def get_versions():
    """Decode ``version_json`` and return it as a fresh dictionary.

    Returns
    -------
    dict
        Mapping with the keys ``date``, ``dirty``, ``error``,
        ``full-revisionid`` and ``version``.
    """
    decoded = json.loads(version_json)
    return decoded
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env python3
2
+
3
+
4
+ from .aux_log import Log0
@@ -0,0 +1,160 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """Logger that handles two outputs (stdout and file)."""
4
+
5
+ import logging
6
+ import pathlib
7
+
8
+ from datetime import datetime as dt
9
+ from pytz import timezone as tz
10
+
11
+ tz0 = tz("Europe/Berlin")
12
+
13
+
14
+ # LOGGING_LEVELS = ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]
15
+
16
+
17
class Log0:
    """Log0: logger that handles two outputs (console stream and file).

    The console handler is created with ``logging.StreamHandler()`` and no
    explicit stream argument. Every instance configures the same logger
    object, ``logging.getLogger(__name__)``, so creating a new ``Log0``
    replaces (and closes) the handlers installed by an earlier instance.

    Attributes
    ----------
    logger : logging.Logger
        The configured logger.
    handler0 : logging.StreamHandler
        Console handler, filtered at ``stream_lvl``.
    handler1 : logging.FileHandler or None
        File handler, filtered at ``file_lvl``; ``None`` when ``write`` is
        false.
    dir0 : pathlib.Path or None
        Log directory; ``None`` when ``write`` is false.
    fn0 : str or None
        Log file name; ``None`` when ``write`` is false.
    of0 : pathlib.Path or None
        Full path of the log file; ``None`` when ``write`` is false.
    stream_lvl, file_lvl
        The level arguments as passed to the constructor.
    logging : module
        The standard ``logging`` module, reachable from the instance.
    """

    def __init__(
        self,
        dir0="logs",
        fn0=None,
        write=False,
        stream_lvl="INFO",
        file_lvl="DEBUG",
    ):
        """
        Initialize Log0 class.

        Parameters
        ----------
        dir0 : str or path-like
            Directory for the log file; created (mode ``0o700``, including
            parents) when ``write`` is true. Ignored otherwise.
        fn0 : str, optional
            Log file name. When omitted, a ``%Y%m%dT%H%M%S.log`` timestamp
            in the module time zone ``tz0`` is used.
        write : bool
            Attach a file handler in addition to the console handler.
        stream_lvl : str or int
            Level of the console handler.
        file_lvl : str or int
            Level of the file handler.

        Examples
        --------

        Without writing to log file.

        >>> import cogzen
        >>> logZ = cogzen.Log0(
        ...     write=False,
        ...     stream_lvl="INFO",
        ...     file_lvl="DEBUG",
        ... )
        >>> log0 = logZ.logger
        >>> # Check log file location
        >>> log0.info(f"{logZ.of0 = }")

        Or simply

        >>> import cogzen
        >>> import pathlib
        >>> logZ = cogzen.Log0()
        >>> log0 = logZ.logger
        >>> log0.info(f"{pathlib.Path.cwd() = }")


        With writing to log file.

        >>> import cogzen
        >>> logZ = cogzen.Log0(
        ...     write=True,
        ...     stream_lvl="INFO",
        ...     file_lvl="DEBUG",
        ... )
        >>> log0 = logZ.logger
        >>> # Check log file location
        >>> log0.info(f"{logZ.of0 = }")

        Change logging levels.

        >>> # Check levels
        >>> log0.info(f"handler0: {logZ.logging.getLevelName(logZ.handler0.level)}")
        >>> log0.info(f"handler1: {logZ.logging.getLevelName(logZ.handler1.level)}")
        >>> log0.info(f"logger: {logZ.logging.getLevelName(log0.level)}")
        >>> # Set levels
        >>> logZ.handler0.setLevel("DEBUG")
        >>> # Check levels again
        >>> log0.info(f"handler0: {logZ.logging.getLevelName(logZ.handler0.level)}")
        >>> log0.info(f"handler1: {logZ.logging.getLevelName(logZ.handler1.level)}")
        >>> log0.info(f"logger: {logZ.logging.getLevelName(log0.level)}")
        >>> # Set overall logging level
        >>> log0.setLevel("CRITICAL")
        >>> log0.info(f"handler0: {logZ.logging.getLevelName(logZ.handler0.level)}")
        >>> log0.info(f"handler1: {logZ.logging.getLevelName(logZ.handler1.level)}")
        >>> log0.info(f"logger: {logZ.logging.getLevelName(log0.level)}")
        >>> # no output expected from log0.info after setting "CRITICAL" logging level

        """
        # Logging levels, for reference:
        #   logging.CRITICAL  # 50
        #   logging.ERROR     # 40
        #   logging.WARNING   # 30
        #   logging.INFO      # 20
        #   logging.DEBUG     # 10
        #   logging.NOTSET    # 00

        # Setup logging stream handler: one-letter level prefix + message.
        # Other fields (asctime, name, module, funcName) can be added to the
        # list if a more verbose console format is wanted.
        self.handler0 = logging.StreamHandler()
        self.handler0.setFormatter(
            logging.Formatter(
                " ".join(
                    [
                        "%(levelname).1s:",
                        "%(message)s",
                    ]
                ),
                datefmt="%Y%m%dT%H%M%S",
            )
        )
        self.file_lvl = file_lvl
        self.stream_lvl = stream_lvl
        self.logging = logging  # module accessible from instance
        self.logger = logging.getLogger(__name__)
        self.handler0.setLevel(self.stream_lvl)
        self.logger.setLevel(self.handler0.level)

        # Detach any old handlers. Iterate over a copy because the list is
        # mutated, and close each handler so that a FileHandler left over
        # from a previous instance releases its open file.
        for handler in self.logger.handlers[:]:
            self.logger.removeHandler(handler)
            handler.close()

        # Attach new handler
        self.logger.addHandler(self.handler0)

        # Always define the file-related attributes so that reading them on
        # a console-only instance yields None instead of AttributeError.
        self.handler1 = None
        self.dir0 = None
        self.fn0 = None
        self.of0 = None

        if not write:
            return

        self.dir0 = pathlib.Path(dir0)
        self.fn0 = (
            str(fn0)
            if fn0 is not None
            else f"{dt.now(tz0).strftime('%Y%m%dT%H%M%S')}.log"
        )
        self.of0 = self.dir0 / self.fn0
        self.dir0.mkdir(mode=0o700, parents=True, exist_ok=True)

        # Setup logging file handler: timestamp, level letter, padded
        # function name, message.
        self.handler1 = logging.FileHandler(self.of0)
        self.handler1.setFormatter(
            logging.Formatter(
                " ".join(
                    [
                        "%(asctime)s",
                        "%(levelname).1s:",
                        "%(funcName)-16s ",
                        "%(message)s",
                    ]
                ),
                datefmt="%Y%m%dT%H%M%S",
            )
        )

        # Set logging levels: the logger itself must let through whatever
        # the more verbose of the two handlers wants to see.
        self.handler1.setLevel(self.file_lvl)
        self.logger.setLevel(min(self.handler0.level, self.handler1.level))
        # Attach new handler
        self.logger.addHandler(self.handler1)
@@ -0,0 +1,5 @@
1
+ from .aux_pandas import wine_df
2
+ from .aux_pandas import disp_df
3
+ from .aux_pandas import repr_df
4
+ from .aux_pandas import fix_column_names
5
+ from .aux_pandas import split_dataframe
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from contextlib import ExitStack
6
+ from IPython.core.display import display
7
+
8
+ from sklearn.datasets import load_wine
9
+
10
+
11
# Load the scikit-learn wine dataset at import time and expose it as a
# DataFrame: the feature columns followed by a trailing "target" column.
wine_ds = load_wine()
wine_df = pd.DataFrame(
    np.column_stack((wine_ds["data"], wine_ds["target"])),
    columns=wine_ds["feature_names"] + ["target"],
)
16
+
17
+
18
def fix_column_names(df0, lowercase=False):
    """Tidy the column labels of ``df0`` in place and return the frame.

    Surrounding whitespace is stripped from every label, then each space,
    hyphen and dot is replaced by an underscore.

    Parameters
    ----------
    df0 : pandas.DataFrame
        Frame whose ``columns`` attribute is rewritten.
    lowercase : bool
        Additionally lower-case every label.

    Returns
    -------
    pandas.DataFrame
        The same ``df0`` object that was passed in.
    """
    to_underscore = str.maketrans({" ": "_", "-": "_", ".": "_"})

    labels = df0.columns.str.strip()
    labels = labels.map(lambda label: label.translate(to_underscore))
    if lowercase:
        labels = labels.map(str.lower)

    df0.columns = labels
    return df0
27
+
28
+
29
+ def _context_pandas(
30
+ max_columns=222,
31
+ max_colwidth=44,
32
+ width=2222,
33
+ max_rows=44,
34
+ min_rows=33,
35
+ ):
36
+ """Apply custom context to dataframe representation (ExitStack)."""
37
+ return [
38
+ pd.option_context("display.max_columns", max_columns),
39
+ pd.option_context("display.max_colwidth", max_colwidth),
40
+ pd.option_context("display.width", width),
41
+ pd.option_context("display.max_rows", max_rows),
42
+ pd.option_context("display.min_rows", min_rows),
43
+ ]
44
+
45
+
46
def disp_df(df0, **opt):
    """Display DF using custom formatting context.

    Keyword arguments are forwarded to ``_context_pandas``; the resulting
    option contexts are active only while ``df0`` is being displayed.

    Examples
    --------
    >>> from cogzen import disp_df
    >>> from cogzen.aux_pandas import wine_df
    >>> disp_df(wine_df)

    """
    with ExitStack() as stack:
        for option_ctx in _context_pandas(**opt):
            stack.enter_context(option_ctx)
        display(df0)
61
+
62
+
63
def repr_df(df0, **opt):
    """Get DF repr using custom formatting context.

    Keyword arguments are forwarded to ``_context_pandas``; the string is
    produced with ``str(df0)`` while those option contexts are active.

    Examples
    --------
    >>> from cogzen import repr_df
    >>> from cogzen.aux_pandas import wine_df
    >>> print(repr_df(wine_df))

    """
    with ExitStack() as stack:
        for option_ctx in _context_pandas(**opt):
            stack.enter_context(option_ctx)
        rendered = str(df0)
    return rendered
78
+
79
def split_dataframe(dframe, max_rows):
    """Split pandas dataframe into chunks with at most ``max_rows`` rows.

    Rows are taken by position (``.iloc``), so the result does not depend
    on the kind of index the frame carries.

    Parameters
    ----------
    dframe : pandas.DataFrame
        Frame to split.
    max_rows : int or float
        Maximum number of rows per chunk; converted with ``int()`` so that
        values such as ``25e4`` are accepted. Must be at least 1 after
        conversion.

    Returns
    -------
    dict
        Mapping ``"chunk_0001"``, ``"chunk_0002"``, ... to consecutive
        slices of ``dframe``. Empty when ``dframe`` has no rows.

    Raises
    ------
    ValueError
        If ``max_rows`` is smaller than 1 after integer conversion.

    Examples
    --------
    >>> import pathlib
    >>> import pandas as pd
    >>> from cogzen.aux_pandas import split_dataframe
    >>> df0 = pd.DataFrame({'A': range(1, 21), 'B': range(21, 41)})
    >>> max_rows = 5e0  # 25e4
    >>> chunks_dict = split_dataframe(df0, max_rows)
    >>>
    >>> dir0 = "../../data/i0000-data-chunks/"
    >>> dir0 = pathlib.Path(dir0)
    >>> dir0.mkdir(mode=0o700, parents=True, exist_ok=True)
    >>>
    >>> for key, chunk in chunks_dict.items():
    ...     print(f"{key}: {chunk.shape}")
    ...     # print(chunk)
    ...     chunk.to_pickle((dir0/key).with_suffix(".pkl"))
    ...     print("")

    """
    max_rows = int(max_rows)
    if max_rows < 1:
        # Zero would divide by zero below; a negative size would silently
        # produce no chunks at all.
        raise ValueError(f"max_rows must be at least 1, got {max_rows}")

    total_rows = len(dframe)
    chunks = -(-total_rows // max_rows)  # ceiling division
    return {
        f"chunk_{i + 1:04d}": dframe.iloc[i * max_rows:(i + 1) * max_rows]
        for i in range(chunks)
    }
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from .aux_srsly import json_serializable_or_repr
4
+ from .aux_srsly import json_serializable_or_repr as jsonable
5
+ from .aux_srsly import yamlstr
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import json
4
+ import srsly
5
+ import textwrap
6
+
7
+ from typing import (
8
+ Dict,
9
+ Mapping,
10
+ Optional,
11
+ )
12
+
13
+
14
def yamlstr(
    obj: Mapping,
    prefix: str = "got:\n",
    indent: int = 5,
    kwargs: Optional[Mapping] = None,
):
    """Get indented yaml string from mapping.

    Handy for logging (or just printing) more complex mappings such as
    dictionaries or dict-like objects. The mapping is copied with
    ``dict(obj)``, passed through ``json_serializable_or_repr``, dumped
    with ``srsly.yaml_dumps`` and the dump is indented and appended to
    ``prefix``.


    Parameters
    ----------
    obj : Mapping
        Mapping (e.g., dictionary or dict-like object) to be parsed.
    prefix : str
        Prefix string (defaults to ``"got:\\n"``).
    indent : int
        Number of spaces put in front of every dumped line
        (defaults to ``5``).
    kwargs : Mapping
        Extra arguments for ``srsly.yaml_dumps``.
        For example: ``indent_mapping``, ``indent_sequence``,
        ``indent_offset`` and ``sort_keys``.


    Returns
    -------
    str
        ``prefix`` followed by the indented YAML text.


    Examples
    --------
    >>> from cogzen.aux_srsly import yamlstr
    >>> dict0 = dict(a=1, b=2, c=dict(d=4, e=5))
    >>> print(yamlstr(dict0))
    got:
         a: 1
         b: 2
         c:
           d: 4
           e: 5

    """
    dump_options = dict() if kwargs is None else kwargs

    cleaned = json_serializable_or_repr(dict(obj))
    yaml_text = srsly.yaml_dumps(cleaned, **dump_options)
    padding = indent * " "
    return prefix + textwrap.indent(yaml_text, padding)
70
+
71
+
72
def json_serializable_or_repr(obj: Mapping, content=True) -> Dict:
    """Return dictionary without JSON non-serializable items.

    The object is round-tripped through ``json.dumps`` / ``json.loads``.
    Any value the JSON encoder cannot handle is replaced by a string, so
    the result contains only JSON-native types.

    Parameters
    ----------
    obj : Mapping
        Mapping (e.g., dictionary or dict-like object) to be parsed.
    content : bool
        Replace unserializable data with its string representation.
        If ``False`` use a ``<<non-serializable: TypeName>>`` placeholder
        instead.

    Returns
    -------
    Dict
        Parsed dictionary.

    Examples
    --------

    >>> from cogzen.aux_srsly import json_serializable_or_repr as jsonable
    >>> dict0 = dict(check="yes", items=[1, 2, "a"], test=1 + 2j)
    >>> jsonable(dict0)
    {'check': 'yes', 'items': [1, 2, 'a'], 'test': '(1+2j)'}
    >>> jsonable(dict0, content=False)
    {'check': 'yes', 'items': [1, 2, 'a'], 'test': '<<non-serializable: complex>>'}

    """

    def _fallback(value):
        # Called by the encoder only for values it cannot serialize itself.
        if content:
            return f"{value}"
        return f"<<non-serializable: {type(value).__qualname__}>>"

    encoded = json.dumps(obj, default=_fallback)
    return json.loads(encoded)
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from .aux_str import is_ascii
4
+ from .aux_str import is_ascii_alt
5
+
6
+ from .aux_str import clean_str
7
+
8
+ from .clean_str_mappings import (
9
+ CLEAN_STR_MAPPINGS_TINY,
10
+ CLEAN_STR_MAPPINGS_LARGE,
11
+ CLEAN_STR_MAPPINGS_HUGE,
12
+ CLEAN_STR_MAPPINGS_SPACE,
13
+ CLEAN_STR_MAPPINGS_DROP_HASHTAGS,
14
+ )
15
+
16
+ from .regex import (
17
+ REGEX_ABC_DASH_XYZ_ASTERISK,
18
+ )