rmk2 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rmk2/__init__.py ADDED
File without changes
rmk2/api.py ADDED
@@ -0,0 +1,57 @@
1
+ import functools
2
+ import json
3
+ import os
4
+ import re
5
+ from typing import Any, Iterator
6
+ from urllib.parse import urlparse, urlunparse
7
+ from uuid import uuid4
8
+
9
+
10
def save_response(prefix: str = None) -> callable:
    """Build a decorator that dumps a function's return value to a JSON file.

    The file is written into the directory of this module, inside the
    optional ``prefix`` sub-directory, and is named
    ``<function name>_<uuid4>.json``.

    Args:
        prefix: Sub-directory (relative to this module's directory) to write
            into. ``None`` or an empty string writes directly next to this
            module.

    Returns:
        A decorator. The wrapped function returns the original return value;
        iterators are materialised into a list so that the same data can be
        both serialised and handed back to the caller.
    """

    def decorator(func: callable) -> callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            response = func(*args, **kwargs)

            # Materialise before touching the filesystem, an iterator can only
            # be consumed once and must still be returned to the caller
            _response = list(response) if isinstance(response, Iterator) else response

            # os.path.join() rejects None, so fall back to an empty component
            _directory = os.path.join(os.path.dirname(__file__), prefix or "")

            # The prefix directory is not guaranteed to exist yet
            if _directory:
                os.makedirs(_directory, exist_ok=True)

            _filename = os.path.join(_directory, f"{func.__name__}_{uuid4()}.json")

            with open(_filename, mode="w", encoding="utf-8") as outfile:
                json.dump(_response, outfile)

            return _response

        return wrapper

    return decorator
36
+
37
+
38
+ def create_url(
39
+ base_url: str,
40
+ path: list[str] | None,
41
+ query: dict[str, str | int] | None,
42
+ ) -> str:
43
+ """Helper function to create valid URLs by extending a base URL"""
44
+ _scheme, _netloc, _path, _, _query, _ = urlparse(base_url)
45
+
46
+ _path = (
47
+ "/".join([re.sub(r"/$", "", _path), *[str(x) for x in path]])
48
+ if path is not None
49
+ else _path
50
+ )
51
+ _query = (
52
+ "&".join([f"{k}={str(v)}" for k, v in query.items()])
53
+ if query is not None
54
+ else {}
55
+ )
56
+
57
+ return urlunparse((_scheme, _netloc, _path, None, _query, None))
File without changes
rmk2/config/mapping.py ADDED
@@ -0,0 +1,230 @@
1
+ import argparse
2
+ import dataclasses
3
+ import itertools
4
+ import os
5
+ from copy import deepcopy
6
+ from enum import Enum
7
+ from typing import Any, Optional, Sequence
8
+
9
+
10
class ConfigField(Enum):
    """Names of the ConfigMapping fields a configuration value is addressed by"""

    # Command line argument name
    ARGUMENT = "argument"
    # Attribute name on the config object
    ATTRIBUTE = "attribute"
    # Environment variable name
    VARIABLE = "variable"
14
+
15
+
16
class PrefixSeparator(Enum):
    """Separator placed between a prefix and a name, per kind of field"""

    # Arguments are joined with dashes
    ARGUMENT = "-"
    # Attributes are joined with dots
    ATTRIBUTE = "."
    # Variables are joined with underscores
    VARIABLE = "_"
20
+
21
+
22
+ @dataclasses.dataclass
23
+ class ConfigMapping:
24
+ attribute: str
25
+ argument: str | None = None
26
+ variable: str | None = None
27
+ default: Any = None
28
+ description: str = None
29
+ group: str | list[str] | None = None
30
+
31
+ def __post_init__(self):
32
+ self.argument = (self.argument or self.attribute).lower()
33
+ self.variable = (self.variable or self.attribute).upper()
34
+
35
+ if isinstance(self.group, str):
36
+ self.group = [self.group]
37
+ elif isinstance(self.group, list):
38
+ self.group = self.group
39
+ else:
40
+ self.group = []
41
+
42
+ def as_dict(self) -> dict[Any, Any]:
43
+ """Return class attributes as dictionary"""
44
+ return dataclasses.asdict(self)
45
+
46
+ def as_tuple(self) -> list[tuple[Any, Any]]:
47
+ """Return class attributes as list of tuples"""
48
+ return list(dataclasses.asdict(self).items())
49
+
50
+
51
class BaseConfig:
    """Config object whose attributes are described by ConfigMapping entries

    ``mappings`` holds one ConfigMapping per attribute. Attribute values start
    out as the mapping defaults and can then be overridden from argparse
    arguments or environment variables. Mappings that declare a group can be
    pushed down into nested BaseConfig instances via ``update_groups``.
    """

    mappings: list[ConfigMapping] | None = None

    def __init__(self, mappings: Optional[Sequence[ConfigMapping]] = None):
        mappings = deepcopy(mappings) if mappings is not None else []

        # Start from class-level mappings (if a subclass declares any)
        self.mappings = self.mappings or []
        self.merge(mappings=mappings)
        self.update()

    def generate_mappings(self) -> None:
        """Generate mappings for attributes that were set directly"""
        attributes = [(k, v) for k, v in vars(self).items() if k != "mappings"]
        _mapped = {x.attribute for x in self.mappings}
        _unmapped = [(k, v) for k, v in attributes if k not in _mapped]

        mappings = [ConfigMapping(attribute=k, default=v) for k, v in _unmapped]

        self.merge(mappings=mappings)

    def merge(self, mappings: Optional[Sequence[ConfigMapping]]) -> None:
        """Merge multiple ConfigMapping definitions

        Later definitions win over earlier ones with the same attribute name.

        Raises:
            AssertionError: If ``mappings`` is neither a list nor a tuple.
        """
        # Raised explicitly so that the check survives `python -O`
        if type(mappings) not in (tuple, list):
            raise AssertionError(f"Not a valid sequence: {mappings=}")

        mappings = deepcopy(mappings)

        _mappings_merged = {x.attribute: x for x in [*self.mappings, *mappings]}
        self.mappings = list(_mappings_merged.values())

    def remove_unmapped_attributes(self):
        """Remove attributes without valid attribute mappings, retain groups"""
        attributes = {k for k in vars(self) if k != "mappings"}
        _mapped = {x.attribute for x in self.mappings}

        for attribute in attributes.difference(_mapped):
            # Nested configs are groups, not plain attributes
            if isinstance(getattr(self, attribute), BaseConfig):
                continue

            delattr(self, attribute)

    def _set_prefix(
        self,
        field: ConfigField,
        prefix: str,
        group: str | list[str] | None = None,
        merge_group: bool = False,
    ):
        """Set a common prefix for arguments or variables, optionally group-only

        Args:
            field: ConfigField.ARGUMENT or ConfigField.VARIABLE.
            prefix: Prefix to put in front of the current name.
            group: When given, only mappings with exactly this group are
                prefixed; ``None`` prefixes every mapping.
            merge_group: Also insert the group name(s) between the prefix and
                the current name.
        """
        fields = {
            ConfigField.ARGUMENT: (
                ConfigField.ARGUMENT.value,
                PrefixSeparator.ARGUMENT.value,
            ),
            ConfigField.VARIABLE: (
                ConfigField.VARIABLE.value,
                PrefixSeparator.VARIABLE.value,
            ),
        }

        conversions = {
            ConfigField.ARGUMENT: lambda x: x.lower(),
            ConfigField.VARIABLE: lambda x: x.upper(),
        }

        _field, _separator = fields[field]
        _convert = conversions[field]
        _prefix = _convert(prefix)

        _group = [group] if isinstance(group, str) else list(group or [])
        _group_converted = [_convert(g) for g in _group]

        # Group names on mappings keep the case they were declared with, so
        # match on the name as given; the case-converted spelling is accepted
        # as well, since that is the only one that used to match
        mappings = (
            [x for x in self.mappings if x.group in (_group, _group_converted)]
            if group is not None
            else self.mappings
        )

        _mappings = []

        for mapping in mappings:
            _mapping = mapping.as_dict()

            if merge_group is True:
                _members = [_prefix, *_group_converted, _mapping[_field]]
            else:
                _members = [_prefix, _mapping[_field]]

            _prefixed = _separator.join(_members)

            _mappings.append(ConfigMapping(**{**_mapping, _field: _prefixed}))

        self.merge(mappings=_mappings)

    def set_argument_prefix(
        self, prefix: str, group: str = None, merge_group: bool = False
    ):
        """Set a common prefix for arguments, optionally group-only"""
        self._set_prefix(
            field=ConfigField.ARGUMENT,
            prefix=prefix,
            group=group,
            merge_group=merge_group,
        )

    def set_variable_prefix(
        self, prefix: str, group: str = None, merge_group: bool = False
    ):
        """Set a common prefix for variables, optionally group-only"""
        self._set_prefix(
            field=ConfigField.VARIABLE,
            prefix=prefix,
            group=group,
            merge_group=merge_group,
        )

    def update(self, reset: bool = False) -> None:
        """Set attributes to their mapping defaults

        Attributes that already hold a value different from the default are
        left alone unless ``reset`` is True.
        """
        for mapping in self.mappings:
            if (
                hasattr(self, mapping.attribute)
                and getattr(self, mapping.attribute) != mapping.default
                and reset is False
            ):
                continue

            setattr(self, mapping.attribute, mapping.default)

    def update_from_args(self, args: argparse.Namespace) -> None:
        """Update attribute values from argparse arguments"""
        # Transform Namespace into dict, remove None values to honour defaults
        _args = {k: v for k, v in vars(args).items() if v is not None}

        for m in self.mappings:
            setattr(
                self, m.attribute, _args.get(m.argument, getattr(self, m.attribute))
            )

    def update_from_env(self) -> None:
        """Update attribute values from environment variables"""
        for m in self.mappings:
            _variable = m.variable.upper()

            setattr(self, m.attribute, os.getenv(_variable, getattr(self, m.attribute)))

    def update_groups(self) -> None:
        """Recursively create groups from mappings, resulting in a nested class tree"""

        def _first_group(_mapping: ConfigMapping) -> str:
            return _mapping.group[0]

        _groups = [x for x in self.mappings if x.group not in ([], None)]
        # groupby() only groups adjacent items, hence the sort by the same key
        _sorted = sorted(_groups, key=_first_group)
        _grouped = [(k, list(g)) for k, g in itertools.groupby(_sorted, _first_group)]

        for group, mappings in _grouped:
            # Remove mappings from parent
            self.mappings = [x for x in self.mappings if x not in mappings]

            _mappings = deepcopy(mappings)

            # Push mapping groups down one level
            for mapping in _mappings:
                mapping.group = mapping.group[1:]

            if not hasattr(self, group):
                setattr(self, group, BaseConfig())

            _group = getattr(self, group)
            _group.merge(mappings=_mappings)

            # Recurse into subgroups; ConfigMapping normalises group to a
            # list, so this also runs for leaf groups, where the recursion
            # only cleans up unmapped attributes
            if any(x.group is not None for x in _group.mappings):
                _group.update_groups()

            # Update ungrouped mappings
            _group.update()

        # Clean up any attributes which have been pushed into a group
        self.remove_unmapped_attributes()

    def validate(self):
        """Validate that mappings are correct and mapped attributes exist

        Raises:
            AssertionError: If ``mappings`` is not a list/tuple of
                ConfigMapping or a mapped attribute is missing.
        """
        # Raised explicitly so that validation survives `python -O`
        if type(self.mappings) not in (list, tuple):
            raise AssertionError(f"Invalid sequence: {self.mappings=}")

        for mapping in self.mappings:
            if not isinstance(mapping, ConfigMapping):
                raise AssertionError(f"Invalid mapping type: {mapping=}")

            if not hasattr(self, mapping.attribute):
                raise AssertionError(f"Missing attribute: {mapping.attribute}")
rmk2/data/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from rmk2.data.data import Data
rmk2/data/data.py ADDED
@@ -0,0 +1,70 @@
1
+ from typing import Any, Self
2
+
3
+ import rmk2.data.dictionary
4
+ import rmk2.data.object
5
+
6
+
7
+ class Data:
8
+ """@DynamicAttrs"""
9
+ __annotations__: dict = {}
10
+
11
+ def __init__(self, **kwargs):
12
+ for k, v in kwargs.items():
13
+ if isinstance(v, dict):
14
+ setattr(self, k, type(self)(**v))
15
+ self.__annotations__[k] = type(self)
16
+ else:
17
+ setattr(self, k, v)
18
+ self.__annotations__[k] = type(v)
19
+
20
+ def __init_subclass__(cls, **kwargs):
21
+ cls.__init__ = rmk2.data.object.get_metaclass(cls).__init__
22
+
23
+ return cls
24
+
25
+ def __repr__(self) -> str:
26
+ return str(vars(self))
27
+
28
+ def __eq__(self, other):
29
+ return (
30
+ self.__todict__() == other.__todict__()
31
+ if isinstance(other, type(self))
32
+ else False
33
+ )
34
+
35
+ def __todict__(self) -> dict:
36
+ """Recursively cast instance to dict"""
37
+ return {
38
+ k: v.__todict__() if isinstance(v, type(self)) else v
39
+ for k, v in vars(self).items()
40
+ }
41
+
42
+ def get_path(self, path: list[str] | str) -> Any | None:
43
+ """Get attribute from an object, nested under a given path"""
44
+ return rmk2.data.object.get_path(cls=self, path=path)
45
+
46
+ def merge(self, other: Self | dict) -> Self:
47
+ """Recursively merge data into the current instance, key by key"""
48
+ self.__dict__ = vars(
49
+ rmk2.data.object.merge(
50
+ left=self,
51
+ right=type(self)(**other) if isinstance(other, dict) else other,
52
+ )
53
+ )
54
+
55
+ return self
56
+
57
+ def put_path(self, path: list[str] | str, value: Any) -> Self:
58
+ """Put a value as an attribute at a given path into an object"""
59
+ self.merge(
60
+ other=rmk2.data.dictionary.put_path(
61
+ data={}, path=path, value=value
62
+ )
63
+ )
64
+
65
+ return self
66
+
67
+ @classmethod
68
+ def from_path(cls, path: list[str] | str, value: Any) -> Self:
69
+ """Initialise instance directly by putting a value at a given path"""
70
+ return cls().put_path(path=path, value=value)
@@ -0,0 +1,47 @@
1
+ import functools
2
+ from typing import Any, TypeVar
3
+
4
+ T = TypeVar("T")
5
+
6
+
7
def merge(
    left: dict[str, Any], right: dict[str, Any], merge_lists: bool = False
) -> dict[str, Any]:
    """Recursively merge two dictionaries, key by key

    Nested dictionaries are merged recursively; lists are concatenated when
    ``merge_lists`` is set. Otherwise the right value is used, unless it is
    falsy while the left value is truthy, in which case the left value stays.
    """
    # Keys of left come first, followed by keys only found in right, which
    # keeps the key ordering stable (a set would not)
    ordered_keys = list(left)
    ordered_keys.extend(key for key in right if key not in left)

    combined = {}

    for key in ordered_keys:
        lhs = left.get(key)
        rhs = right.get(key)

        if isinstance(lhs, dict) and isinstance(rhs, dict):
            combined[key] = merge(left=lhs, right=rhs, merge_lists=merge_lists)
            continue

        if isinstance(lhs, list) and isinstance(rhs, list) and merge_lists:
            combined[key] = [*lhs, *rhs]
            continue

        if not lhs and rhs is not None:
            # Nothing worth keeping on the left, take right as is
            combined[key] = rhs
        else:
            combined[key] = rhs or lhs

    return combined
30
+
31
+
32
+ def get_path(
33
+ data: dict[str, dict | T], path: list[str] | str, default: Any | None = None
34
+ ) -> T | None:
35
+ """Get a value at a given path from a dictionary"""
36
+ path = path.split(".") if isinstance(path, str) else path
37
+ result = functools.reduce(lambda a, b: a.get(b, {}), path, data)
38
+
39
+ return default if (result == {} and default) else result
40
+
41
+
42
def put_path(data: dict[str, dict], path: list[str] | str, value: Any) -> dict:
    """Put a value at a given path into a dictionary

    The path is either a list of keys or a dot-separated string. The result
    is a new dictionary created by merging the nested value into ``data``.
    """
    keys = path.split(".") if isinstance(path, str) else path

    # Wrap the value from the innermost key outwards
    nested = value

    for key in reversed(keys):
        nested = {key: nested}

    return merge(left=data, right=nested, merge_lists=False)
rmk2/data/object.py ADDED
@@ -0,0 +1,93 @@
1
+ import copy
2
+ import dataclasses
3
+ import functools
4
+ import inspect
5
+ from typing import TypeVar, Callable, Type, Any
6
+
7
+ import rmk2.data.dictionary
8
+
9
+ T = TypeVar("T")
10
+
11
+
12
class Missing:
    """Marker class used as fallback value when an attribute lookup fails"""
14
+
15
+
16
def merge(left: T, right: T, merge_lists: bool = False, merge_dicts: bool = False) -> T:
    """Recursively merge two class instances, key by key

    Args:
        left: Instance providing the base values.
        right: Instance of the same type whose values take precedence, unless
            they are falsy while the left value is truthy.
        merge_lists: Concatenate lists instead of replacing them.
        merge_dicts: Merge dictionaries recursively instead of replacing them.

    Returns:
        A new instance of ``type(left)``, created by passing the merged
        attributes as keyword arguments.

    Raises:
        AssertionError: If ``left`` and ``right`` are of different types.
    """
    # Raised explicitly so that the check survives `python -O`
    if type(left) is not type(right):
        raise AssertionError(
            f"Cannot merge different Types, left='{type(left)}', right='{type(right)}'"
        )

    primitives = (int, str, float, bool, list, dict, set, type(None))
    kwargs = {}

    # Combine keys via iteration instead of a set to maintain key ordering
    _left_vars = vars(left)
    _keys = [*_left_vars, *[k for k in vars(right) if k not in _left_vars]]

    for k in _keys:
        _left = getattr(left, k, None)
        _right = getattr(right, k, None)

        if (
            type(_left) not in primitives
            and type(_left) is type(_right)
            # Values without attributes of their own (tuples, datetimes, ...)
            # cannot be merged key by key, they are replaced as a whole
            and hasattr(_left, "__dict__")
        ):
            merged = merge(
                left=_left,
                right=_right,
                merge_lists=merge_lists,
                merge_dicts=merge_dicts,
            )
        elif isinstance(_left, list) and isinstance(_right, list) and merge_lists:
            merged = [*_left, *_right]
        elif isinstance(_left, dict) and isinstance(_right, dict) and merge_dicts:
            merged = rmk2.data.dictionary.merge(
                left=_left, right=_right, merge_lists=merge_lists
            )
        elif not _left and _right is not None:
            merged = _right
        else:
            merged = _right or _left

        kwargs[k] = merged

    return type(left)(**kwargs)
54
+
55
+
56
+ def get_path(cls: object, path: list[str] | str, default: Any | None = None) -> Any:
57
+ """Get attribute from an object, nested under a given path"""
58
+ path = path.split(".") if isinstance(path, str) else path
59
+ result = functools.reduce(lambda a, b: getattr(a, b, Missing), path, cls)
60
+
61
+ return default if result == Missing else result
62
+
63
+
64
def get_metaclass(cls: object) -> Type:
    """Return the class listed first in the class tree of a class or instance

    Instances are resolved to their type first. The tree comes from
    ``inspect.getclasstree``; ``type(None)`` is returned for an empty tree.
    """
    target = cls if isinstance(cls, Type) else type(cls)
    tree = inspect.getclasstree([target])

    if not tree:
        return type(None)

    # Entries are (class, bases) tuples, only the class is of interest
    first_class, *_ = tree[0]

    return first_class
69
+
70
+
71
def pushdown(func: Callable, parent: list[str] = None) -> Callable:
    """Recursively apply a function to the nested dataclass fields of an instance

    Args:
        func: Callable invoked as ``func(value, *args, **kwargs)`` for every
            qualifying field value.
        parent: Names of the fields leading to the current level; extended on
            each level of recursion.

    Returns:
        A wrapper taking a dataclass instance plus the arguments for ``func``;
        it returns the instance it was given.
    """
    parent = parent or []

    @functools.wraps(func)
    def wrapper(cls: T, *args, **kwargs) -> T:
        _base = get_metaclass(cls)

        # A list keeps the declaration order of the fields, so that func is
        # applied in the same order on every run
        _fields = [
            f.name
            for f in dataclasses.fields(cls)
            if dataclasses.is_dataclass(f.type) and issubclass(f.type, _base)
        ]

        for k in _fields:
            # Affect current subclass
            func(getattr(cls, k), *args, **kwargs)

            # Recurse further for nodes underneath current subclass
            pushdown(func, parent=[*parent, k])(getattr(cls, k), *args, **kwargs)

        return cls

    return wrapper
rmk2/file.py ADDED
@@ -0,0 +1,102 @@
1
+ import datetime
2
+ import json
3
+ import logging
4
+ import os
5
+ import pathlib
6
+ from enum import Enum
7
+ from typing import Iterator, Any
8
+
9
+ Expected = bool | str | int | float | datetime.date | datetime.datetime | None
10
+ Jsonified = bool | str | int | float | None
11
+
12
+
13
class WriteMode(Enum):
    """File modes for writing; each value is the mode string given to open()"""

    # Add to the end of an existing file
    APPEND = "a"
    # Exclusive creation, fails if the file already exists
    CREATE = "x"
    # Overwrite existing content
    TRUNCATE = "w"
17
+
18
+
19
+ def _jsonify_types(row: dict[str, Expected]) -> dict[str, Jsonified]:
20
+ """Cast types that are not supported by JSON to more compatible types"""
21
+ _castable_types = {
22
+ datetime.date: lambda x: x.isoformat(),
23
+ datetime.datetime: lambda x: x.isoformat(timespec="microseconds"),
24
+ datetime.timedelta: lambda x: str(x),
25
+ }
26
+
27
+ return {k: _castable_types.get(type(v), lambda x: x)(v) for k, v in row.items()}
28
+
29
+
30
def write_jsonl(
    data: Iterator | list[list[tuple[str, Any]] | dict[str, Any]],
    path: pathlib.Path | str,
    mode: WriteMode = WriteMode.CREATE,
) -> None:
    """Write serialised data to a given path/file, one JSON document per line

    Args:
        data: Rows to write, each either a dictionary or a list of
            (key, value) tuples.
        path: Target file.
        mode: How to open the target file, see WriteMode.

    Raises:
        AssertionError: If ``mode`` is not a WriteMode member.
        FileExistsError: If the file exists and ``mode`` is WriteMode.CREATE.
        ValueError: If a row is neither a dictionary nor a list of tuples.
    """
    path = pathlib.Path(path)  # Ensure path is a Path object

    try:
        # Raised explicitly so that the check survives `python -O`
        if not isinstance(mode, WriteMode):
            raise AssertionError(
                f"Mode needs to be one of {[x.name for x in WriteMode]}"
            )

        with open(path, mode=mode.value, encoding="utf-8") as outfile:
            logging.debug(f"Writing serialised data, {path=}")

            for line in data:
                if isinstance(line, dict):
                    _line = line
                elif isinstance(line, list) and (
                    # An empty list of pairs is an empty row
                    not line or isinstance(line[0], tuple)
                ):
                    _line = dict(line)
                else:
                    raise ValueError("Data must be a list of key/value pairs")

                outfile.write(json.dumps(_jsonify_types(_line)))
                # Text mode already translates "\n" into the OS-specific line
                # ending, writing os.linesep would end up as "\r\r\n" on Windows
                outfile.write("\n")

    except (AssertionError, FileExistsError) as e:
        logging.error(str(e))
        raise e
61
+
62
+
63
+ def read_jsonl(
64
+ path: pathlib.Path | str,
65
+ ) -> Iterator | list[list[tuple[str, Jsonified]]]:
66
+ """Read JSONL serialised data from a given path/file"""
67
+ path = pathlib.Path(path) # Ensure path is a Path object
68
+
69
+ try:
70
+ with open(path, mode="r", encoding="utf-8") as infile:
71
+ logging.debug(f"Reading serialised data, {path=}")
72
+
73
+ yield from iter(json.loads(line) for line in infile)
74
+
75
+ except FileNotFoundError as e:
76
+ logging.error(str(e))
77
+ raise e
78
+
79
+
80
+ def delete_file(path: pathlib.Path | str) -> None:
81
+ """Delete a given data file"""
82
+ path = pathlib.Path(path) # Ensure path is a Path object
83
+
84
+ try:
85
+ logging.debug(f"Deleting serialised data, {path=}")
86
+ os.remove(path)
87
+ except FileNotFoundError as e:
88
+ logging.error(str(e))
89
+ raise e
90
+
91
+
92
+ def count_file(path: pathlib.Path | str) -> int:
93
+ """Count number of lines in a given path/file"""
94
+ path = pathlib.Path(path) # Ensure path is a Path object
95
+
96
+ _idx = 0
97
+
98
+ with open(path, mode="rb") as infile:
99
+ for _idx, _ in enumerate(infile, start=1):
100
+ pass
101
+
102
+ return _idx
rmk2/hash.py ADDED
@@ -0,0 +1,55 @@
1
+ import datetime
2
+ import hashlib
3
+ import logging
4
+ from enum import Enum
5
+ from typing import Any
6
+
7
+
8
class HashAlgorithm(Enum):
    """Supported hash algorithms; each value is the hashlib constructor"""

    # 256 bit digest
    SHA256 = hashlib.sha256
    # 384 bit digest
    SHA384 = hashlib.sha384
    # 512 bit digest
    SHA512 = hashlib.sha512
12
+
13
+
14
def hash_values(
    *args: Any,
    algorithm: HashAlgorithm = HashAlgorithm.SHA256,
    replace_null: str = "",
    separator: str = "",
) -> str:
    """Hash various items to create consistent digests for a given value

    Casts its input(s) to str, encodes it as UTF-8 bytes, then hashes it

    Args:
        *args: Values to hash, in order. Supported types are int, float, str,
            bool, datetime, date and timedelta (exact types only) plus None.
        algorithm: Hash algorithm to use.
        replace_null: Stand-in for None and empty strings. With the default
            (empty string) such values add nothing to the digest, so
            ``(1, None)`` and ``(None, 1)`` hash identically; pass an unlikely
            string to tell them apart.
        separator: Fed into the hash between two consecutive values. The
            default (empty string) keeps digests unchanged; a non-empty
            separator tells ``("ab", "c")`` apart from ``("a", "bc")``.

    Returns:
        The hexadecimal digest.

    Raises:
        AssertionError: If ``algorithm`` is not a HashAlgorithm member or a
            value has an unsupported type.
    """
    _types = (
        int,
        float,
        str,
        bool,
        datetime.datetime,
        datetime.date,
        datetime.timedelta,
    )

    # Raised explicitly so that the check survives `python -O`
    if not isinstance(algorithm, HashAlgorithm):
        raise AssertionError(
            f"Algorithm needs to be one of {[x.name for x in HashAlgorithm]}"
        )

    try:
        _hash = algorithm.value(usedforsecurity=True)
        _separator = separator.encode(encoding="utf-8")

        for index, value in enumerate(args):
            if value is None or value == "":
                value = replace_null
            elif type(value) not in _types:
                raise AssertionError(f"Cannot cast to string, type={type(value)}")

            # Separators only go between values, never in front of the first
            if index and _separator:
                _hash.update(_separator)

            _hash.update(str(value).encode(encoding="utf-8"))

        return _hash.hexdigest()

    except AssertionError as e:
        logging.error(str(e))
        raise e