rmk2 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rmk2/__init__.py +0 -0
- rmk2/api.py +57 -0
- rmk2/config/__init__.py +0 -0
- rmk2/config/mapping.py +230 -0
- rmk2/data/__init__.py +1 -0
- rmk2/data/data.py +70 -0
- rmk2/data/dictionary.py +47 -0
- rmk2/data/object.py +93 -0
- rmk2/file.py +102 -0
- rmk2/hash.py +55 -0
- rmk2-1.0.0.dist-info/METADATA +41 -0
- rmk2-1.0.0.dist-info/RECORD +14 -0
- rmk2-1.0.0.dist-info/WHEEL +4 -0
- rmk2-1.0.0.dist-info/licenses/LICENSE +674 -0
rmk2/__init__.py
ADDED
|
File without changes
|
rmk2/api.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
from typing import Any, Iterator
|
|
6
|
+
from urllib.parse import urlparse, urlunparse
|
|
7
|
+
from uuid import uuid4
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def save_response(prefix: str = None) -> callable:
    """Build a decorator that saves a function's return value as JSON.

    Every call of the decorated function writes one file named
    ``<function name>_<uuid4>.json``. The file is placed in the directory of
    this module, joined with ``prefix`` via ``os.path.join`` (so an absolute
    ``prefix`` replaces the module directory).

    Args:
        prefix: Sub-directory (or absolute directory) to write into. ``None``
            writes directly into the module's directory. The directory is not
            created; it has to exist already.

    Returns:
        A decorator. The wrapped function returns the original result, except
        that iterators are materialised into a list first (the same list that
        is written to disk).
    """
    # os.path.join() raises TypeError on None, so the default has to be mapped
    # to an empty path component ("no sub-directory").
    _prefix = "" if prefix is None else prefix

    def decorator(func: callable) -> callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            response = func(*args, **kwargs)

            _filename = os.path.join(
                os.path.dirname(__file__),
                _prefix,
                f"{func.__name__}_{str(uuid4())}.json",
            )

            # An iterator can only be consumed once: turn it into a list so it
            # can be both serialised and handed back to the caller.
            _response = list(response) if isinstance(response, Iterator) else response

            with open(_filename, mode="w", encoding="utf-8") as outfile:
                json.dump(_response, outfile)

            return _response

        return wrapper

    return decorator
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def create_url(
|
|
39
|
+
base_url: str,
|
|
40
|
+
path: list[str] | None,
|
|
41
|
+
query: dict[str, str | int] | None,
|
|
42
|
+
) -> str:
|
|
43
|
+
"""Helper function to create valid URLs by extending a base URL"""
|
|
44
|
+
_scheme, _netloc, _path, _, _query, _ = urlparse(base_url)
|
|
45
|
+
|
|
46
|
+
_path = (
|
|
47
|
+
"/".join([re.sub(r"/$", "", _path), *[str(x) for x in path]])
|
|
48
|
+
if path is not None
|
|
49
|
+
else _path
|
|
50
|
+
)
|
|
51
|
+
_query = (
|
|
52
|
+
"&".join([f"{k}={str(v)}" for k, v in query.items()])
|
|
53
|
+
if query is not None
|
|
54
|
+
else {}
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
return urlunparse((_scheme, _netloc, _path, None, _query, None))
|
rmk2/config/__init__.py
ADDED
|
File without changes
|
rmk2/config/mapping.py
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import dataclasses
|
|
3
|
+
import itertools
|
|
4
|
+
import os
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Any, Optional, Sequence
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ConfigField(Enum):
    """Names of the ``ConfigMapping`` fields a configuration value can come from."""

    # Command-line argument name
    ARGUMENT = "argument"
    # Attribute name on the config object
    ATTRIBUTE = "attribute"
    # Environment variable name
    VARIABLE = "variable"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class PrefixSeparator(Enum):
    """Separator placed between a prefix and a name, per kind of field."""

    # Arguments are joined with dashes, e.g. "prefix-name"
    ARGUMENT = "-"
    # Attributes are joined with dots, e.g. "prefix.name"
    ATTRIBUTE = "."
    # Variables are joined with underscores, e.g. "PREFIX_NAME"
    VARIABLE = "_"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclasses.dataclass
|
|
23
|
+
class ConfigMapping:
|
|
24
|
+
attribute: str
|
|
25
|
+
argument: str | None = None
|
|
26
|
+
variable: str | None = None
|
|
27
|
+
default: Any = None
|
|
28
|
+
description: str = None
|
|
29
|
+
group: str | list[str] | None = None
|
|
30
|
+
|
|
31
|
+
def __post_init__(self):
|
|
32
|
+
self.argument = (self.argument or self.attribute).lower()
|
|
33
|
+
self.variable = (self.variable or self.attribute).upper()
|
|
34
|
+
|
|
35
|
+
if isinstance(self.group, str):
|
|
36
|
+
self.group = [self.group]
|
|
37
|
+
elif isinstance(self.group, list):
|
|
38
|
+
self.group = self.group
|
|
39
|
+
else:
|
|
40
|
+
self.group = []
|
|
41
|
+
|
|
42
|
+
def as_dict(self) -> dict[Any, Any]:
|
|
43
|
+
"""Return class attributes as dictionary"""
|
|
44
|
+
return dataclasses.asdict(self)
|
|
45
|
+
|
|
46
|
+
def as_tuple(self) -> list[tuple[Any, Any]]:
|
|
47
|
+
"""Return class attributes as list of tuples"""
|
|
48
|
+
return list(dataclasses.asdict(self).items())
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class BaseConfig:
    """Configuration container driven by a list of ``ConfigMapping`` entries.

    Every mapping describes one attribute together with the argument name,
    environment variable name and default value that belong to it. The
    mappings live in ``self.mappings``; ``update`` mirrors them as instance
    attributes.
    """

    # May be predefined on a subclass. Instances get their own list because
    # merge() always assigns a freshly built list to self.mappings.
    mappings: list[ConfigMapping] | None = None

    def __init__(self, mappings: Optional[Sequence[ConfigMapping]] = None):
        """Merge ``mappings`` into the existing ones and apply the defaults"""
        mappings = deepcopy(mappings) if mappings is not None else []

        self.mappings = self.mappings or []
        self.merge(mappings=mappings)
        self.update()

    def generate_mappings(self) -> None:
        """Generate mappings for attributes that were set directly

        Each instance attribute (other than ``mappings``) without a mapping
        gets one, using the attribute's current value as default.
        """
        _mapped = {x.attribute for x in self.mappings}

        mappings = [
            ConfigMapping(attribute=k, default=v)
            for k, v in vars(self).items()
            if k != "mappings" and k not in _mapped
        ]

        self.merge(mappings=mappings)

    def merge(self, mappings: Optional[Sequence[ConfigMapping]]) -> None:
        """Merge multiple ConfigMapping definitions

        Mappings are keyed by ``attribute``: a later definition replaces an
        earlier one for the same attribute, while keeping the position of the
        first occurrence. The given mappings are deep-copied first.

        Raises:
            AssertionError: If ``mappings`` is not exactly a tuple or a list.
        """
        assert type(mappings) in (tuple, list), f"Not a valid sequence: {mappings=}"

        mappings = deepcopy(mappings)

        _mappings_merged = {x.attribute: x for x in [*self.mappings, *mappings]}
        self.mappings = list(_mappings_merged.values())

    def remove_unmapped_attributes(self):
        """Remove attributes without valid attribute mappings, retain groups"""
        _mapped = {x.attribute for x in self.mappings}
        _unmapped = {k for k in vars(self) if k != "mappings"} - _mapped

        for attribute in _unmapped:
            # Nested groups are BaseConfig instances and never have a mapping
            # of their own, so they must survive the clean-up
            if isinstance(getattr(self, attribute), BaseConfig):
                continue

            delattr(self, attribute)

    def _set_prefix(
        self,
        field: ConfigField,
        prefix: str,
        group: str | list[str] | None = None,
        merge_group: bool = False,
    ):
        """Set a common prefix for arguments or variables, optionally group-only

        Args:
            field: ``ConfigField.ARGUMENT`` or ``ConfigField.VARIABLE``; any
                other value raises ``KeyError``.
            prefix: Text put in front of the current name, joined with the
                separator that belongs to ``field``.
            group: When given, only mappings whose group path equals this
                group (compared case-insensitively) are prefixed.
            merge_group: When ``True``, the group names are inserted between
                the prefix and the name.
        """
        fields = {
            ConfigField.ARGUMENT: (
                ConfigField.ARGUMENT.value,
                PrefixSeparator.ARGUMENT.value,
            ),
            ConfigField.VARIABLE: (
                ConfigField.VARIABLE.value,
                PrefixSeparator.VARIABLE.value,
            ),
        }

        conversions = {
            ConfigField.ARGUMENT: lambda x: x.lower(),
            ConfigField.VARIABLE: lambda x: x.upper(),
        }

        _field, _separator = fields[field]
        _convert = conversions[field]
        _prefix = _convert(prefix)

        _group = group or []
        _group = [group] if isinstance(group, str) else _group
        _group = [_convert(g) for g in _group]

        # The requested group has been case-converted above, so the mapping's
        # group must be converted the same way before comparing. Comparing the
        # raw group meant that e.g. a variable prefix for group "db" looked for
        # ["DB"] and never matched a mapping grouped under ["db"].
        mappings = (
            [x for x in self.mappings if [_convert(g) for g in x.group] == _group]
            if group is not None
            else self.mappings
        )

        _mappings = []

        for mapping in mappings:
            _mapping = mapping.as_dict()

            if merge_group is True:
                _members = [_prefix, *_group, _mapping[_field]]
            else:
                _members = [_prefix, _mapping[_field]]

            _prefixed = _separator.join(_members)

            # Rebuild the mapping with the prefixed name; merge() then replaces
            # the old definition because the attribute is unchanged
            _mappings.append(ConfigMapping(**{**_mapping, _field: _prefixed}))

        self.merge(mappings=_mappings)

    def set_argument_prefix(self, prefix: str, group: str = None):
        """Set a common prefix for arguments, optionally group-only"""
        self._set_prefix(field=ConfigField.ARGUMENT, prefix=prefix, group=group)

    def set_variable_prefix(self, prefix: str, group: str = None):
        """Set a common prefix for variables, optionally group-only"""
        self._set_prefix(field=ConfigField.VARIABLE, prefix=prefix, group=group)

    def update(self, reset: bool = False) -> None:
        """Set mapped attributes to their mapping's default value

        An attribute that already exists with a value different from the
        default is left alone, unless ``reset`` is ``True``.
        """
        for mapping in self.mappings:
            if (
                hasattr(self, mapping.attribute)
                and getattr(self, mapping.attribute) != mapping.default
                and reset is False
            ):
                continue

            setattr(self, mapping.attribute, mapping.default)

    def update_from_args(self, args: argparse.Namespace) -> None:
        """Update attribute values from argparse arguments

        The namespace is looked up by each mapping's ``argument`` name;
        attributes without a matching (non-``None``) argument keep their
        current value.
        """
        # Transform Namespace into dict, remove None values to honour defaults
        _args = {k: v for k, v in vars(args).items() if v is not None}

        for m in self.mappings:
            setattr(
                self, m.attribute, _args.get(m.argument, getattr(self, m.attribute))
            )

    def update_from_env(self) -> None:
        """Update attribute values from environment variables

        Values are taken as returned by ``os.getenv`` (strings); attributes
        whose variable is not set keep their current value.
        """
        for m in self.mappings:
            _variable = m.variable.upper()

            setattr(self, m.attribute, os.getenv(_variable, getattr(self, m.attribute)))

    def update_groups(self) -> None:
        """Recursively create groups from mappings, resulting in a nested class tree"""

        def _first_group(_mapping: ConfigMapping) -> str:
            return _mapping.group[0]

        _groups = [x for x in self.mappings if x.group not in ([], None)]
        # itertools.groupby only groups adjacent items, hence the sort
        _sorted = sorted(_groups, key=_first_group)
        _grouped = [(k, list(g)) for k, g in itertools.groupby(_sorted, _first_group)]

        for group, mappings in _grouped:
            # Remove mappings from parent
            self.mappings = [x for x in self.mappings if x not in mappings]

            _mappings = deepcopy(mappings)

            # Push mapping groups down one level
            for mapping in _mappings:
                mapping.group = mapping.group[1:]

            if not hasattr(self, group):
                setattr(self, group, BaseConfig())

            _group = getattr(self, group)
            _group.merge(mappings=_mappings)

            # Recurse into subgroups
            if any([x.group is not None for x in _group.mappings]):
                _group.update_groups()

            # Update ungrouped mappings
            _group.update()

        # Clean up any attributes which have been pushed into a group
        self.remove_unmapped_attributes()

    def validate(self):
        """Validate that mappings are correct and mapped attributes exist

        Raises:
            AssertionError: If ``self.mappings`` is not a list or tuple, holds
                something other than ``ConfigMapping`` instances, or names an
                attribute that does not exist on the instance.
        """
        assert type(self.mappings) in (
            list,
            tuple,
        ), f"Invalid sequence: {self.mappings=}"

        for mapping in self.mappings:
            assert isinstance(
                mapping, ConfigMapping
            ), f"Invalid mapping type: {mapping=}"
            assert hasattr(
                self, mapping.attribute
            ), f"Missing attribute: {mapping.attribute}"
|
rmk2/data/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from rmk2.data.data import Data
|
rmk2/data/data.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from typing import Any, Self
|
|
2
|
+
|
|
3
|
+
import rmk2.data.dictionary
|
|
4
|
+
import rmk2.data.object
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Data:
|
|
8
|
+
"""@DynamicAttrs"""
|
|
9
|
+
__annotations__: dict = {}
|
|
10
|
+
|
|
11
|
+
def __init__(self, **kwargs):
|
|
12
|
+
for k, v in kwargs.items():
|
|
13
|
+
if isinstance(v, dict):
|
|
14
|
+
setattr(self, k, type(self)(**v))
|
|
15
|
+
self.__annotations__[k] = type(self)
|
|
16
|
+
else:
|
|
17
|
+
setattr(self, k, v)
|
|
18
|
+
self.__annotations__[k] = type(v)
|
|
19
|
+
|
|
20
|
+
def __init_subclass__(cls, **kwargs):
|
|
21
|
+
cls.__init__ = rmk2.data.object.get_metaclass(cls).__init__
|
|
22
|
+
|
|
23
|
+
return cls
|
|
24
|
+
|
|
25
|
+
def __repr__(self) -> str:
|
|
26
|
+
return str(vars(self))
|
|
27
|
+
|
|
28
|
+
def __eq__(self, other):
|
|
29
|
+
return (
|
|
30
|
+
self.__todict__() == other.__todict__()
|
|
31
|
+
if isinstance(other, type(self))
|
|
32
|
+
else False
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
def __todict__(self) -> dict:
|
|
36
|
+
"""Recursively cast instance to dict"""
|
|
37
|
+
return {
|
|
38
|
+
k: v.__todict__() if isinstance(v, type(self)) else v
|
|
39
|
+
for k, v in vars(self).items()
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
def get_path(self, path: list[str] | str) -> Any | None:
|
|
43
|
+
"""Get attribute from an object, nested under a given path"""
|
|
44
|
+
return rmk2.data.object.get_path(cls=self, path=path)
|
|
45
|
+
|
|
46
|
+
def merge(self, other: Self | dict) -> Self:
|
|
47
|
+
"""Recursively merge data into the current instance, key by key"""
|
|
48
|
+
self.__dict__ = vars(
|
|
49
|
+
rmk2.data.object.merge(
|
|
50
|
+
left=self,
|
|
51
|
+
right=type(self)(**other) if isinstance(other, dict) else other,
|
|
52
|
+
)
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
return self
|
|
56
|
+
|
|
57
|
+
def put_path(self, path: list[str] | str, value: Any) -> Self:
|
|
58
|
+
"""Put a value as an attribute at a given path into an object"""
|
|
59
|
+
self.merge(
|
|
60
|
+
other=rmk2.data.dictionary.put_path(
|
|
61
|
+
data={}, path=path, value=value
|
|
62
|
+
)
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
return self
|
|
66
|
+
|
|
67
|
+
@classmethod
|
|
68
|
+
def from_path(cls, path: list[str] | str, value: Any) -> Self:
|
|
69
|
+
"""Initialise instance directly by putting a value at a given path"""
|
|
70
|
+
return cls().put_path(path=path, value=value)
|
rmk2/data/dictionary.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
from typing import Any, TypeVar
|
|
3
|
+
|
|
4
|
+
T = TypeVar("T")
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def merge(
    left: dict[str, Any], right: dict[str, Any], merge_lists: bool = False
) -> dict[str, Any]:
    """Recursively merge two dictionaries, key by key

    Keys keep the order of ``left``, followed by the keys only ``right`` has.
    For a key present on both sides: dictionaries are merged recursively,
    lists are concatenated when ``merge_lists`` is set, and otherwise the
    value from ``right`` wins unless it is falsy while the one from ``left``
    is truthy.
    """
    # Build the key order by hand (rather than via a set) to keep it stable
    ordered_keys = list(left)
    ordered_keys.extend(key for key in right if key not in left)

    combined = {}

    for key in ordered_keys:
        lhs, rhs = left.get(key), right.get(key)

        if isinstance(lhs, dict) and isinstance(rhs, dict):
            value = merge(left=lhs, right=rhs, merge_lists=merge_lists)
        elif merge_lists and isinstance(lhs, list) and isinstance(rhs, list):
            value = [*lhs, *rhs]
        elif not lhs and rhs is not None:
            # Nothing useful on the left: take whatever the right provides
            value = rhs
        else:
            value = rhs or lhs

        combined[key] = value

    return combined
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_path(
|
|
33
|
+
data: dict[str, dict | T], path: list[str] | str, default: Any | None = None
|
|
34
|
+
) -> T | None:
|
|
35
|
+
"""Get a value at a given path from a dictionary"""
|
|
36
|
+
path = path.split(".") if isinstance(path, str) else path
|
|
37
|
+
result = functools.reduce(lambda a, b: a.get(b, {}), path, data)
|
|
38
|
+
|
|
39
|
+
return default if (result == {} and default) else result
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def put_path(data: dict[str, dict], path: list[str] | str, value: Any) -> dict:
    """Put a value at a given path into a dictionary

    The path (a list of keys or a dot-separated string) is turned into a
    nested single-key dictionary around ``value``, which is then merged into
    ``data``. The merged dictionary is returned.
    """
    keys = path.split(".") if isinstance(path, str) else path

    # Wrap the value from the innermost key outwards:
    # ["a", "b"], 1  ->  {"a": {"b": 1}}
    branch = value

    for key in reversed(keys):
        branch = {key: branch}

    return merge(left=data, right=branch, merge_lists=False)
|
rmk2/data/object.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import dataclasses
|
|
3
|
+
import functools
|
|
4
|
+
import inspect
|
|
5
|
+
from typing import TypeVar, Callable, Type, Any
|
|
6
|
+
|
|
7
|
+
import rmk2.data.dictionary
|
|
8
|
+
|
|
9
|
+
T = TypeVar("T")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Missing:
    """Marker class, used in this module as the ``getattr`` fallback for absent attributes."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def merge(left: T, right: T, merge_lists: bool = False, merge_dicts: bool = False) -> T:
    """Recursively merge two class instances, key by key

    Args:
        left: Instance providing the base values.
        right: Instance of exactly the same type whose values take precedence.
        merge_lists: Concatenate list values instead of replacing them.
        merge_dicts: Merge dictionary values recursively instead of replacing
            them.

    Returns:
        A new instance of ``type(left)``, built by passing the merged
        attributes as keyword arguments. Attributes keep the order of
        ``left``, followed by those only ``right`` has.

    Raises:
        AssertionError: If ``left`` and ``right`` are of different types.
    """
    assert type(left) is type(
        right
    ), f"Cannot merge different Types, left='{type(left)}', right='{type(right)}'"

    primitives = [int, str, float, bool, list, dict, set, type(None)]
    kwargs = {}

    # dict.fromkeys() removes duplicates but keeps first-seen order, so the
    # merged instance gets a deterministic attribute order (a set does not
    # guarantee one)
    for k in dict.fromkeys([*vars(left), *vars(right)]):
        _left = getattr(left, k, None)
        _right = getattr(right, k, None)

        if (
            type(_left) not in primitives
            and type(_right) not in primitives
            and type(_left) is type(_right)
            # Recursing calls vars(), which needs a __dict__; values such as
            # tuples or datetimes have none and are merged as plain values
            and hasattr(_left, "__dict__")
        ):
            merged = merge(
                left=_left,
                right=_right,
                merge_lists=merge_lists,
                merge_dicts=merge_dicts,
            )
        elif isinstance(_left, list) and isinstance(_right, list) and merge_lists:
            merged = [*_left, *_right]
        elif isinstance(_left, dict) and isinstance(_right, dict) and merge_dicts:
            merged = rmk2.data.dictionary.merge(
                left=_left, right=_right, merge_lists=merge_lists
            )
        elif not _left and _right is not None:
            # Nothing useful on the left: take whatever the right provides
            merged = _right
        else:
            # Right wins, unless it is falsy while left is not
            merged = _right or _left

        kwargs[k] = merged

    return type(left)(**kwargs)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_path(cls: object, path: list[str] | str, default: Any | None = None) -> Any:
|
|
57
|
+
"""Get attribute from an object, nested under a given path"""
|
|
58
|
+
path = path.split(".") if isinstance(path, str) else path
|
|
59
|
+
result = functools.reduce(lambda a, b: getattr(a, b, Missing), path, cls)
|
|
60
|
+
|
|
61
|
+
return default if result == Missing else result
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_metaclass(cls: object) -> Type:
    """Get the class at the root of the class tree for a given object

    Accepts a class or an instance (whose type is used). The result is the
    first entry of ``inspect.getclasstree`` for that single class.
    """
    target = cls if isinstance(cls, Type) else type(cls)
    tree = inspect.getclasstree([target])

    if not tree:
        return type(None)

    # Every tree entry is a (class, bases) tuple; take the class of the first
    root, _bases = tree[0]

    return root
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def pushdown(func: Callable, parent: list[str] = None) -> Callable:
    """Recursively apply a function to the nested dataclass fields of an object

    The returned wrapper expects a dataclass instance as first argument. For
    each field whose declared type is a dataclass and a subclass of
    ``get_metaclass`` of that instance, ``func`` is called on the field's
    value and the wrapper then recurses into it. The instance itself is
    returned unchanged.
    """
    lineage = parent or []

    @functools.wraps(func)
    def wrapper(cls: T, *args, **kwargs) -> T:
        nested = set()

        for spec in dataclasses.fields(cls):
            if dataclasses.is_dataclass(spec.type) and issubclass(
                spec.type, get_metaclass(cls)
            ):
                nested.add(spec.name)

        for name in nested:
            # Affect current subclass
            func(getattr(cls, name), *args, **kwargs)

            # Recurse further for nodes underneath current subclass
            descend = pushdown(func, parent=[*lineage, name])
            descend(getattr(cls, name), *args, **kwargs)

        return cls

    return wrapper
|
rmk2/file.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import pathlib
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Iterator, Any
|
|
8
|
+
|
|
9
|
+
Expected = bool | str | int | float | datetime.date | datetime.datetime | None
|
|
10
|
+
Jsonified = bool | str | int | float | None
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class WriteMode(Enum):
    """File modes accepted by ``write_jsonl``; each value is an ``open()`` mode."""

    # Add to the end of an existing file
    APPEND = "a"
    # Exclusive creation: fails if the file already exists
    CREATE = "x"
    # Overwrite an existing file
    TRUNCATE = "w"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _jsonify_types(row: dict[str, Expected]) -> dict[str, Jsonified]:
|
|
20
|
+
"""Cast types that are not supported by JSON to more compatible types"""
|
|
21
|
+
_castable_types = {
|
|
22
|
+
datetime.date: lambda x: x.isoformat(),
|
|
23
|
+
datetime.datetime: lambda x: x.isoformat(timespec="microseconds"),
|
|
24
|
+
datetime.timedelta: lambda x: str(x),
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
return {k: _castable_types.get(type(v), lambda x: x)(v) for k, v in row.items()}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def write_jsonl(
    data: Iterator | list[list[tuple[str, Any]] | dict[str, Any]],
    path: pathlib.Path | str,
    mode: WriteMode = WriteMode.CREATE,
) -> None:
    """Write rows as JSON Lines (one JSON object per line) to a file

    Args:
        data: Rows to write; each row is a dictionary or a non-empty list of
            ``(key, value)`` tuples.
        path: File to write to.
        mode: How to open the file. The default, ``WriteMode.CREATE``, fails
            if the file already exists.

    Raises:
        AssertionError: If ``mode`` is not a ``WriteMode`` (logged first).
        FileExistsError: If the file exists and ``mode`` is
            ``WriteMode.CREATE`` (logged first).
        ValueError: If a row is neither a dictionary nor a non-empty list of
            tuples.
    """
    path = pathlib.Path(path)  # Ensure path is a Path object

    try:
        assert isinstance(
            mode, WriteMode
        ), f"Mode needs to be one of {[x.name for x in WriteMode]}"

        with open(path, mode=mode.value, encoding="utf-8") as outfile:
            logging.debug(f"Writing serialised data, {path=}")

            for line in data:
                if isinstance(line, dict):
                    _line = line
                elif isinstance(line, list) and line and isinstance(line[0], tuple):
                    _line = dict(line)
                else:
                    # Also covers an empty list, which used to fail with an
                    # IndexError on line[0]
                    raise ValueError("Data must be a list of key/value pairs")

                outfile.write(json.dumps(dict(_jsonify_types(_line))))
                # The file is opened in text mode, which already translates
                # "\n" into the platform's line ending. Writing os.linesep
                # here would be translated again ("\r\r\n" on Windows).
                outfile.write("\n")

    except (AssertionError, FileExistsError) as e:
        logging.error(str(e))
        raise
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def read_jsonl(
|
|
64
|
+
path: pathlib.Path | str,
|
|
65
|
+
) -> Iterator | list[list[tuple[str, Jsonified]]]:
|
|
66
|
+
"""Read JSONL serialised data from a given path/file"""
|
|
67
|
+
path = pathlib.Path(path) # Ensure path is a Path object
|
|
68
|
+
|
|
69
|
+
try:
|
|
70
|
+
with open(path, mode="r", encoding="utf-8") as infile:
|
|
71
|
+
logging.debug(f"Reading serialised data, {path=}")
|
|
72
|
+
|
|
73
|
+
yield from iter(json.loads(line) for line in infile)
|
|
74
|
+
|
|
75
|
+
except FileNotFoundError as e:
|
|
76
|
+
logging.error(str(e))
|
|
77
|
+
raise e
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def delete_file(path: pathlib.Path | str) -> None:
|
|
81
|
+
"""Delete a given data file"""
|
|
82
|
+
path = pathlib.Path(path) # Ensure path is a Path object
|
|
83
|
+
|
|
84
|
+
try:
|
|
85
|
+
logging.debug(f"Deleting serialised data, {path=}")
|
|
86
|
+
os.remove(path)
|
|
87
|
+
except FileNotFoundError as e:
|
|
88
|
+
logging.error(str(e))
|
|
89
|
+
raise e
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def count_file(path: pathlib.Path | str) -> int:
|
|
93
|
+
"""Count number of lines in a given path/file"""
|
|
94
|
+
path = pathlib.Path(path) # Ensure path is a Path object
|
|
95
|
+
|
|
96
|
+
_idx = 0
|
|
97
|
+
|
|
98
|
+
with open(path, mode="rb") as infile:
|
|
99
|
+
for _idx, _ in enumerate(infile, start=1):
|
|
100
|
+
pass
|
|
101
|
+
|
|
102
|
+
return _idx
|
rmk2/hash.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import hashlib
|
|
3
|
+
import logging
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class HashAlgorithm(Enum):
    """Supported hash algorithms; each value is the matching ``hashlib`` constructor."""

    SHA256 = hashlib.sha256
    SHA384 = hashlib.sha384
    SHA512 = hashlib.sha512
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def hash_values(
    *args: Any,
    algorithm: HashAlgorithm = HashAlgorithm.SHA256,
    replace_null: str = "",
) -> str:
    """Create one hex digest from any number of values

    Each value is cast to ``str``, encoded as UTF-8 and fed into the hash in
    the order given, without any separator in between. ``None`` and ``""``
    are replaced by ``replace_null`` first; all other values must be exactly
    of type int, float, str, bool, datetime, date or timedelta.

    Raises:
        AssertionError: If ``algorithm`` is not a ``HashAlgorithm`` member, or
            (logged first) if a value has an unsupported type.
    """
    allowed = (
        int,
        float,
        str,
        bool,
        datetime.datetime,
        datetime.date,
        datetime.timedelta,
    )

    assert (
        algorithm in HashAlgorithm
    ), f"Algorithm needs to be one of {[x.name for x in HashAlgorithm]}"

    try:
        digest = algorithm.value(usedforsecurity=True)

        for value in args:
            if value is None or value == "":
                # A non-empty replace_null keeps NULL values visible in the
                # digest, so that (1, None) and (None, 1) hash differently
                text = str(replace_null)
            else:
                assert (
                    type(value) in allowed
                ), f"Cannot cast to string, type={type(value)}"
                text = str(value)

            digest.update(text.encode(encoding="utf-8"))

        return digest.hexdigest()

    except AssertionError as e:
        logging.error(str(e))
        raise e
|