PostBOUND 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- postbound/__init__.py +211 -0
- postbound/_base.py +6 -0
- postbound/_bench.py +1012 -0
- postbound/_core.py +1153 -0
- postbound/_hints.py +1373 -0
- postbound/_jointree.py +1079 -0
- postbound/_pipelines.py +1121 -0
- postbound/_qep.py +1986 -0
- postbound/_stages.py +876 -0
- postbound/_validation.py +734 -0
- postbound/db/__init__.py +72 -0
- postbound/db/_db.py +2348 -0
- postbound/db/_duckdb.py +785 -0
- postbound/db/mysql.py +1195 -0
- postbound/db/postgres.py +4216 -0
- postbound/experiments/__init__.py +12 -0
- postbound/experiments/analysis.py +674 -0
- postbound/experiments/benchmarking.py +54 -0
- postbound/experiments/ceb.py +877 -0
- postbound/experiments/interactive.py +105 -0
- postbound/experiments/querygen.py +334 -0
- postbound/experiments/workloads.py +980 -0
- postbound/optimizer/__init__.py +92 -0
- postbound/optimizer/__init__.pyi +73 -0
- postbound/optimizer/_cardinalities.py +369 -0
- postbound/optimizer/_joingraph.py +1150 -0
- postbound/optimizer/dynprog.py +1825 -0
- postbound/optimizer/enumeration.py +432 -0
- postbound/optimizer/native.py +539 -0
- postbound/optimizer/noopt.py +54 -0
- postbound/optimizer/presets.py +147 -0
- postbound/optimizer/randomized.py +650 -0
- postbound/optimizer/tonic.py +1479 -0
- postbound/optimizer/ues.py +1607 -0
- postbound/qal/__init__.py +343 -0
- postbound/qal/_qal.py +9678 -0
- postbound/qal/formatter.py +1089 -0
- postbound/qal/parser.py +2344 -0
- postbound/qal/relalg.py +4257 -0
- postbound/qal/transform.py +2184 -0
- postbound/shortcuts.py +70 -0
- postbound/util/__init__.py +46 -0
- postbound/util/_errors.py +33 -0
- postbound/util/collections.py +490 -0
- postbound/util/dataframe.py +71 -0
- postbound/util/dicts.py +330 -0
- postbound/util/jsonize.py +68 -0
- postbound/util/logging.py +106 -0
- postbound/util/misc.py +168 -0
- postbound/util/networkx.py +401 -0
- postbound/util/numbers.py +438 -0
- postbound/util/proc.py +107 -0
- postbound/util/stats.py +37 -0
- postbound/util/system.py +48 -0
- postbound/util/typing.py +35 -0
- postbound/vis/__init__.py +5 -0
- postbound/vis/fdl.py +69 -0
- postbound/vis/graphs.py +48 -0
- postbound/vis/optimizer.py +538 -0
- postbound/vis/plots.py +84 -0
- postbound/vis/tonic.py +70 -0
- postbound/vis/trees.py +105 -0
- postbound-0.19.0.dist-info/METADATA +355 -0
- postbound-0.19.0.dist-info/RECORD +67 -0
- postbound-0.19.0.dist-info/WHEEL +5 -0
- postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
- postbound-0.19.0.dist-info/top_level.txt +1 -0
postbound/util/dicts.py
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
"""Contains utilities to access and modify dictionaries more conveniently."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
import collections
|
|
8
|
+
import itertools
|
|
9
|
+
import numbers
|
|
10
|
+
import typing
|
|
11
|
+
import warnings
|
|
12
|
+
from collections.abc import Callable, Iterable, Sequence
|
|
13
|
+
from typing import Optional
|
|
14
|
+
|
|
15
|
+
T = typing.TypeVar("T")
|
|
16
|
+
K = typing.TypeVar("K")
|
|
17
|
+
V = typing.TypeVar("V")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def stringify(d: dict[K, V]) -> str:
    """Generates a string-representation of a dictionary.

    In contrast to calling ``str()`` directly, this method generates proper string representations of both keys and values and
    does not use ``repr()`` for them. Nested objects are stilled formatted according to ``str()`` however.

    Parameters
    ----------
    d : dict[K, V]
        The dictionary to stringify

    Returns
    -------
    str
        The string representation
    """
    entry_fragments = []
    for entry_key, entry_value in d.items():
        entry_fragments.append(f"{entry_key}: {entry_value}")
    return "{" + ", ".join(entry_fragments) + "}"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def key(dictionary: dict[K, V]) -> K:
    """Provides the key of a dictionary with just 1 item.

    `key({'a': 1}) = 'a'`
    """
    nvals = len(dictionary)
    if nvals != 1:
        raise ValueError(
            f"Dictionary must contain exactly 1 entry, not {nvals}: {dictionary}"
        )
    (only_key,) = dictionary
    return only_key
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def value(dictionary: dict[K, V]) -> V:
    """Provides the value of a dictionary with just 1 item.

    `value({'a': 1}) = 1`
    """
    if len(dictionary) != 1:
        raise ValueError(
            f"Dictionary must contain exactly 1 entry, not {len(dictionary)}"
        )
    (only_value,) = dictionary.values()
    return only_value
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def difference(a: dict[K, V], b: dict[K, V]) -> dict[K, V]:
    """Computes the set difference between two dictionaries based on their keys.

    Parameters
    ----------
    a : dict[K, V]
        The dict to remove entries from
    b : dict[K, V]
        The entries to remove

    Returns
    -------
    dict[K, V]
        A dictionary that contains all *key, value* pairs from *a* where the *key* is not in *b*.
    """
    remaining: dict[K, V] = {}
    for entry_key, entry_value in a.items():
        if entry_key in b:
            continue
        remaining[entry_key] = entry_value
    return remaining
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def intersection(a: dict[K, V], b: dict[K, V]) -> dict[K, V]:
    """Computes the set intersection between two dictionaries based on their keys.

    Parameters
    ----------
    a : dict[K, V]
        The first dictionary
    b : dict[K, V]
        The second dictionary

    Returns
    -------
    dict[K, V]
        A dictionary that contains all *key, value* pairs from *a* where the *key* is also in *b*.
    """
    shared: dict[K, V] = {}
    for entry_key, entry_value in a.items():
        if entry_key in b:
            shared[entry_key] = entry_value
    return shared
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def merge(
    a: dict[K, V], b: dict[K, V], *, updater: Optional[Callable[[K, V, V], V]] = None
) -> dict[K, V]:
    """Creates a new dict containing all key/values pairs from both argument dictionaries.

    If keys overlap, entries from dictionary `b` will take priority, unless an `updater` method is given.
    If `updater` is given, and `a[k] = v` and `b[k] = v'` (i.e. both `a` and `b` share a key `k`) the merged dictionary
    will contain the result of `updater(k, v, v')` as entry for `k`.

    Note that as of Python 3.9, such a method was added to dictionaries as well (via the `|` syntax). The `updater`-based
    merge is not optimized for larger dictionaries and will probably have a pretty bad performance on such input data.

    Parameters
    ----------
    a : dict[K, V]
        The first dictionary
    b : dict[K, V]
        The second dictionary; wins on conflicting keys unless `updater` is given
    updater : Optional[Callable[[K, V, V], V]], optional
        Conflict resolution callback receiving the key and both conflicting values

    Returns
    -------
    dict[K, V]
        The merged dictionary
    """
    if not updater:
        # Dict unpacking gives b priority on duplicate keys, same semantics as the
        # previous list-concatenation approach but without building throwaway lists.
        return {**a, **b}

    merged = dict(a)
    for k, v in b.items():
        merged[k] = updater(k, merged[k], v) if k in merged else v
    return merged
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def update(dictionary: dict[K, V], updater: Callable[[K, V], T]) -> dict[K, T]:
    """Creates a new dict by calling update on each key/value pair on the old dict, retaining its keys."""
    transformed: dict[K, T] = {}
    for entry_key, entry_value in dictionary.items():
        transformed[entry_key] = updater(entry_key, entry_value)
    return transformed
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def explode(dictionary: dict[K, list[V]]) -> list[tuple[K, V]]:
    """Transforms dicts mapping keys to lists of values to a list of key/value pairs."""
    return [
        (entry_key, entry_value)
        for entry_key, grouped_values in dictionary.items()
        for entry_value in grouped_values
    ]
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def hash_dict(dictionary: dict[K, V]) -> int:
    """Calculates a hash value based on the current dict contents (keys and values).

    Values are hashed on a best-effort basis: hashable values directly, lists and sets
    via a tuple of their elements, nested dicts recursively. Any other unhashable value
    is skipped with a warning (and simply does not contribute to the hash).

    Parameters
    ----------
    dictionary : dict[K, V]
        The dictionary to hash

    Returns
    -------
    int
        Combined hash over the keys and the hashable values
    """
    keys = list(dictionary.keys())
    values = []
    for val in dictionary.values():
        if isinstance(val, collections.abc.Hashable):
            values.append(hash(val))
        elif isinstance(val, (list, set)):
            values.append(hash(tuple(val)))
        elif isinstance(val, dict):
            values.append(hash_dict(val))
        else:
            # BUG FIX: the original concatenated str + type, which raises TypeError
            # instead of warning. Format the type explicitly.
            warnings.warn(f"Unhashable type, skipping: {type(val)}")
    keys_hash = hash(tuple(keys))
    values_hash = hash(tuple(values))
    return hash((keys_hash, values_hash))
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def generate_multi(entries: list[tuple[K, V]]) -> dict[K, list[V]]:
    """Generates a multi-dict based on its entries.

    Each key can occur multiple times and values will be aggregated in a list.
    """
    grouped: dict[K, list[V]] = {}
    for entry_key, entry_value in entries:
        grouped.setdefault(entry_key, []).append(entry_value)
    return grouped
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def reduce_multi(
    multi_dict: dict[K, list[V]], reduction: Callable[[K, list[V]], V]
) -> dict[K, V]:
    """Ungroups a multi-dict by aggregating the values based on key and values."""
    reduced: dict[K, V] = {}
    for entry_key, grouped_values in multi_dict.items():
        reduced[entry_key] = reduction(entry_key, grouped_values)
    return reduced
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def invert_multi(mapping: dict[K, list[V]]) -> dict[V, list[K]]:
    """Inverts the `key -> values` mapping of a dict to become `value -> keys` instead.

    Supppose a multi-dict has the following contents: `{'a': [1, 2], 'b': [2, 3]}`.
    Calling `invert` transforms this mapping to `{1: ['a'], 2: ['a', 'b'], 3: ['b']}`.
    """
    # BUG FIX: the original first built an intermediate dict keyed by tuple(values).
    # Two keys with identical value lists collided there, silently dropping all but
    # the last key. Iterating the mapping directly avoids the collision entirely.
    inverted: dict[V, list[K]] = {}
    for k, vs in mapping.items():
        for v in vs:
            inverted.setdefault(v, []).append(k)
    return inverted
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def aggregate(dictionaries: Iterable[dict[K, V]]) -> dict[K, Sequence[V]]:
    """Combines multiple dictionaries into one that maps each key to the list of all its values."""
    combined: dict[K, list[V]] = {}
    for current_dict in dictionaries:
        for entry_key, entry_value in current_dict.items():
            combined.setdefault(entry_key, []).append(entry_value)
    return combined
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def invert(mapping: dict[K, V]) -> dict[V, K]:
    """Inverts the `key -> value` mapping of a dict to become `value -> key` instead.

    In contrast to `invert_multi` this does not handle duplicate values (which leads to duplicate keys), nor does it
    process the original values of `mapping` in any way.

    Basically, this function is just a better-readable shortcut for `{v: k for k, v in d.items()}`.
    """
    inverted: dict[V, K] = {}
    for entry_key, entry_value in mapping.items():
        inverted[entry_value] = entry_key
    return inverted
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def argmin(mapping: dict[K, numbers.Number]) -> K:
    """
    For a dict mapping keys to numeric types, returns the key `k` with minimum value `v`, s.t. for all keys `k'` with
    values `v'` it holds that `v <= v'`.
    """
    return min(mapping.keys(), key=lambda candidate: mapping[candidate])
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def dict_to_numpy(data: dict[K, V]) -> np.ndarray:
    """Converts the dictionary values into a numpy array, ordered by the sorted keys.

    The return annotation was ``np.array[V]`` which is not a valid type (``np.array``
    is a function, not a class); ``np.ndarray`` is the correct annotation.

    Parameters
    ----------
    data : dict[K, V]
        The dictionary to convert. Keys must be mutually sortable.

    Returns
    -------
    np.ndarray
        The values of `data`, in ascending key order
    """
    sorted_values = [data[k] for k in sorted(data)]
    return np.asarray(sorted_values)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class HashableDict(collections.UserDict[K, V]):
    """A dictionary implementation that can be hashed.

    Warnings
    --------
    This type should be used with extreme caution in order to not violate any invariants due to unintended data modification.
    """

    def __hash__(self) -> int:
        # Content-based hash over the wrapped plain dict (keys and values).
        contents = self.data
        return hash_dict(contents)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class CustomHashDict(collections.UserDict[K, V]):
    """Dictionary wrapper that routes every key through a user-supplied hash function.

    Apart from hashing, all behavior is inherited from the default Python dictionary.
    Item access is overridden so that the underlying dict only ever sees the integers
    produced by the custom hash function.

    Notice that since the custom hash function always provides an integer value, collision detection is weaker than originally.
    This is because the actual dictionary never sees the original keys to run an equality comparison. Instead, the comparison
    is based on the integer values.

    Parameters
    ----------
    hash_func : Callable[[K], int]
        The hashing function to use. It receives the key as input and must produce a valid hash value as output.
    **kwargs : dict, optional
        Additional keyword arguments that should be passed to the hashing function upon each invocation.
    """

    def __init__(self, hash_func: Callable[[K], int], **kwargs) -> None:
        super().__init__()
        self.hash_function = hash_func
        self._hash_args = kwargs

    def _apply_hash(self, key: K) -> int:
        # Single translation point: every public dict operation funnels through here.
        return self.hash_function(key, **self._hash_args)

    def __getitem__(self, k: K) -> V:
        hashed_key = self._apply_hash(k)
        return super().__getitem__(hashed_key)

    def __setitem__(self, k: K, item: V) -> None:
        hashed_key = self._apply_hash(k)
        super().__setitem__(hashed_key, item)

    def __delitem__(self, key: K) -> None:
        hashed_key = self._apply_hash(key)
        super().__delitem__(hashed_key)

    def __contains__(self, key: K) -> bool:
        hashed_key = self._apply_hash(key)
        return super().__contains__(hashed_key)
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class DynamicDefaultDict(collections.UserDict[K, V]):
|
|
276
|
+
"""Wrapper of a normal Python `defaultdict` that permits dynamic default values.
|
|
277
|
+
|
|
278
|
+
When using a standard Python `defaultdict`, the default value must be decided up-front. This value is used for all missing
|
|
279
|
+
keys. In contrast, this dictionary implementation allows for the default value to be calculated based on the requested key.
|
|
280
|
+
|
|
281
|
+
Parameters
|
|
282
|
+
----------
|
|
283
|
+
factory : Callable[[K], V]
|
|
284
|
+
A function that generates the value based on a missing key. It receives the key as input and must return the value.
|
|
285
|
+
"""
|
|
286
|
+
|
|
287
|
+
def __init__(self, factory: Callable[[K], V]) -> None:
|
|
288
|
+
super().__init__()
|
|
289
|
+
self.factory = factory
|
|
290
|
+
|
|
291
|
+
def __getitem__(self, k: K) -> V:
|
|
292
|
+
if k not in self.data:
|
|
293
|
+
self.data[k] = self.factory(k)
|
|
294
|
+
return self.data[k]
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
class frozendict(collections.UserDict[K, V]):
    """Read-only variant of a normal Python dictionary.

    Once the dictionary has been created, its key/value pairs can no longer be modified. At the same time, this allows the
    dictionary to be hashable by default.

    Parameters
    ----------
    items : any, optional
        Supports the same argument types as the normal dictionary. If no items are supplied, an empty frozen dictionary is
        returned.
    """

    def __init__(self, items=None) -> None:
        # _frozen gates the mutation hooks: UserDict.__init__ populates the contents
        # through __setitem__/update, so mutation must stay allowed during setup.
        self._frozen = False
        super().__init__(items)
        # Disable the mutating parts of the dict API after construction.
        self.clear = None
        self.pop = None
        self.popitem = None
        # BUG FIX: this was a bare `self.update` expression (a no-op attribute access);
        # clearly intended to disable update() like the methods above.
        self.update = None
        self._frozen = True

    def __setitem__(self, key: K, item: V) -> None:
        if self._frozen:
            raise TypeError("Cannot set frozendict entries after creation")
        return super().__setitem__(key, item)

    def __delitem__(self, key: K) -> None:
        if self._frozen:
            raise TypeError("Cannot remove frozendict entries after creation")
        return super().__delitem__(key)

    def __hash__(self) -> int:
        return hash_dict(self)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Contains utilities to store and load objects more conveniently to/from JSON.
|
|
2
|
+
|
|
3
|
+
More specifically, this module introduces the `JsonizeEncoder`, which can be accessed via the `to_json` utility method.
|
|
4
|
+
This encoder allows to transform instances of any class to JSON by providing a `__json__` method in the class
|
|
5
|
+
implementation. This method does not take any (required) parameters and returns a JSON-izeable representation of the
|
|
6
|
+
current instance, e.g. a `dict` or a `list`.
|
|
7
|
+
|
|
8
|
+
Sadly (or luckily?), the inverse conversion does not work because JSON does not store any type information.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import abc
|
|
14
|
+
import enum
|
|
15
|
+
import json
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import IO, Any, Protocol, runtime_checkable
|
|
18
|
+
|
|
19
|
+
jsondict = dict
|
|
20
|
+
"""Type alias for a JSON-izeable dictionary."""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@runtime_checkable
class Jsonizable(Protocol):
    """Structural protocol for classes that expose a `__json__` serialization hook."""

    @abc.abstractmethod
    def __json__(self) -> jsondict:
        raise NotImplementedError
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class JsonizeEncoder(json.JSONEncoder):
    """The JsonizeEncoder allows to transform instances of any class to JSON.

    This can be achieved by providing a `__json__` method in the class implementation. This method does not take any
    (required) parameters and returns a JSON-izeable representation of the current instance, e.g. a `dict` or a `list`.

    In addition, enums are serialized via their value, sets become lists, and paths become strings.
    """

    def default(self, obj: Any) -> Any:
        if isinstance(obj, enum.Enum):
            return obj.value
        elif isinstance(obj, (set, frozenset)):
            return list(obj)
        elif isinstance(obj, Path):
            return str(obj)
        # Idiom fix: hasattr() instead of building the full dir() listing just to
        # test for one attribute.
        elif hasattr(obj, "__json__"):
            return obj.__json__()
        return json.JSONEncoder.default(self, obj)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def to_json(obj: Any, *args, **kwargs) -> str | None:
    """Utility to transform any object to a JSON object, while making use of the `JsonizeEncoder`.

    All arguments other than the object itself are passed to the default Python `json.dumps` function.
    """
    if obj is None:
        return None
    # A caller-supplied encoder would conflict with the forced JsonizeEncoder below.
    if "cls" in kwargs:
        del kwargs["cls"]
    return json.dumps(obj, *args, cls=JsonizeEncoder, **kwargs)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def to_json_dump(obj: Any, file: IO, *args, **kwargs) -> None:
    """Utility to transform any object to a JSON object and write it to a file, while making use of the `JsonizeEncoder`.

    All arguments other than the object itself are passed to the default Python `json.dump` function.
    """
    kwargs.pop("cls", None)
    # Fix: the original unpacked *args AFTER the cls= keyword argument (flake8 B026),
    # which is confusing and inconsistent with to_json(); pass positionals first.
    json.dump(obj, file, *args, cls=JsonizeEncoder, **kwargs)
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Contains utilities to conveniently log different information."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import atexit
|
|
6
|
+
import pprint
|
|
7
|
+
import functools
|
|
8
|
+
import sys
|
|
9
|
+
from collections.abc import Callable
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from typing import IO
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
Logger = Callable[..., None]
|
|
15
|
+
"""Type alias for our loggers.
|
|
16
|
+
|
|
17
|
+
Each logger accepts an arbitrary amount of arguments and is inteded to function as a replacement for the `print` function.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def timestamp() -> str:
    """Provides the current time as a nice and normalized string."""
    now = datetime.now()
    return now.strftime("%y-%m-%d %H:%M:%S")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def make_logger(
    enabled: bool = True,
    *,
    file: IO[str] = sys.stderr,
    pretty: bool = False,
    prefix: str | Callable[[], str] = "",
) -> Logger:
    """Creates a new logging utility.

    The generated method can be used like a regular `print`, but with defaults that are better suited for logging purposes.

    If `enabled` is `False`, calling the logging function will not actually print anything and simply return. This
    is especially useful to implement logging-hooks in longer functions without permanently re-checking whether logging
    is enabled or not.

    By default, all logging output will be written to stderr, but this can be customized by supplying a different
    `file`.

    If `pretty` is enabled, structured objects such as dictionaries will be pretty-printed instead of being written
    on a single line. Note that pprint is used for all of the logging data everytime in that case.

    Parameters
    ----------
    enabled : bool, optional
        Whether logging is enabled, by default *True*
    file : IO[str], optional
        Destination to write the log entries to, by default ``sys.stderr``
    pretty : bool, optional
        Whether complex objects should be pretty-printed using the ``pprint`` module, by default *False*
    prefix : str | Callable[[], str], optional
        A common prefix that should be added before each log entry. Can be either a hard-coded string, or a callable that
        dynamically produces a string for each logging action separately (e.g. timestamp).
        NOTE(review): the prefix is currently ignored when `pretty` is enabled, because pretty
        output delegates directly to ``pprint`` — confirm this is intended.

    Returns
    -------
    Logger
        A print-like function that writes its arguments to `file` (or silently discards
        them when logging is disabled).
    """

    def _log(*args, **kwargs) -> None:
        # Resolve the prefix per call: a plain string is prepended as-is, a callable
        # is invoked each time (e.g. to produce a fresh timestamp).
        if prefix and isinstance(prefix, str):
            args = [prefix] + list(args)
        elif prefix:
            args = [prefix()] + list(args)
        print(*args, file=file, **kwargs)

    def _dummy_log(*args, **kwargs) -> None:
        # No-op stand-in so callers never need to check whether logging is enabled.
        pass

    if pretty and enabled:
        return functools.partial(pprint.pprint, stream=file)

    return _log if enabled else _dummy_log
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def print_stderr(*args, **kwargs) -> None:
    """A normal `print` that writes to stderr instead of stdout."""
    # Drop any caller-supplied destination; stderr always wins here.
    forwarded = {option: setting for option, setting in kwargs.items() if option != "file"}
    print(*args, file=sys.stderr, **forwarded)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def print_if(should_print: bool, *args, use_stderr: bool = False, **kwargs) -> None:
    """A normal `print` that only prints something if `should_print` evaluates true-ish. Can optionally print to stderr."""
    if not should_print:
        return
    # An explicit file= kwarg overrides the stdout/stderr selection.
    fallback_device = sys.stderr if use_stderr else sys.stdout
    target = kwargs.pop("file", fallback_device)
    print(*args, file=target, **kwargs)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class _TeeLogger:
|
|
95
|
+
def __init__(self, target_file: str, output_mode: str = "a") -> None:
|
|
96
|
+
self._original_stdout = sys.stdout
|
|
97
|
+
self._log_out = open(target_file, output_mode)
|
|
98
|
+
atexit.register(lambda: self._log_out.close())
|
|
99
|
+
|
|
100
|
+
def write(self, message: str) -> None:
|
|
101
|
+
self._original_stdout.write(message)
|
|
102
|
+
self._log_out.write(message)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def tee_stdout(target_file: str, output_mode: str = "a") -> None:
    """Mirrors all stdout output to `target_file` while still writing to the original stdout.

    Replaces ``sys.stdout`` with a `_TeeLogger`; `output_mode` is the ``open`` mode for the
    log file (append by default). There is no built-in way to undo the replacement.
    """
    sys.stdout = _TeeLogger(target_file, output_mode)
|
postbound/util/misc.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""Contains various utilities that did not fit any other category."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import collections
|
|
6
|
+
import re
|
|
7
|
+
from collections.abc import Iterable
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from typing import Any, Generator, Generic, Optional, TypeVar
|
|
10
|
+
|
|
11
|
+
from . import jsonize
|
|
12
|
+
|
|
13
|
+
T = TypeVar("T")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def current_timestamp() -> str:
    """Provides the current time (year-month-day hour:minute)"""
    fmt = "%y-%m-%d %H:%M"
    return datetime.now().strftime(fmt)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Matches every position directly before an uppercase letter, except the string start.
_CamelCasePattern = re.compile(r"(?<!^)(?=[A-Z])")


def camel_case2snake_case(camel_case: str) -> str:
    """Converts a CamelCase identifier into snake_case (adapted from https://stackoverflow.com/a/1176023)."""
    with_separators = _CamelCasePattern.sub("_", camel_case)
    return with_separators.lower()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _wrap_version(v: Any) -> Version:
    """Transforms any object into a Version instance if it is not already."""
    return v if isinstance(v, Version) else Version(v)


class Version(jsonize.Jsonizable):
    """Version instances represent versioning information and ensure that comparison operations work as expected.

    For example, Version instances can be created for strings such as "14.6" or "1.3.1" and ensure that 14.6 > 1.3.1

    Parameters
    ----------
    ver : str | int | list[str] | list[int]
        The version, either as a dot-separated string, a single int component, or a list of components

    Raises
    ------
    ValueError
        If `ver` cannot be interpreted as a version
    """

    def __init__(self, ver: str | int | list[str] | list[int]) -> None:
        try:
            if isinstance(ver, int):
                self._version = [ver]
            elif isinstance(ver, str):
                self._version = [int(v) for v in ver.split(".")]
            elif isinstance(ver, list) and ver:
                self._version = [int(v) for v in ver]
            else:
                raise ValueError(f"Unknown version string: '{ver}'")
        except ValueError as e:
            # Normalize int() conversion failures into a uniform message, keeping the cause.
            raise ValueError(f"Unknown version string: '{ver}'") from e

    def formatted(self, *, prefix: str = "", suffix: str = "", separator: str = ".") -> str:
        """Renders the version, e.g. ``Version("1.2").formatted(prefix="v")`` gives ``"v1.2"``."""
        return prefix + separator.join(str(v) for v in self._version) + suffix

    def __json__(self) -> object:
        return str(self)

    def __eq__(self, __o: object) -> bool:
        # Versions of different lengths are never equal (1.2 != 1.2.0);
        # objects that cannot be interpreted as a version compare unequal.
        try:
            other = _wrap_version(__o)
            if not len(self) == len(other):
                return False
            for i in range(len(self)):
                if not self._version[i] == other._version[i]:
                    return False
            return True
        except ValueError:
            return False

    def __hash__(self) -> int:
        # BUG FIX: defining __eq__ without __hash__ made Version unhashable
        # (Python sets __hash__ to None in that case). Hash the same data that
        # __eq__ compares, so equal versions hash equally.
        return hash(tuple(self._version))

    def __ge__(self, __o: object) -> bool:
        return not self < __o

    def __gt__(self, __o: object) -> bool:
        return not self <= __o

    def __le__(self, __o: object) -> bool:
        other = _wrap_version(__o)
        for comp in zip(self._version, other._version):
            own_version, other_version = comp
            if own_version < other_version:
                return True
            if other_version < own_version:
                return False
        # All shared components equal: the shorter (or equal-length) version is <=.
        return len(self) <= len(other)

    def __lt__(self, __o: object) -> bool:
        other = _wrap_version(__o)
        for comp in zip(self._version, other._version):
            own_version, other_version = comp
            if own_version < other_version:
                return True
            if other_version < own_version:
                return False
        return len(self) < len(other)

    def __len__(self) -> int:
        return len(self._version)

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        return "v" + ".".join(str(v) for v in self._version)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class DependencyGraph(Generic[T]):
    """A simple dependency graph abstraction. Entries are added via `add_task` and iteration yields the source nodes first.

    Nodes are stored and looked up by their ``hash()``, so all tasks must be hashable and
    two tasks with equal hashes are treated as the same node.
    """

    def __init__(self) -> None:
        # Nodes without (declared) prerequisites; iteration starts from these.
        self._source_nodes: set[int] = set()
        # Maps a prerequisite's node id to the ids of the tasks that depend on it.
        self._dependencies: dict[int, list[int]] = collections.defaultdict(list)
        # Maps node ids back to the original task objects.
        self._nodes: dict[int, T] = {}

    def add_task(self, node: T, *, depends_on: Optional[Iterable[T]] = None) -> None:
        """Queues a new task/entry/whatever.

        Parameters
        ----------
        node : T
            The new task
        depends_on : Optional[Iterable[T]], optional
            Optional other tasks that have to be completed before this one. If those task have not been added yet, this will
            be done automatically.
        """
        node_id = hash(node)
        self._nodes[node_id] = node

        # No prerequisites: the task is a source node and we are done.
        if not depends_on:
            self._source_nodes.add(node_id)
            return

        # The task may have been auto-registered as a source earlier (as someone
        # else's dependency); it now has prerequisites, so demote it.
        if node_id in self._source_nodes:
            self._source_nodes.remove(node_id)

        for dep in depends_on:
            dep_id = hash(dep)
            # Record the edge prerequisite -> dependent task.
            self._dependencies[dep_id].append(node_id)

            # Unknown prerequisites are registered implicitly as source nodes.
            if dep_id not in self._nodes:
                self._source_nodes.add(dep_id)
                self._nodes[dep_id] = dep

    def __iter__(self) -> Generator[T, Any, None]:
        # Depth-first walk from each source node, yielding a task before the tasks
        # that depend on it.
        provided_nodes: set[int] = set()
        for node_id in self._source_nodes:
            yield self._nodes[node_id]

            provided_nodes.add(node_id)
            dependency_stack = list(self._dependencies[node_id])

            while dependency_stack:
                dep_id = dependency_stack.pop()
                # Skip tasks already yielded during this source node's walk.
                if dep_id in provided_nodes:
                    continue

                yield self._nodes[dep_id]

                provided_nodes.add(dep_id)
                dependency_stack.extend(self._dependencies[dep_id])
            # NOTE(review): the visited set is reset after each source node, so a task
            # reachable from several source nodes is yielded once per source. Confirm
            # whether this repetition is intended; a shared set would yield each task
            # exactly once. This walk also does not guard against cycles.
            provided_nodes = set()

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        nodes_str = ", ".join(str(node) for node in self._nodes.values())
        return f"DependencyGraph({nodes_str})"
|