pydiverse-common 0.3.2__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydiverse/common/__init__.py +55 -0
- pydiverse/common/dtypes.py +418 -0
- pydiverse/common/errors/__init__.py +9 -0
- pydiverse/common/util/__init__.py +13 -0
- pydiverse/common/util/computation_tracing.py +341 -0
- pydiverse/common/util/deep_map.py +100 -0
- pydiverse/common/util/deep_merge.py +55 -0
- pydiverse/common/util/disposable.py +28 -0
- pydiverse/common/util/hashing.py +32 -0
- pydiverse/common/util/import_.py +135 -0
- pydiverse/common/util/structlog.py +115 -0
- pydiverse/common/version.py +10 -0
- pydiverse_common-0.3.2.dist-info/METADATA +64 -0
- pydiverse_common-0.3.2.dist-info/RECORD +16 -0
- pydiverse_common-0.3.2.dist-info/WHEEL +5 -0
- pydiverse_common-0.3.2.dist-info/licenses/LICENSE +29 -0
@@ -0,0 +1,341 @@
|
|
1
|
+
# Copyright (c) QuantCo and pydiverse contributors 2025-2025
|
2
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
3
|
+
import dis
|
4
|
+
import inspect
|
5
|
+
from enum import Enum
|
6
|
+
from typing import Any
|
7
|
+
|
8
|
+
|
9
|
+
class Operation(Enum):
    """Tags identifying the kind of entry stored in a computation trace.

    The ``repr`` of a member is its bare name (e.g. ``CALL``).
    """

    # An object entry (used for root proxies and for function bytecode)
    OBJECT = 0

    # Attribute protocol
    GETATTR = 10
    SETATTR = 11
    DELATTR = 12

    # Item protocol
    GETITEM = 20
    SETITEM = 21
    DELITEM = 22

    # Remaining hooks recorded by the proxy
    CALL = 50
    GET = 60
    BOOL = 70

    def __repr__(self):
        member_name = self.name
        return member_name
|
23
|
+
|
24
|
+
|
25
|
+
class ComputationTracer:
    """Records operations performed on proxy objects as a linear trace.

    Used as a context manager: entering applies the monkey patches defined by
    ``_monkey_patch`` (a no-op hook in this base class), exiting reverts them
    and freezes the trace. Every trace entry is a tuple whose first element is
    an :class:`Operation`.
    """

    proxy_type: type["ComputationTracerProxy"]

    def __init__(self):
        self.trace = []
        self.patcher = MonkeyPatcher()
        self.did_exit = False
        self.proxy_type = ComputationTracerProxy

    def create_proxy(self, identifier=None):
        """Record an ``OBJECT`` entry for ``identifier`` and return its proxy."""
        return self._get_proxy((Operation.OBJECT, identifier))

    def __enter__(self):
        try:
            self._monkey_patch()
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so patches that
            # were already applied must be reverted here to avoid leaking them.
            self.patcher.undo()
            raise
        # clear trace already filled during patching (modules may issue calls during
        # initialization)
        self.trace = []
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.patcher.undo()
        self.did_exit = True

    def _get_proxy(self, computation: tuple):
        """Append ``computation`` to the trace and return a proxy for it.

        The proxy's identifier is the index the computation receives in the trace.
        """
        idx = len(self.trace)
        self._add_computation(computation)
        return self.proxy_type(self, idx)

    def _add_computation(self, computation: tuple):
        """Append ``computation`` to the trace.

        :raises RuntimeError: if the tracer context has already been exited.
        """
        if not self.did_exit:
            # Imported lazily: deep_map itself imports from this module.
            from pydiverse.common.util.deep_map import deep_map

            computation = deep_map(computation, self._computation_mapper)
            self.trace.append(computation)
        else:
            raise RuntimeError(
                "Can't modify ComputationTrace after exiting the context."
            )

    @staticmethod
    def _computation_mapper(x):
        """Replace values inside a computation by their trace representation.

        Proxies become :class:`ComputationTraceRef` objects, plain Python
        functions become a tuple of their ``(opcode, argval)`` pairs, anything
        else is returned unchanged.
        """
        if isinstance(x, ComputationTracerProxy):
            return ComputationTraceRef(x)

        if inspect.isfunction(x):
            bytecode = dis.Bytecode(x)
            return (
                Operation.OBJECT,
                "BYTECODE",
                tuple((instr.opcode, instr.argval) for instr in bytecode),
            )

        return x

    def _monkey_patch(self): ...

    def trace_hash(self) -> str:
        """Return dask's ``tokenize`` of the recorded trace.

        :raises ModuleNotFoundError: if dask is not installed.
        """
        try:
            from dask.base import tokenize
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "module dask is required to use computation_tracing."
            ) from None

        return tokenize(self.trace)
|
90
|
+
|
91
|
+
|
92
|
+
class ComputationTracerProxy:
    """Stand-in object that reports every operation performed on it to a tracer.

    The owning tracer and the proxy's identifier are stored with
    ``object.__setattr__`` so that the overridden attribute hooks below are
    bypassed. Operations that produce a value return a new proxy; mutating
    operations are only recorded.
    """

    def __init__(self, tracer: ComputationTracer, identifier: int | str):
        object.__setattr__(self, "_computation_tracer_", tracer)
        object.__setattr__(self, "_computation_tracer_id_", identifier)

    def __getattribute__(self, item):
        # Only these two attributes are answered by the real object.
        if item in ("__class__", "__module__"):
            return object.__getattribute__(self, item)

        return _get_tracer(self)._get_proxy((Operation.GETATTR, self, item))

    def __setattr__(self, key, value):
        _get_tracer(self)._add_computation((Operation.SETATTR, self, key, value))

    def __delattr__(self, key):
        _get_tracer(self)._add_computation((Operation.DELATTR, self, key))

    def __getitem__(self, item):
        return _get_tracer(self)._get_proxy((Operation.GETITEM, self, item))

    def __setitem__(self, key, value):
        _get_tracer(self)._add_computation((Operation.SETITEM, self, key, value))

    def __delitem__(self, key):
        _get_tracer(self)._add_computation((Operation.DELITEM, self, key))

    def __call__(self, *args, **kwargs):
        return _get_tracer(self)._get_proxy((Operation.CALL, self, args, kwargs))

    def __get__(self, instance, owner):
        return _get_tracer(self)._get_proxy((Operation.GET, instance, self))

    def __bool__(self):
        # NOTE(review): Python requires __bool__ to return a bool, so calling
        # bool(proxy) records the BOOL entry and then raises TypeError.
        # Confirm whether that is the intended behaviour.
        return _get_tracer(self)._get_proxy((Operation.BOOL, self))

    def __iter__(self):
        raise RuntimeError("__iter__ is not supported by ComputationTracerProxy")

    def __contains__(self, item):
        raise RuntimeError("__contains__ is not supported by ComputationTracerProxy")

    def __len__(self):
        raise RuntimeError("__len__ is not supported by ComputationTracerProxy")
|
144
|
+
|
145
|
+
|
146
|
+
def _get_tracer(proxy: ComputationTracerProxy) -> ComputationTracer:
    """Return the tracer stored on ``proxy``.

    ``object.__getattribute__`` is used so that the proxy's own
    ``__getattribute__`` (which would record a GETATTR entry) is bypassed.
    """
    tracer = object.__getattribute__(proxy, "_computation_tracer_")
    return tracer
|
148
|
+
|
149
|
+
|
150
|
+
# Special methods that get a forwarding implementation on ComputationTracerProxy.
__supported_dunder = {
    # arithmetic (plain and reflected)
    "__add__", "__radd__",
    "__sub__", "__rsub__",
    "__mul__", "__rmul__",
    "__truediv__", "__rtruediv__",
    "__floordiv__", "__rfloordiv__",
    "__pow__", "__rpow__",
    "__mod__", "__rmod__",
    # unary / numeric helpers
    "__round__", "__pos__", "__neg__", "__abs__",
    # bitwise (plain and reflected)
    "__and__", "__rand__",
    "__or__", "__ror__",
    "__xor__", "__rxor__",
    "__invert__",
    # comparisons
    "__lt__", "__le__", "__eq__", "__ne__", "__gt__", "__ge__",
    # copying
    "__copy__", "__deepcopy__",
}
|
185
|
+
|
186
|
+
|
187
|
+
def __create_dunder(name):
    """Build a method that forwards to the attribute called ``name``.

    The generated method looks ``name`` up on the instance with ``getattr``
    and calls the result with the instance followed by the received operands.
    """

    def dunder(self, *operands):
        target = getattr(self, name)
        return target(self, *operands)

    return dunder
|
192
|
+
|
193
|
+
|
194
|
+
# Attach a forwarding implementation for every supported special method, so
# that operators applied to a proxy go through its attribute and call hooks.
for dunder_ in __supported_dunder:
    setattr(
        ComputationTracerProxy,
        dunder_,
        __create_dunder(dunder_),
    )
|
196
|
+
|
197
|
+
|
198
|
+
class ComputationTraceRef:
    """Lightweight reference to a proxy, holding only the proxy's identifier.

    This is what replaces a proxy when it appears inside a trace entry.
    """

    __slots__ = ("id",)

    def __init__(self, proxy: ComputationTracerProxy):
        # Read the identifier without triggering the proxy's attribute hook.
        identifier = object.__getattribute__(proxy, "_computation_tracer_id_")
        self.id = identifier

    def __repr__(self):
        return f"ComputationTraceRef<{self.id}>"

    def __str__(self):
        return f"ComputationTraceRef<{self.id}>"

    def __dask_tokenize__(self):
        token = ("ComputationTraceRef", self.id)
        return token
|
212
|
+
|
213
|
+
|
214
|
+
class MonkeyPatcher:
    """Monkey Patching class inspired by pytest's MonkeyPatch class

    Every patch applied through :meth:`patch_attr` is remembered so that
    :meth:`undo` can restore the previous state in reverse order.
    """

    # Marks an attribute that was inherited, i.e. not present in the patched
    # class' own ``__dict__``; undoing such a patch removes the attribute again.
    _NOT_SET = object()

    def __init__(self):
        self._setattr: list[tuple[object, str, Any]] = []

    def patch_attr(self, obj: object, name: str, value: Any):
        """Set ``obj.name`` to ``value`` and remember how to revert it.

        :raises AttributeError: if ``obj`` has no attribute ``name``.
        """
        # Also serves as the existence check (raises AttributeError if missing).
        old_value = getattr(obj, name)

        # avoid class descriptors like staticmethod / classmethod
        if inspect.isclass(obj):
            # An inherited attribute is not in the class' own __dict__; record
            # that fact instead of failing with KeyError.
            old_value = obj.__dict__.get(name, self._NOT_SET)

        setattr(obj, name, value)
        self._setattr.append((obj, name, old_value))

    def undo(self):
        """Revert all recorded patches, most recent first."""
        for obj, name, value in reversed(self._setattr):
            if value is self._NOT_SET:
                # The attribute only existed through inheritance before.
                delattr(obj, name)
            else:
                setattr(obj, name, value)
        self._setattr.clear()
|
234
|
+
|
235
|
+
|
236
|
+
def fully_qualified_name(obj):
    """Return a dotted ``module.qualname`` style name for ``obj``.

    The result depends on the name of ``type(obj)``: builtin functions,
    Python functions (using ``__wrapped__`` when present), descriptors, bound
    methods, method wrappers, modules and properties each have a dedicated
    rule. Classes yield their own name; any other object yields the name of
    its class.
    """
    kind = type(obj).__name__

    if kind == "builtin_function_or_method":
        module = obj.__module__
        if module is None:
            # Fall back to the module of the object the builtin is bound to.
            owner = obj.__self__
            if not inspect.isclass(owner):
                owner = owner.__class__
            module = owner.__module__
        return f"{module}.{obj.__qualname__}"

    if kind == "function":
        named = obj.__wrapped__ if hasattr(obj, "__wrapped__") else obj
        qualname = named.__qualname__
        return f"{obj.__module__}.{qualname}"

    if kind in ("member_descriptor", "method_descriptor", "wrapper_descriptor"):
        return f"{obj.__objclass__.__module__}.{obj.__qualname__}"

    if kind == "method":
        bound_to = obj.__self__
        owner = bound_to if inspect.isclass(bound_to) else bound_to.__class__
        cls = owner.__qualname__
        return f"{obj.__self__.__module__}.{cls}.{obj.__name__}"

    if kind == "method-wrapper":
        return f"{fully_qualified_name(obj.__self__)}.{obj.__name__}"

    if kind == "module":
        return obj.__name__

    if kind == "property":
        return f"{obj.fget.__module__}.{obj.fget.__qualname__}"

    if inspect.isclass(obj):
        return f"{obj.__module__}.{obj.__qualname__}"

    return f"{obj.__class__.__module__}.{obj.__class__.__qualname__}"
|
281
|
+
|
282
|
+
|
283
|
+
def patch(tracer: ComputationTracer, target: object, name: str):
    """Replace ``target.name`` by a tracer proxy, dispatching on the value's kind.

    Nothing happens when ``target`` is itself a proxy. Classes are handled by
    ``patch_type``, Python functions by ``patch_function`` and every other
    value by ``patch_value``.
    """
    if isinstance(target, ComputationTracerProxy):
        return None

    val = getattr(target, name)

    if isinstance(val, type):
        handler = patch_type
    elif inspect.isfunction(val):
        handler = patch_function
    elif isinstance(val, object):
        handler = patch_value
    else:
        # Every Python value is an instance of object, so this is not
        # expected to be reachable.
        raise RuntimeError

    return handler(tracer, target, name)
|
297
|
+
|
298
|
+
|
299
|
+
def patch_type(tracer: ComputationTracer, target: object, name: str):
    """Patch the class stored at ``target.name`` and then the reference itself.

    Every entry of the class' own ``__dict__`` whose key does not start with
    ``__`` is replaced by a proxy. Keys starting with ``__`` are only replaced
    when listed in ``dunder_to_patch``; an ``AttributeError`` raised while
    patching one of those is ignored. Finally ``target.name`` is replaced by
    a proxy for the class.
    """
    val = getattr(target, name)
    full_name = fully_qualified_name(val)

    dunder_to_patch = (
        "__getattr__",
        "__setattr__",
        "__delattr__",
        "__getitem__",
        "__setitem__",
        "__delitem__",
        "__call__",
        "__repr__",
        "__str__",
    )

    def member_proxy(member):
        # Proxy identified by the dotted name of the class member.
        return tracer.proxy_type(tracer, full_name + "." + member)

    for key in object.__getattribute__(val, "__dict__"):
        if not key.startswith("__"):
            tracer.patcher.patch_attr(val, key, member_proxy(key))
            continue

        if key not in dunder_to_patch:
            continue

        try:
            tracer.patcher.patch_attr(val, key, member_proxy(key))
        except AttributeError:
            pass

    tracer.patcher.patch_attr(target, name, tracer.proxy_type(tracer, full_name))
|
331
|
+
|
332
|
+
|
333
|
+
def patch_function(tracer: ComputationTracer, target: object, name: str):
    """Replace the function at ``target.name`` by a proxy named after the function."""
    func = getattr(target, name)
    proxy = tracer.proxy_type(tracer, fully_qualified_name(func))
    tracer.patcher.patch_attr(target, name, proxy)
|
337
|
+
|
338
|
+
|
339
|
+
def patch_value(tracer: ComputationTracer, target: object, name: str):
    """Replace ``target.name`` by a proxy named ``<qualified name of target>.<name>``."""
    owner_name = fully_qualified_name(target)
    proxy = tracer.proxy_type(tracer, owner_name + "." + name)
    tracer.patcher.patch_attr(target, name, proxy)
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# Copyright (c) QuantCo and pydiverse contributors 2025-2025
|
2
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
3
|
+
|
4
|
+
"""Generic deep map or mutation operations.
|
5
|
+
|
6
|
+
Heavily inspired by the builtin copy module of python:
|
7
|
+
https://github.com/python/cpython/blob/main/Lib/copy.py
|
8
|
+
"""
|
9
|
+
|
10
|
+
from collections.abc import Callable
|
11
|
+
|
12
|
+
from .computation_tracing import fully_qualified_name
|
13
|
+
from .import_ import load_object
|
14
|
+
|
15
|
+
# Unique sentinel meaning "no memo entry"; compared by identity in deep_map.
_nil = []
|
16
|
+
|
17
|
+
|
18
|
+
def deep_map(x, fn: Callable, memo=None):
    """Recursively apply ``fn`` to ``x`` and to everything nested inside it.

    Exact ``list``, ``tuple`` and ``dict`` instances are rebuilt from their
    mapped elements by the matching helper. Dataclass instances are
    re-instantiated from their mapped ``__dict__``. Every other value is passed
    to ``fn`` directly.

    :param x: The value to map.
    :param fn: Callable applied to leaves and to rebuilt containers.
    :param memo: Maps ``id(original)`` to the mapped result, so shared objects
        are mapped only once. A fresh dict is used when omitted.
    :return: The mapped value.
    """
    if memo is None:
        memo = {}

    d = id(x)
    y = memo.get(d, _nil)
    if y is not _nil:
        return y

    cls = type(x)

    if cls == list:  # noqa: E721
        y = _deep_map_list(x, fn, memo)
    elif cls == tuple:  # noqa: E721
        y = _deep_map_tuple(x, fn, memo)
    elif cls == dict:  # noqa: E721
        y = _deep_map_dict(x, fn, memo)
    elif hasattr(cls, "__dataclass_fields__"):
        # reconstruct data classes
        # The class object is handed over directly (load_object accepts a
        # type). Resolving it again through its dotted name fails for classes
        # that are not importable, e.g. ones defined inside a function.
        y = load_object(
            {
                "class": cls,
                "args": _deep_map_dict(x.__dict__, fn, memo),
            }
        )
    else:
        y = fn(x)

    # If is its own copy, don't memoize.
    if y is not x:
        memo[d] = y
        _keep_alive(x, memo)  # Make sure x lives at least as long as d

    return y
|
52
|
+
|
53
|
+
|
54
|
+
def _deep_map_list(x, fn, memo):
    """Map every element of list ``x`` and return ``fn`` applied to the new list.

    The new list is registered in ``memo`` before the elements are visited, as
    ``_deep_map_dict`` does, so a list that (directly or indirectly) contains
    itself resolves to that entry instead of recursing without end.
    """
    y = []
    memo[id(x)] = y
    append = y.append
    for a in x:
        append(deep_map(a, fn, memo))
    return fn(y)
|
60
|
+
|
61
|
+
|
62
|
+
def _deep_map_tuple(x, fn, memo):
|
63
|
+
y = [deep_map(a, fn, memo) for a in x]
|
64
|
+
# We're not going to put the tuple in the memo, but it's still important we
|
65
|
+
# check for it, in case the tuple contains recursive mutable structures.
|
66
|
+
try:
|
67
|
+
return memo[id(x)]
|
68
|
+
except KeyError:
|
69
|
+
pass
|
70
|
+
for k, j in zip(x, y, strict=False):
|
71
|
+
if k is not j:
|
72
|
+
y = tuple(y)
|
73
|
+
break
|
74
|
+
else:
|
75
|
+
y = x
|
76
|
+
return fn(y)
|
77
|
+
|
78
|
+
|
79
|
+
def _deep_map_dict(x, fn, memo):
    """Map keys and values of dict ``x`` and return ``fn`` applied to the new dict.

    The new dict is registered in ``memo`` before its entries are visited.
    """
    result = {}
    memo[id(x)] = result
    for key, value in x.items():
        # The value is mapped before the key, matching the evaluation order
        # of a subscript assignment.
        mapped_value = deep_map(value, fn, memo)
        result[deep_map(key, fn, memo)] = mapped_value
    return fn(result)
|
85
|
+
|
86
|
+
|
87
|
+
def _keep_alive(x, memo):
    """Store a reference to ``x`` inside ``memo`` so that it stays alive.

    Objects are remembered by their id, so possibly temporary objects must be
    kept alive by referencing them. The references are collected in a list
    stored under ``id(memo)``, a key that should normally not be used unless
    someone maps the memo itself.
    """
    survivors = memo.setdefault(id(memo), [])
    survivors.append(x)
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Copyright (c) QuantCo and pydiverse contributors 2025-2025
|
2
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
3
|
+
|
4
|
+
"""Generic deep update function for nested dictionaries.
|
5
|
+
|
6
|
+
Seems to be solved already in various ways (do we like an extra dependency for pydantic.deep_update?)
|
7
|
+
https://stackoverflow.com/questions/3232943/update-value-of-a-nested-dictionary-of-varying-depth
|
8
|
+
But for snippets, license restrictions exist:
|
9
|
+
https://www.ictrecht.nl/en/blog/what-is-the-license-status-of-stackoverflow-code-snippets
|
10
|
+
""" # noqa: E501
|
11
|
+
|
12
|
+
from collections.abc import Iterable, Mapping
|
13
|
+
|
14
|
+
from box import Box
|
15
|
+
|
16
|
+
|
17
|
+
def deep_merge(x, y, check_enum=False):
    """Merge ``y`` into ``x`` and return the merged value.

    Mappings are merged key by key (a ``Box`` input yields a frozen ``Box``),
    other non-string iterables are handled by ``_deep_merge_iterable`` and any
    other value is replaced by ``y``.

    :param x: The base value.
    :param y: The overriding value.
    :param check_enum: Accepted but not used by this function.
    :raises TypeError: if ``x`` and ``y`` have different types and are not
        both mappings.
    """
    both_mappings = isinstance(x, Mapping) and isinstance(y, Mapping)
    if type(x) != type(y) and not both_mappings:  # noqa: E721
        raise TypeError(
            f"deep_merge failed due to type mismatch '{x}' (type: {type(x)}) vs. '{y}'"
            f" (type: {type(y)})"
        )

    if isinstance(x, Box):
        return Box(_deep_merge_dict(x, y), frozen_box=True)

    if isinstance(x, Mapping):
        return _deep_merge_dict(x, y)

    if isinstance(x, Iterable) and not isinstance(x, str):
        return _deep_merge_iterable(x, y)

    # Plain value: the override wins.
    return y
|
34
|
+
|
35
|
+
|
36
|
+
def _deep_merge_iterable(x: Iterable, y: Iterable):
    """Merge two iterables by letting ``y`` replace ``x`` entirely.

    Merging lists is not trivial. There are a few different strategies:
    replace, unique, append, intersection, ... This implementation replaces.
    """
    # Alternatives that were considered:
    #   [*x, *y]
    #   [deep_merge(a, b) for a, b in zip(x, y)]
    replacement = y
    return replacement
|
42
|
+
|
43
|
+
|
44
|
+
def _deep_merge_dict(x: Mapping, y: Mapping):
    """Return a new dict containing ``x`` updated recursively with ``y``.

    Keys present in both mappings are merged with ``deep_merge``. Keys that
    are missing from the intermediate result are then copied over from ``y``.
    """
    merged = dict(x)

    for key in x:
        if key not in y:
            continue
        if y[key] is None:
            # this is a special case but we have no other way in yaml to express
            # the deletion of fields from a dictionary in an override config
            # NOTE(review): the copy step below re-adds such a key with the
            # value None, so it ends up present (as None) rather than removed.
            # Confirm whether that is intended.
            del merged[key]
        else:
            merged[key] = deep_merge(x[key], y[key])

    for key, value in y.items():
        if key not in merged:
            merged[key] = value

    return merged
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Copyright (c) QuantCo and pydiverse contributors 2025-2025
|
2
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
3
|
+
from ..errors import DisposedError
|
4
|
+
|
5
|
+
|
6
|
+
class Disposable:
    """Base class whose instances refuse attribute access after ``dispose()``.

    Disposal is recorded by setting the attribute ``_Disposable__disposed``;
    once it exists, reading or writing any attribute raises ``DisposedError``.
    NOTE(review): this rewrite assumes ``DisposedError`` is not a subclass of
    ``AttributeError`` - confirm against the errors module.
    """

    def __getattribute__(self, name):
        try:
            object.__getattribute__(self, "_Disposable__disposed")
        except AttributeError:
            disposed = False
        else:
            disposed = True

        if disposed:
            obj_type = object.__getattribute__(self, "__class__")
            raise DisposedError(f"Object of type {obj_type} has already been disposed.")

        return object.__getattribute__(self, name)

    def __setattr__(self, key, value):
        try:
            object.__getattribute__(self, "_Disposable__disposed")
        except AttributeError:
            disposed = False
        else:
            disposed = True

        if disposed:
            obj_type = object.__getattribute__(self, "__class__")
            raise DisposedError(f"Object of type {obj_type} has already been disposed.")

        return object.__setattr__(self, key, value)

    def dispose(self):
        """Mark this object as disposed."""
        # Written through object.__setattr__ to bypass the guard above.
        object.__setattr__(self, "_Disposable__disposed", True)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Copyright (c) QuantCo and pydiverse contributors 2025-2025
|
2
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
3
|
+
import base64
|
4
|
+
import hashlib
|
5
|
+
|
6
|
+
|
7
|
+
def stable_hash(*args: str) -> str:
    """Compute a hash over a sequence of strings

    Each argument is converted with ``str``, encoded as UTF-8 and fed into
    the hash prefixed by its byte length (8 bytes, big endian).

    :param args: Some strings from which to compute the cache key
    :return: A sha256 base32 digest, trimmed to 20 char length
    """

    hasher = hashlib.sha256(b"PYDIVERSE")
    for arg in args:
        encoded = str(arg).encode("utf8")
        length_prefix = len(encoded).to_bytes(length=8, byteorder="big")
        hasher.update(length_prefix + encoded)

    # Only the first 20 characters of the base32 digest (100 bits) are kept.
    # This provides 50 bits of collision resistance, which is more than enough.
    # To illustrate: If you were to generate 1k hashes per second,
    # you still would have to wait over 800k years until you encounter
    # a collision.

    # NOTE: Can't use base64 because it contains lower and upper case
    # letters; identifiers in pipedag are all lowercase
    encoded_digest = base64.b32encode(hasher.digest())
    return encoded_digest.decode("ascii").lower()[:20]
|
@@ -0,0 +1,135 @@
|
|
1
|
+
# Copyright (c) QuantCo and pydiverse contributors 2025-2025
|
2
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
3
|
+
import builtins
|
4
|
+
import importlib
|
5
|
+
import os
|
6
|
+
from collections.abc import Collection
|
7
|
+
from typing import Any
|
8
|
+
|
9
|
+
# Attribute names that stay readable on a class disabled by ``requires``.
_allowed_getattr = [
    "__class__", "__doc__",
    "__name__", "__qualname__",
    "__module__",
]
|
16
|
+
|
17
|
+
|
18
|
+
def requires(requirements: Any | list, exception: BaseException | type[BaseException]):
|
19
|
+
"""Class decorator for handling optional imports.
|
20
|
+
|
21
|
+
If any of the requirements are falsy, this decorator prevents the class
|
22
|
+
from being instantiated and any class attributes from being accessed,
|
23
|
+
and raises the provided exception instead.
|
24
|
+
"""
|
25
|
+
|
26
|
+
if not isinstance(requirements, list | tuple):
|
27
|
+
requirements = (requirements,)
|
28
|
+
|
29
|
+
def decorator(cls):
|
30
|
+
if all(requirements):
|
31
|
+
return cls
|
32
|
+
|
33
|
+
# Modify class to raise exception
|
34
|
+
class RaiserMeta(type):
|
35
|
+
def __getattribute__(self, x):
|
36
|
+
# While building the documentation, we set the SPHINX_BUILD env
|
37
|
+
# variable. This allows us to properly generate the documentation
|
38
|
+
# without raising exceptions.
|
39
|
+
if os.environ.get("SPHINX_BUILD"):
|
40
|
+
return getattr(cls, x)
|
41
|
+
if x in _allowed_getattr:
|
42
|
+
return getattr(cls, x)
|
43
|
+
raise exception
|
44
|
+
|
45
|
+
def raiser(*args, **kwargs):
|
46
|
+
raise exception
|
47
|
+
|
48
|
+
__name = str(cls.__name__)
|
49
|
+
__bases = ()
|
50
|
+
__dict = {
|
51
|
+
"__metaclass__": RaiserMeta,
|
52
|
+
"__wrapped__": cls,
|
53
|
+
"__new__": raiser,
|
54
|
+
}
|
55
|
+
|
56
|
+
return RaiserMeta(__name, __bases, __dict)
|
57
|
+
|
58
|
+
return decorator
|
59
|
+
|
60
|
+
|
61
|
+
def import_object(import_path: str):
    """Loads an object given its dotted import path

    The longest leading part of the path that can be imported as a module is
    imported; the remaining parts are resolved with ``getattr``. If no module
    could be imported, the lookup starts at the ``builtins`` module.

    >>> # An import statement like this
    >>> from pandas import DataFrame
    >>> # can be expressed as follows:
    >>> import_object("pandas.DataFrame")
    """

    segments = [segment for segment in import_path.split(".") if segment]

    module = None
    consumed = 0
    for end in range(1, len(segments) + 1):
        try:
            module = importlib.import_module(".".join(segments[:end]))
        except ImportError:
            break
        consumed = end

    target = module or builtins
    for attribute in segments[consumed:]:
        target = getattr(target, attribute)

    return target
|
85
|
+
|
86
|
+
|
87
|
+
def load_object(config_dict: dict, move_keys_into_args: Collection[str] | None = None):
|
88
|
+
"""Instantiates an instance of an object given
|
89
|
+
|
90
|
+
The import path (module.Class) should be specified as the "class" value
|
91
|
+
of the dict. The args section of the dict get used as the instance config.
|
92
|
+
|
93
|
+
If the class defines a `_init_conf_` function, it gets called using the
|
94
|
+
config values, otherwise they just get passed to the class initializer.
|
95
|
+
|
96
|
+
Additionally, any values of `config_dict` whose associated keys are in
|
97
|
+
`move_keys_into_args`, get also passed as an argument to the initializer.
|
98
|
+
::
|
99
|
+
|
100
|
+
# module.Class(argument="value")
|
101
|
+
load_object({
|
102
|
+
"class": "module.Class",
|
103
|
+
"args": {
|
104
|
+
"argument": "value",
|
105
|
+
},
|
106
|
+
})
|
107
|
+
"""
|
108
|
+
|
109
|
+
if "class" not in config_dict:
|
110
|
+
raise RuntimeError(
|
111
|
+
"Attribute 'class' is missing in configuration "
|
112
|
+
"section that supports multiple backends\n"
|
113
|
+
f"config section: {config_dict}"
|
114
|
+
)
|
115
|
+
if isinstance(config_dict["class"], type):
|
116
|
+
# it may be useful in tests to just pass in a dynamically created class
|
117
|
+
cls = config_dict["class"]
|
118
|
+
else:
|
119
|
+
cls = import_object(config_dict["class"])
|
120
|
+
|
121
|
+
args = config_dict.get("args", {}) or {}
|
122
|
+
if not isinstance(args, dict):
|
123
|
+
raise TypeError(
|
124
|
+
f"Invalid type for args section: {type(args)}\n"
|
125
|
+
f"config section: {config_dict}"
|
126
|
+
)
|
127
|
+
|
128
|
+
if move_keys_into_args:
|
129
|
+
args = args | {k: v for k, v in config_dict.items() if k in move_keys_into_args}
|
130
|
+
|
131
|
+
try:
|
132
|
+
init_conf = cls._init_conf_
|
133
|
+
return init_conf(args)
|
134
|
+
except AttributeError:
|
135
|
+
return cls(**args)
|