relib 1.1.2-py3-none-any.whl → 1.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- relib/__init__.py +3 -2
- relib/hashing.py +44 -120
- relib/measure_duration.py +1 -2
- relib/utils.py +41 -45
- {relib-1.1.2.dist-info → relib-1.2.1.dist-info}/METADATA +2 -4
- relib-1.2.1.dist-info/RECORD +8 -0
- {relib-1.1.2.dist-info → relib-1.2.1.dist-info}/WHEEL +1 -1
- relib/raypipe.py +0 -60
- relib-1.1.2.dist-info/RECORD +0 -9
- {relib-1.1.2.dist-info → relib-1.2.1.dist-info}/licenses/LICENSE +0 -0
relib/__init__.py
CHANGED
```diff
@@ -1,6 +1,8 @@
 from .utils import (
   clear_console,
+  console_link,
   non_none,
+  as_any,
   list_split,
   drop_none,
   distinct,
@@ -31,6 +33,5 @@ from .utils import (
   StrFilter,
   str_filterer,
 )
-from .raypipe import raypipe
-from .hashing import hash
+from .hashing import hash, hash_obj
 from .measure_duration import measure_duration
```
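1.2.1 widens the public surface: `console_link` and `as_any` come in from `relib.utils`, and `relib.hashing` now exports `hash_obj` alongside the old `hash` name, while the `raypipe` export disappears with its module. A quick sketch of the new names (assuming relib 1.2.1 is installed):

```python
from relib import hash_obj, console_link, as_any

# hash_obj is the new name for the hashing entry point; `hash` stays as an alias
digest = hash_obj({"lr": 1e-3, "epochs": 10})

# console_link wraps text in an OSC 8 escape sequence, producing a clickable
# hyperlink in terminals that support it
print(console_link("relib on GitHub", "https://github.com/Reddan/relib"))
```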
relib/hashing.py
CHANGED
```diff
@@ -1,8 +1,3 @@
-"""
-Fast cryptographic hash of Python objects, with a special case for fast
-hashing of numpy arrays.
-"""
-
 # Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
 # Copyright (c) 2009 Gael Varoquaux
 # License: BSD Style, 3 clauses.
@@ -11,30 +6,25 @@ import pickle
 import hashlib
 import sys
 import types
-import struct
 import io
 import decimal
 
+try:
+  import numpy
+except:
+  has_numpy = False
+else:
+  has_numpy = True
+
 Pickler = pickle._Pickler
-_bytes_or_unicode = (bytes, str)
 
 
 class _ConsistentSet(object):
-  """ Class used to ensure the hash of Sets is preserved
-  whatever the order of its items.
-  """
   def __init__(self, set_sequence):
-    # Forces order of elements in set to ensure consistent hash.
     try:
-      # Trying first to order the set assuming the type of elements is
-      # consistent and orderable.
-      # This fails on python 3 when elements are unorderable
-      # but we keep it in a try as it's faster.
       self._sequence = sorted(set_sequence)
     except (TypeError, decimal.InvalidOperation):
-
-      # This is slower but works in any case.
-      self._sequence = sorted((hash(e) for e in set_sequence))
+      self._sequence = sorted(map(hash_obj, set_sequence))
 
 
 class _MyHash(object):
@@ -45,35 +35,30 @@ class _MyHash(object):
 
 
 class Hasher(Pickler):
-  """ A subclass of pickler, to do cryptographic hashing, rather than
-  pickling.
-  """
+  """ A subclass of pickler, to do cryptographic hashing, rather than pickling. """
 
-  def __init__(self, hash_name='md5'):
+  def __init__(self, hash_name="md5"):
     self.stream = io.BytesIO()
-    # By default we want a pickle protocol that only changes with
-    # the major python version and not the minor one
+    # We want a pickle protocol that only changes with major Python versions
     protocol = pickle.HIGHEST_PROTOCOL
     Pickler.__init__(self, self.stream, protocol=protocol)
-    # Initialise the hash obj
     self._hash = hashlib.new(hash_name)
 
-  def hash(self, obj):
+  def hash(self, obj) -> str:
     try:
       self.dump(obj)
     except pickle.PicklingError as e:
-      e.args += ('PicklingError while hashing %r: %r' % (obj, e),)
+      e.args += ("PicklingError while hashing %r: %r" % (obj, e),)
       raise
     dumps = self.stream.getvalue()
     self._hash.update(dumps)
-
-    return self._hash.hexdigest()
+    return self._hash.hexdigest()
 
   def save(self, obj):
     if isinstance(obj, (types.MethodType, type({}.pop))):
       # the Pickler cannot pickle instance methods; here we decompose
       # them into components that make them uniquely identifiable
-      if hasattr(obj, '__func__'):
+      if hasattr(obj, "__func__"):
         func_name = obj.__func__.__name__
       else:
         func_name = obj.__name__
@@ -90,28 +75,25 @@ class Hasher(Pickler):
 
   def memoize(self, obj):
     # We want hashing to be sensitive to value instead of reference.
-    # For example we want ['aa', 'aa'] and ['aa', 'aaZ'[:2]]
+    # For example we want ["aa", "aa"] and ["aa", "aaZ"[:2]]
     # to hash to the same value and that's why we disable memoization
     # for strings
-    if isinstance(obj, _bytes_or_unicode):
+    if isinstance(obj, (bytes, str)):
       return
     Pickler.memoize(self, obj)
 
   # The dispatch table of the pickler is not accessible in Python
   # 3, as these lines are only bugware for IPython, we skip them.
-  def save_global(self, obj, name=None, pack=struct.pack):
+  def save_global(self, obj, name=None):
     # We have to override this method in order to deal with objects
     # defined interactively in IPython that are not injected in
     # __main__
-    kwargs = dict(name=name, pack=pack)
-    if sys.version_info >= (3, 4):
-      del kwargs['pack']
     try:
-      Pickler.save_global(self, obj, **kwargs)
+      Pickler.save_global(self, obj, name=name)
     except pickle.PicklingError:
-      Pickler.save_global(self, obj, **kwargs)
+      Pickler.save_global(self, obj, name=name)
     module = getattr(obj, "__module__", None)
-    if module == '__main__':
+    if module == "__main__":
       my_name = name
       if my_name is None:
         my_name = obj.__name__
@@ -121,67 +103,35 @@ class Hasher(Pickler):
     # interactively in __main__
     setattr(mod, my_name, obj)
 
-  dispatch = Pickler.dispatch.copy()
-  # builtin
-  dispatch[type(len)] = save_global
-  # type
-  dispatch[type(object)] = save_global
-  # classobj
-  dispatch[type(Pickler)] = save_global
-  # function
-  dispatch[type(pickle.dump)] = save_global
-
   def _batch_setitems(self, items):
-    # forces order of keys in dict to ensure consistent hash.
     try:
-      # Trying first to compare dict assuming the type of keys is
-      # consistent and orderable.
-      # This fails on python 3 when keys are unorderable
-      # but we keep it in a try as it's faster.
       Pickler._batch_setitems(self, iter(sorted(items)))
     except TypeError:
-
-      # slower but works in any case.
-      Pickler._batch_setitems(self, iter(sorted((hash(k), v)
-                                                for k, v in items)))
+      Pickler._batch_setitems(self, iter(sorted((hash_obj(k), v) for k, v in items)))
 
   def save_set(self, set_items):
-    # forces order of items in Set to ensure consistent hash
     Pickler.save(self, _ConsistentSet(set_items))
 
+  dispatch = Pickler.dispatch.copy()
+  dispatch[type(len)] = save_global  # builtin
+  dispatch[type(object)] = save_global  # type
+  dispatch[type(Pickler)] = save_global  # classobj
+  dispatch[type(pickle.dump)] = save_global  # function
   dispatch[type(set())] = save_set
 
 
 class NumpyHasher(Hasher):
-  """ Special case the hasher for when numpy is loaded.
-  """
-
-  def __init__(self, hash_name='md5', coerce_mmap=False):
-    """
-    Parameters
-    ----------
-    hash_name: string
-      The hash algorithm to be used
-    coerce_mmap: boolean
-      Make no difference between np.memmap and np.ndarray
-      objects.
-    """
-    self.coerce_mmap = coerce_mmap
+  def __init__(self, hash_name="md5"):
     Hasher.__init__(self, hash_name=hash_name)
-    # delayed import of numpy, to avoid tight coupling
-    import numpy as np
-    self.np = np
-    if hasattr(np, 'getbuffer'):
-      self._getbuffer = np.getbuffer
-    else:
-      self._getbuffer = memoryview
 
   def save(self, obj):
     """ Subclass the save method, to hash ndarray subclass, rather
     than pickling them. Off course, this is a total abuse of
     the Pickler class.
     """
-    if isinstance(obj, self.np.ndarray) and not obj.dtype.hasobject:
+    import numpy as np
+
+    if isinstance(obj, np.ndarray) and not obj.dtype.hasobject:
       # Compute a hash of the object
       # The update function of the hash requires a c_contiguous buffer.
       if obj.shape == ():
@@ -198,31 +148,14 @@ class NumpyHasher(Hasher):
         # XXX: There might be a more efficient way of doing this
         obj_c_contiguous = obj.flatten()
 
-      # memoryview is not supported for some dtypes, e.g. datetime64,
-      # see https://github.com/numpy/numpy/issues/4983. The
-      # workaround is to view the array as bytes before
-      # taking the memoryview.
-      self._hash.update(
-        self._getbuffer(obj_c_contiguous.view(self.np.uint8)))
-
-      # We store the class, to be able to distinguish between
-      # Objects with the same binary content, but different
-      # classes.
-      if self.coerce_mmap and isinstance(obj, self.np.memmap):
-        # We don't make the difference between memmap and
-        # normal ndarrays, to be able to reload previously
-        # computed results with memmap.
-        klass = self.np.ndarray
-      else:
-        klass = obj.__class__
-      # We also return the dtype and the shape, to distinguish
-      # different views on the same data with different dtypes.
+      # View the array as bytes to support dtypes like datetime64
+      self._hash.update(memoryview(obj_c_contiguous.view(np.uint8)))
 
       # The object will be pickled by the pickler hashed at the end.
-      obj = (klass, ('HASHED', obj.dtype, obj.shape, obj.strides))
-    elif isinstance(obj, self.np.dtype):
+      obj = (obj.__class__, ("HASHED", obj.dtype, obj.shape, obj.strides))
+    elif isinstance(obj, np.dtype):
       # Atomic dtype objects are interned by their default constructor:
-      # np.dtype('f8') is np.dtype('f8')
+      # np.dtype("f8") is np.dtype("f8")
       # This interning is not maintained by a
       # pickle.loads + pickle.dumps cycle, because __reduce__
       # uses copy=True in the dtype constructor. This
@@ -232,24 +165,15 @@ class NumpyHasher(Hasher):
       # To prevent the hash from being sensitive to this, we use
       # .descr which is a full (and never interned) description of
       # the array dtype according to the numpy doc.
-      obj = (obj.__class__, ('HASHED', obj.descr))
-
+      obj = (obj.__class__, ("HASHED", obj.descr))
+
     Hasher.save(self, obj)
 
 
-def hash(obj, hash_name='md5', coerce_mmap=False):
-  """ Quick calculation of a hash to identify uniquely Python objects
-  containing numpy arrays.
-
-  Parameters
-  -----------
-  hash_name: 'md5' or 'sha1'
-    Hashing algorithm used. sha1 is supposedly safer, but md5 is
-    faster.
-  coerce_mmap: boolean
-    Make no difference between np.memmap and np.ndarray
-  """
-  if 'numpy' in sys.modules:
-    hasher = NumpyHasher(hash_name=hash_name, coerce_mmap=coerce_mmap)
+def hash_obj(obj, hash_name="md5") -> str:
+  if has_numpy:
+    return NumpyHasher(hash_name=hash_name).hash(obj)
   else:
-    hasher = Hasher(hash_name=hash_name)
-  return hasher.hash(obj)
+    return Hasher(hash_name=hash_name).hash(obj)
+
+hash = hash_obj
```
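The rewrite trims the vendored joblib hasher: the module docstring and Python 2 era branches go away, `coerce_mmap` and the memmap special case are dropped, numpy becomes optional behind the `has_numpy` probe, and the unordered-collection fallbacks sort by `hash_obj` digests instead of builtin `hash`. Since `_batch_setitems` and `_ConsistentSet` sort dict and set contents before pickling, digests are insensitive to insertion order, and arrays hash by raw buffer plus class, dtype, shape, and strides. A small sketch of the behavior this implies (assuming numpy is installed):

```python
import numpy as np
from relib import hash_obj

# Dicts are hashed with sorted keys, so insertion order doesn't matter
assert hash_obj({"a": 1, "b": 2}) == hash_obj({"b": 2, "a": 1})

# Arrays hash by buffer content plus (class, dtype, shape, strides):
# an equal copy matches, a reinterpreted view does not
x = np.arange(10)
assert hash_obj(x) == hash_obj(x.copy())
assert hash_obj(x) != hash_obj(x.view(np.uint8))

# hash_name selects the hashlib algorithm, "md5" by default
print(hash_obj(x, hash_name="sha1"))
```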
relib/measure_duration.py
CHANGED
```diff
@@ -1,5 +1,4 @@
 from time import time
-from termcolor import colored
 
 active_mds = []
 
@@ -16,5 +15,5 @@ class measure_duration:
     depth = len(active_mds) - 1
     indent = ('──' * depth) + (' ' * (depth > 0))
     text = '{}: {} seconds'.format(self.name, duration)
-    print(colored(indent + text, …))
+    print(indent + text)
     active_mds.remove(self)
```
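With termcolor gone, `measure_duration` prints its timing line unstyled; the nesting indent built from `active_mds` is unchanged. A usage sketch (assuming the context manager takes the label stored as `self.name`):

```python
from relib import measure_duration

with measure_duration("outer"):
  with measure_duration("inner"):
    sum(range(10**6))
# The inner block prints first, indented one level, e.g.:
# ── inner: 0.012 seconds
# outer: 0.013 seconds
```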
relib/utils.py
CHANGED
```diff
@@ -1,22 +1,22 @@
-from typing import TypeVar, Iterable, Callable, Any, cast, overload
-from itertools import chain
-import numpy as np
 import os
 import re
-
-T = TypeVar('T')
-U = TypeVar('U')
-K = TypeVar('K')
-K1, K2, K3, K4, K5, K6 = TypeVar('K1'), TypeVar('K2'), TypeVar('K3'), TypeVar('K4'), TypeVar('K5'), TypeVar('K6')
+from typing import Iterable, Callable, Any, overload
+from itertools import chain
 
 def clear_console():
   os.system("cls" if os.name == "nt" else "clear")
 
-def non_none(obj: T | None) -> T:
+def console_link(text, url):
+  return f"\033]8;;{url}\033\\{text}\033]8;;\033\\"
+
+def non_none[T](obj: T | None) -> T:
   assert obj is not None
   return obj
 
-def list_split(l: list[T], sep: T) -> list[list[T]]:
+def as_any(obj: Any) -> Any:
+  return obj
+
+def list_split[T](l: list[T], sep: T) -> list[list[T]]:
   l = [sep, *l, sep]
   split_at = [i for i, x in enumerate(l) if x is sep]
   ranges = list(zip(split_at[0:-1], split_at[1:]))
@@ -25,16 +25,16 @@ def list_split(l: list[T], sep: T) -> list[list[T]]:
     for start, end in ranges
   ]
 
-def drop_none(l: Iterable[T | None]) -> list[T]:
+def drop_none[T](l: Iterable[T | None]) -> list[T]:
   return [x for x in l if x is not None]
 
-def distinct(items: Iterable[T]) -> list[T]:
-  return list(set(items))
+def distinct[T](items: Iterable[T]) -> list[T]:
+  return list(dict.fromkeys(items))
 
-def first(iterable: Iterable[T]) -> T | None:
+def first[T](iterable: Iterable[T]) -> T | None:
   return next(iter(iterable), None)
 
-def move_value(l: Iterable[T], from_i: int, to_i: int) -> list[T]:
+def move_value[T](l: Iterable[T], from_i: int, to_i: int) -> list[T]:
   l = list(l)
   l.insert(to_i, l.pop(from_i))
   return l
@@ -54,7 +54,7 @@ def transpose_dict(des):
     {key: des[key][i] for key in keys}
     for i in range(length)
   ]
-  raise ValueError('transpose_dict only accepts dict or list')
+  raise ValueError("transpose_dict only accepts dict or list")
 
 def make_combinations_by_dict(des, keys=None, pairs=[]):
   keys = sorted(des.keys()) if keys == None else keys
@@ -68,7 +68,7 @@ def make_combinations_by_dict(des, keys=None, pairs=[]):
     for pair in new_pairs
   ])
 
-def merge_dicts(*dicts: dict[K, T]) -> dict[K, T]:
+def merge_dicts[T, K](*dicts: dict[K, T]) -> dict[K, T]:
   if len(dicts) == 1:
     return dicts[0]
   result = {}
@@ -76,32 +76,32 @@ def merge_dicts(*dicts: dict[K, T]) -> dict[K, T]:
     result.update(d)
   return result
 
-def intersect(*lists: Iterable[T]) -> list[T]:
+def intersect[T](*lists: Iterable[T]) -> list[T]:
   return list(set.intersection(*map(set, lists)))
 
-def ensure_tuple(value: T | tuple[T, ...]) -> tuple[T, ...]:
+def ensure_tuple[T](value: T | tuple[T, ...]) -> tuple[T, ...]:
   return value if isinstance(value, tuple) else (value,)
 
-def key_of(dicts: Iterable[dict[T, U]], key: T) -> list[U]:
+def key_of[T, U](dicts: Iterable[dict[T, U]], key: T) -> list[U]:
   return [d[key] for d in dicts]
 
-def omit(d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
+def omit[T, K](d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
   if keys:
     d = dict(d)
     for key in keys:
       del d[key]
   return d
 
-def pick(d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
+def pick[T, K](d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
   return {key: d[key] for key in keys}
 
-def dict_by(keys: Iterable[K], values: Iterable[T]) -> dict[K, T]:
+def dict_by[T, K](keys: Iterable[K], values: Iterable[T]) -> dict[K, T]:
   return dict(zip(keys, values))
 
-def tuple_by(d: dict[K, T], keys: Iterable[K]) -> tuple[T, ...]:
+def tuple_by[T, K](d: dict[K, T], keys: Iterable[K]) -> tuple[T, ...]:
   return tuple(d[key] for key in keys)
 
-def flatten(l: Iterable[Iterable[T]]) -> list[T]:
+def flatten[T](l: Iterable[Iterable[T]]) -> list[T]:
   return list(chain.from_iterable(l))
 
 def transpose(tuples, default_num_returns=0):
@@ -110,27 +110,21 @@ def transpose(tuples, default_num_returns=0):
     return ([],) * default_num_returns
   return tuple(map(list, output))
 
-def map_dict(fn: Callable[[T], U], d: dict[K, T]) -> dict[K, U]:
+def map_dict[T, U, K](fn: Callable[[T], U], d: dict[K, T]) -> dict[K, U]:
   return {key: fn(value) for key, value in d.items()}
 
 @overload
-def deepen_dict(d: dict[tuple[K1], U]) -> dict[K1, U]: ...
-
+def deepen_dict[K1, U](d: dict[tuple[K1], U]) -> dict[K1, U]: ...
 @overload
-def deepen_dict(d: dict[tuple[K1, K2], U]) -> dict[K1, dict[K2, U]]: ...
-
+def deepen_dict[K1, K2, U](d: dict[tuple[K1, K2], U]) -> dict[K1, dict[K2, U]]: ...
 @overload
-def deepen_dict(d: dict[tuple[K1, K2, K3], U]) -> dict[K1, dict[K2, dict[K3, U]]]: ...
-
+def deepen_dict[K1, K2, K3, U](d: dict[tuple[K1, K2, K3], U]) -> dict[K1, dict[K2, dict[K3, U]]]: ...
 @overload
-def deepen_dict(d: dict[tuple[K1, K2, K3, K4], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, U]]]]: ...
-
+def deepen_dict[K1, K2, K3, K4, U](d: dict[tuple[K1, K2, K3, K4], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, U]]]]: ...
 @overload
-def deepen_dict(d: dict[tuple[K1, K2, K3, K4, K5], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, U]]]]]: ...
-
+def deepen_dict[K1, K2, K3, K4, K5, U](d: dict[tuple[K1, K2, K3, K4, K5], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, U]]]]]: ...
 @overload
-def deepen_dict(d: dict[tuple[K1, K2, K3, K4, K5, K6], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, dict[K6, U]]]]]]: ...
-
+def deepen_dict[K1, K2, K3, K4, K5, K6, U](d: dict[tuple[K1, K2, K3, K4, K5, K6], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, dict[K6, U]]]]]]: ...
 def deepen_dict(d: dict[tuple[Any, ...], Any]) -> dict:
   output = {}
   if () in d:
@@ -152,45 +146,47 @@ def flatten_dict_inner(d, prefix=()):
 def flatten_dict(deep_dict: dict, prefix=()) -> dict:
   return dict(flatten_dict_inner(deep_dict, prefix))
 
-def group(pairs: Iterable[tuple[K, T]]) -> dict[K, list[T]]:
+def group[T, K](pairs: Iterable[tuple[K, T]]) -> dict[K, list[T]]:
   values_by_key = {}
   for key, value in pairs:
     values_by_key.setdefault(key, []).append(value)
   return values_by_key
 
-def reversed_enumerate(l: list[T] | tuple[T, ...]) -> Iterable[tuple[int, T]]:
+def reversed_enumerate[T](l: list[T] | tuple[T, ...]) -> Iterable[tuple[int, T]]:
   return zip(reversed(range(len(l))), reversed(l))
 
-def get_at(d: dict, keys: Iterable[Any], default: T) -> T:
+def get_at[T](d: dict, keys: Iterable[Any], default: T) -> T:
   try:
     for key in keys:
       d = d[key]
   except KeyError:
     return default
-  return cast(T, d)
+  return as_any(d)
 
-def sized_partitions(values: Iterable[T], part_size: int) -> list[list[T]]:
+def sized_partitions[T](values: Iterable[T], part_size: int) -> list[list[T]]:
   # "chunk"
   if not isinstance(values, list):
     values = list(values)
   num_parts = (len(values) / part_size).__ceil__()
   return [values[i * part_size:(i + 1) * part_size] for i in range(num_parts)]
 
-def num_partitions(values: Iterable[T], num_parts: int) -> list[list[T]]:
+def num_partitions[T](values: Iterable[T], num_parts: int) -> list[list[T]]:
   if not isinstance(values, list):
     values = list(values)
   part_size = (len(values) / num_parts).__ceil__()
   return [values[i * part_size:(i + 1) * part_size] for i in range(num_parts)]
 
 def _cat_tile(cats, n_tile):
+  import numpy as np
   return cats[np.tile(np.arange(len(cats)), n_tile)]
 
 def df_from_array(
-  value_cols: dict[str, np.ndarray],
+  value_cols: dict[str, Any],
   dim_labels: list[tuple[str, list[str | int | float]]],
   indexed=False,
 ):
   import pandas as pd
+  import numpy as np
   dim_sizes = np.array([len(labels) for _, labels in dim_labels])
   assert all(array.shape == tuple(dim_sizes) for array in value_cols.values())
   array_offsets = [
```
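Beyond the switch to PEP 695 inline generics (matching the `Requires-Python: >=3.12` floor), two behavior details stand out: `distinct` now preserves first-seen order via `dict.fromkeys`, and numpy is only imported inside the two helpers that need it. A few of the helpers in action (a sketch, assuming relib 1.2.1):

```python
from relib import distinct, list_split, deepen_dict, group

# Order-preserving dedup (dict.fromkeys) instead of an unordered set()
assert distinct([3, 1, 3, 2, 1]) == [3, 1, 2]

# list_split compares with `is`, so pass the exact separator object
SEP = "|"
assert list_split(["a", SEP, "b", "c"], SEP) == [["a"], ["b", "c"]]

# deepen_dict nests tuple keys; group collects (key, value) pairs
assert deepen_dict({("a", "x"): 1, ("a", "y"): 2}) == {"a": {"x": 1, "y": 2}}
assert group([("k", 1), ("k", 2), ("j", 3)]) == {"k": [1, 2], "j": [3]}
```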
{relib-1.1.2.dist-info → relib-1.2.1.dist-info}/METADATA
CHANGED
```diff
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: relib
-Version: 1.1.2
+Version: 1.2.1
 Project-URL: Repository, https://github.com/Reddan/relib.git
 Author: Hampus Hallman
 License: Copyright 2023 Hampus Hallman
@@ -12,5 +12,3 @@ License: Copyright 2023 Hampus Hallman
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 License-File: LICENSE
 Requires-Python: >=3.12
-Requires-Dist: numpy
-Requires-Dist: termcolor
```
relib-1.2.1.dist-info/RECORD
ADDED
```diff
@@ -0,0 +1,8 @@
+relib/__init__.py,sha256=TPvX5ey_D4bjzIqp5HqAUX4V7IXreJhY7T2-GuATHLg,577
+relib/hashing.py,sha256=DB_fnkj0ls01FgZbf4nPFHl4EBU8X_0OrmDvty4HlRE,6020
+relib/measure_duration.py,sha256=LCTo_D_qReNprD3fhtJ0daeWycS6xQE_cwxeg2_h0xo,456
+relib/utils.py,sha256=tFO2FnisciRCrcTO45sm8SnzxGXzHd6u6r4efOtOQ7k,6906
+relib-1.2.1.dist-info/METADATA,sha256=3izGnxrJlKPmUX-ezjdM6rUyCflyVyBToMfXb4pnCBs,1290
+relib-1.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+relib-1.2.1.dist-info/licenses/LICENSE,sha256=t9LfkVbmcvZjP0x3Sq-jR38UfTNbNtRQvc0Q8HWmLak,1054
+relib-1.2.1.dist-info/RECORD,,
```
relib/raypipe.py
DELETED
```diff
@@ -1,60 +0,0 @@
-import numpy as np
-
-class Raypipe():
-  def __init__(self, handlers=[]):
-    self.handlers = handlers
-
-  def __add_to_pipeline(self, handler_type, fn, kwargs={}):
-    handler = (handler_type, fn, kwargs)
-    return Raypipe(self.handlers + [handler])
-
-  def map(self, fn):
-    return self.__add_to_pipeline('map', fn)
-
-  def flatten(self):
-    return self.__add_to_pipeline('flatten', None)
-
-  def flat_map(self, fn):
-    return self.map(fn).flatten()
-
-  def filter(self, fn):
-    return self.__add_to_pipeline('filter', fn)
-
-  def sort(self, fn=None, reverse=False):
-    return self.__add_to_pipeline('sort', fn, dict(reverse=reverse))
-
-  def distinct(self):
-    return self.__add_to_pipeline('distinct', None)
-
-  def sort_distinct(self, fn=None, reverse=False):
-    return self.distinct().sort(fn, reverse=reverse)
-
-  def do(self, fn):
-    return self.__add_to_pipeline('do', fn)
-
-  def shuffle(self, random_state=42):
-    return self.__add_to_pipeline('shuffle', None, dict(random_state=random_state))
-
-  def to_numpy(self):
-    return self.__add_to_pipeline('do', np.array)
-
-  def compute(self, values):
-    for handler_type, handler_fn, handler_kwargs in self.handlers:
-      if handler_type == 'map':
-        values = [handler_fn(val) for val in values]
-      elif handler_type == 'flatten':
-        values = [item for sublist in values for item in sublist]
-      elif handler_type == 'filter':
-        values = [val for val in values if handler_fn(val)]
-      elif handler_type == 'sort':
-        values.sort(key=handler_fn, reverse=handler_kwargs['reverse'])
-      elif handler_type == 'distinct':
-        values = list(set(values))
-      elif handler_type == 'do':
-        values = handler_fn(values)
-      elif handler_type == 'shuffle':
-        from sklearn.utils import shuffle
-        values = shuffle(values, random_state=handler_kwargs['random_state'])
-    return values
-
-raypipe = Raypipe()
```
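raypipe is removed outright, taking its implicit numpy and sklearn dependencies with it; nothing in 1.2.1 replaces it. Its pipelines are straightforward to inline. A rough builtin equivalent of `raypipe.map(f).filter(g).sort_distinct().compute(values)`:

```python
def compute(values, f, g):
  # map → filter → distinct → sort, mirroring the deleted handler chain
  mapped = [f(v) for v in values]
  filtered = [v for v in mapped if g(v)]
  return sorted(set(filtered))

print(compute(range(10), lambda v: v % 4, lambda v: v > 0))  # [1, 2, 3]
```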
relib-1.1.2.dist-info/RECORD
DELETED
```diff
@@ -1,9 +0,0 @@
-relib/__init__.py,sha256=j1aTW1Xm9U5DlkHVR_1-k80TEZSClojoRNb_fa4rkuQ,570
-relib/hashing.py,sha256=6iAPRiJI_4jaSooZRFJnqK2limXqTmErzcwpd050LAA,8943
-relib/measure_duration.py,sha256=mTFvqGxKN2vTuHXEaWGHqZ-zm68Gbynxt1u6BHzKEQ8,511
-relib/raypipe.py,sha256=ynEoXs1dnD-360_uQC8v89xjiilt3knpocXpFaQ3plA,1905
-relib/utils.py,sha256=WezIji5azJ5A6SsWjYeUCRZApi-a1CVtDqfRaAVhWu4,6753
-relib-1.1.2.dist-info/METADATA,sha256=x3I9Pn8wm7EGEF7YVtxb0eLqzf_AIHaS9kqYrJtdW2k,1336
-relib-1.1.2.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-relib-1.1.2.dist-info/licenses/LICENSE,sha256=t9LfkVbmcvZjP0x3Sq-jR38UfTNbNtRQvc0Q8HWmLak,1054
-relib-1.1.2.dist-info/RECORD,,
```
{relib-1.1.2.dist-info → relib-1.2.1.dist-info}/licenses/LICENSE
File without changes