relib 1.2.0__tar.gz → 1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: relib
3
- Version: 1.2.0
3
+ Version: 1.2.1
4
4
  Project-URL: Repository, https://github.com/Reddan/relib.git
5
5
  Author: Hampus Hallman
6
6
  License: Copyright 2023 Hampus Hallman
@@ -10,4 +10,5 @@ License: Copyright 2023 Hampus Hallman
10
10
  The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
11
11
 
12
12
  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
13
+ License-File: LICENSE
13
14
  Requires-Python: >=3.12
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "relib"
3
- version = "1.2.0"
3
+ version = "1.2.1"
4
4
  requires-python = ">=3.12"
5
5
  dependencies = []
6
6
  authors = [
@@ -1,6 +1,8 @@
1
1
  from .utils import (
2
2
  clear_console,
3
+ console_link,
3
4
  non_none,
5
+ as_any,
4
6
  list_split,
5
7
  drop_none,
6
8
  distinct,
@@ -31,5 +33,5 @@ from .utils import (
31
33
  StrFilter,
32
34
  str_filterer,
33
35
  )
34
- from .hashing import hash
36
+ from .hashing import hash, hash_obj
35
37
  from .measure_duration import measure_duration
@@ -0,0 +1,179 @@
1
+ # Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
2
+ # Copyright (c) 2009 Gael Varoquaux
3
+ # License: BSD Style, 3 clauses.
4
+
5
+ import pickle
6
+ import hashlib
7
+ import sys
8
+ import types
9
+ import io
10
+ import decimal
11
+
12
+ try:
13
+ import numpy
14
+ except:
15
+ has_numpy = False
16
+ else:
17
+ has_numpy = True
18
+
19
+ Pickler = pickle._Pickler
20
+
21
+
22
+ class _ConsistentSet(object):
23
+ def __init__(self, set_sequence):
24
+ try:
25
+ self._sequence = sorted(set_sequence)
26
+ except (TypeError, decimal.InvalidOperation):
27
+ self._sequence = sorted(map(hash_obj, set_sequence))
28
+
29
+
30
+ class _MyHash(object):
31
+ """ Class used to hash objects that won't normally pickle """
32
+
33
+ def __init__(self, *args):
34
+ self.args = args
35
+
36
+
37
+ class Hasher(Pickler):
38
+ """ A subclass of pickler, to do cryptographic hashing, rather than pickling. """
39
+
40
+ def __init__(self, hash_name="md5"):
41
+ self.stream = io.BytesIO()
42
+ # We want a pickle protocol that only changes with major Python versions
43
+ protocol = pickle.HIGHEST_PROTOCOL
44
+ Pickler.__init__(self, self.stream, protocol=protocol)
45
+ self._hash = hashlib.new(hash_name)
46
+
47
+ def hash(self, obj) -> str:
48
+ try:
49
+ self.dump(obj)
50
+ except pickle.PicklingError as e:
51
+ e.args += ("PicklingError while hashing %r: %r" % (obj, e),)
52
+ raise
53
+ dumps = self.stream.getvalue()
54
+ self._hash.update(dumps)
55
+ return self._hash.hexdigest()
56
+
57
+ def save(self, obj):
58
+ if isinstance(obj, (types.MethodType, type({}.pop))):
59
+ # the Pickler cannot pickle instance methods; here we decompose
60
+ # them into components that make them uniquely identifiable
61
+ if hasattr(obj, "__func__"):
62
+ func_name = obj.__func__.__name__
63
+ else:
64
+ func_name = obj.__name__
65
+ inst = obj.__self__
66
+ if type(inst) == type(pickle):
67
+ obj = _MyHash(func_name, inst.__name__)
68
+ elif inst is None:
69
+ # type(None) or type(module) do not pickle
70
+ obj = _MyHash(func_name, inst)
71
+ else:
72
+ cls = obj.__self__.__class__
73
+ obj = _MyHash(func_name, inst, cls)
74
+ Pickler.save(self, obj)
75
+
76
+ def memoize(self, obj):
77
+ # We want hashing to be sensitive to value instead of reference.
78
+ # For example we want ["aa", "aa"] and ["aa", "aaZ"[:2]]
79
+ # to hash to the same value and that's why we disable memoization
80
+ # for strings
81
+ if isinstance(obj, (bytes, str)):
82
+ return
83
+ Pickler.memoize(self, obj)
84
+
85
+ # The dispatch table of the pickler is not accessible in Python
86
+ # 3, as these lines are only bugware for IPython, we skip them.
87
+ def save_global(self, obj, name=None):
88
+ # We have to override this method in order to deal with objects
89
+ # defined interactively in IPython that are not injected in
90
+ # __main__
91
+ try:
92
+ Pickler.save_global(self, obj, name=name)
93
+ except pickle.PicklingError:
94
+ Pickler.save_global(self, obj, name=name)
95
+ module = getattr(obj, "__module__", None)
96
+ if module == "__main__":
97
+ my_name = name
98
+ if my_name is None:
99
+ my_name = obj.__name__
100
+ mod = sys.modules[module]
101
+ if not hasattr(mod, my_name):
102
+ # IPython doesn't inject the variables defined
103
+ # interactively in __main__
104
+ setattr(mod, my_name, obj)
105
+
106
+ def _batch_setitems(self, items):
107
+ try:
108
+ Pickler._batch_setitems(self, iter(sorted(items)))
109
+ except TypeError:
110
+ Pickler._batch_setitems(self, iter(sorted((hash_obj(k), v) for k, v in items)))
111
+
112
+ def save_set(self, set_items):
113
+ Pickler.save(self, _ConsistentSet(set_items))
114
+
115
+ dispatch = Pickler.dispatch.copy()
116
+ dispatch[type(len)] = save_global # builtin
117
+ dispatch[type(object)] = save_global # type
118
+ dispatch[type(Pickler)] = save_global # classobj
119
+ dispatch[type(pickle.dump)] = save_global # function
120
+ dispatch[type(set())] = save_set
121
+
122
+
123
+ class NumpyHasher(Hasher):
124
+ def __init__(self, hash_name="md5"):
125
+ Hasher.__init__(self, hash_name=hash_name)
126
+
127
+ def save(self, obj):
128
+ """ Subclass the save method, to hash ndarray subclass, rather
129
+ than pickling them. Of course, this is a total abuse of
130
+ the Pickler class.
131
+ """
132
+ import numpy as np
133
+
134
+ if isinstance(obj, np.ndarray) and not obj.dtype.hasobject:
135
+ # Compute a hash of the object
136
+ # The update function of the hash requires a c_contiguous buffer.
137
+ if obj.shape == ():
138
+ # 0d arrays need to be flattened because viewing them as bytes
139
+ # raises a ValueError exception.
140
+ obj_c_contiguous = obj.flatten()
141
+ elif obj.flags.c_contiguous:
142
+ obj_c_contiguous = obj
143
+ elif obj.flags.f_contiguous:
144
+ obj_c_contiguous = obj.T
145
+ else:
146
+ # Cater for non-single-segment arrays: this creates a
147
+ # copy, and thus alleviates this issue.
148
+ # XXX: There might be a more efficient way of doing this
149
+ obj_c_contiguous = obj.flatten()
150
+
151
+ # View the array as bytes to support dtypes like datetime64
152
+ self._hash.update(memoryview(obj_c_contiguous.view(np.uint8)))
153
+
154
+ # The object will be pickled by the pickler hashed at the end.
155
+ obj = (obj.__class__, ("HASHED", obj.dtype, obj.shape, obj.strides))
156
+ elif isinstance(obj, np.dtype):
157
+ # Atomic dtype objects are interned by their default constructor:
158
+ # np.dtype("f8") is np.dtype("f8")
159
+ # This interning is not maintained by a
160
+ # pickle.loads + pickle.dumps cycle, because __reduce__
161
+ # uses copy=True in the dtype constructor. This
162
+ # non-deterministic behavior causes the internal memoizer
163
+ # of the hasher to generate different hash values
164
+ # depending on the history of the dtype object.
165
+ # To prevent the hash from being sensitive to this, we use
166
+ # .descr which is a full (and never interned) description of
167
+ # the array dtype according to the numpy doc.
168
+ obj = (obj.__class__, ("HASHED", obj.descr))
169
+
170
+ Hasher.save(self, obj)
171
+
172
+
173
+ def hash_obj(obj, hash_name="md5") -> str:
174
+ if has_numpy:
175
+ return NumpyHasher(hash_name=hash_name).hash(obj)
176
+ else:
177
+ return Hasher(hash_name=hash_name).hash(obj)
178
+
179
+ hash = hash_obj
@@ -1,21 +1,22 @@
1
1
  import os
2
2
  import re
3
- from typing import TypeVar, Iterable, Callable, Any, cast, overload
3
+ from typing import Iterable, Callable, Any, overload
4
4
  from itertools import chain
5
5
 
6
- T = TypeVar('T')
7
- U = TypeVar('U')
8
- K = TypeVar('K')
9
- K1, K2, K3, K4, K5, K6 = TypeVar('K1'), TypeVar('K2'), TypeVar('K3'), TypeVar('K4'), TypeVar('K5'), TypeVar('K6')
10
-
11
6
  def clear_console():
12
7
  os.system("cls" if os.name == "nt" else "clear")
13
8
 
14
- def non_none(obj: T | None) -> T:
9
+ def console_link(text, url):
10
+ return f"\033]8;;{url}\033\\{text}\033]8;;\033\\"
11
+
12
+ def non_none[T](obj: T | None) -> T:
15
13
  assert obj is not None
16
14
  return obj
17
15
 
18
- def list_split(l: list[T], sep: T) -> list[list[T]]:
16
+ def as_any(obj: Any) -> Any:
17
+ return obj
18
+
19
+ def list_split[T](l: list[T], sep: T) -> list[list[T]]:
19
20
  l = [sep, *l, sep]
20
21
  split_at = [i for i, x in enumerate(l) if x is sep]
21
22
  ranges = list(zip(split_at[0:-1], split_at[1:]))
@@ -24,16 +25,16 @@ def list_split(l: list[T], sep: T) -> list[list[T]]:
24
25
  for start, end in ranges
25
26
  ]
26
27
 
27
- def drop_none(l: Iterable[T | None]) -> list[T]:
28
+ def drop_none[T](l: Iterable[T | None]) -> list[T]:
28
29
  return [x for x in l if x is not None]
29
30
 
30
- def distinct(items: Iterable[T]) -> list[T]:
31
- return list(set(items))
31
+ def distinct[T](items: Iterable[T]) -> list[T]:
32
+ return list(dict.fromkeys(items))
32
33
 
33
- def first(iterable: Iterable[T]) -> T | None:
34
+ def first[T](iterable: Iterable[T]) -> T | None:
34
35
  return next(iter(iterable), None)
35
36
 
36
- def move_value(l: Iterable[T], from_i: int, to_i: int) -> list[T]:
37
+ def move_value[T](l: Iterable[T], from_i: int, to_i: int) -> list[T]:
37
38
  l = list(l)
38
39
  l.insert(to_i, l.pop(from_i))
39
40
  return l
@@ -53,7 +54,7 @@ def transpose_dict(des):
53
54
  {key: des[key][i] for key in keys}
54
55
  for i in range(length)
55
56
  ]
56
- raise ValueError('transpose_dict only accepts dict or list')
57
+ raise ValueError("transpose_dict only accepts dict or list")
57
58
 
58
59
  def make_combinations_by_dict(des, keys=None, pairs=[]):
59
60
  keys = sorted(des.keys()) if keys == None else keys
@@ -67,7 +68,7 @@ def make_combinations_by_dict(des, keys=None, pairs=[]):
67
68
  for pair in new_pairs
68
69
  ])
69
70
 
70
- def merge_dicts(*dicts: dict[K, T]) -> dict[K, T]:
71
+ def merge_dicts[T, K](*dicts: dict[K, T]) -> dict[K, T]:
71
72
  if len(dicts) == 1:
72
73
  return dicts[0]
73
74
  result = {}
@@ -75,32 +76,32 @@ def merge_dicts(*dicts: dict[K, T]) -> dict[K, T]:
75
76
  result.update(d)
76
77
  return result
77
78
 
78
- def intersect(*lists: Iterable[T]) -> list[T]:
79
+ def intersect[T](*lists: Iterable[T]) -> list[T]:
79
80
  return list(set.intersection(*map(set, lists)))
80
81
 
81
- def ensure_tuple(value: T | tuple[T, ...]) -> tuple[T, ...]:
82
+ def ensure_tuple[T](value: T | tuple[T, ...]) -> tuple[T, ...]:
82
83
  return value if isinstance(value, tuple) else (value,)
83
84
 
84
- def key_of(dicts: Iterable[dict[T, U]], key: T) -> list[U]:
85
+ def key_of[T, U](dicts: Iterable[dict[T, U]], key: T) -> list[U]:
85
86
  return [d[key] for d in dicts]
86
87
 
87
- def omit(d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
88
+ def omit[T, K](d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
88
89
  if keys:
89
90
  d = dict(d)
90
91
  for key in keys:
91
92
  del d[key]
92
93
  return d
93
94
 
94
- def pick(d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
95
+ def pick[T, K](d: dict[K, T], keys: Iterable[K]) -> dict[K, T]:
95
96
  return {key: d[key] for key in keys}
96
97
 
97
- def dict_by(keys: Iterable[K], values: Iterable[T]) -> dict[K, T]:
98
+ def dict_by[T, K](keys: Iterable[K], values: Iterable[T]) -> dict[K, T]:
98
99
  return dict(zip(keys, values))
99
100
 
100
- def tuple_by(d: dict[K, T], keys: Iterable[K]) -> tuple[T, ...]:
101
+ def tuple_by[T, K](d: dict[K, T], keys: Iterable[K]) -> tuple[T, ...]:
101
102
  return tuple(d[key] for key in keys)
102
103
 
103
- def flatten(l: Iterable[Iterable[T]]) -> list[T]:
104
+ def flatten[T](l: Iterable[Iterable[T]]) -> list[T]:
104
105
  return list(chain.from_iterable(l))
105
106
 
106
107
  def transpose(tuples, default_num_returns=0):
@@ -109,27 +110,21 @@ def transpose(tuples, default_num_returns=0):
109
110
  return ([],) * default_num_returns
110
111
  return tuple(map(list, output))
111
112
 
112
- def map_dict(fn: Callable[[T], U], d: dict[K, T]) -> dict[K, U]:
113
+ def map_dict[T, U, K](fn: Callable[[T], U], d: dict[K, T]) -> dict[K, U]:
113
114
  return {key: fn(value) for key, value in d.items()}
114
115
 
115
116
  @overload
116
- def deepen_dict(d: dict[tuple[K1], U]) -> dict[K1, U]: ...
117
-
117
+ def deepen_dict[K1, U](d: dict[tuple[K1], U]) -> dict[K1, U]: ...
118
118
  @overload
119
- def deepen_dict(d: dict[tuple[K1, K2], U]) -> dict[K1, dict[K2, U]]: ...
120
-
119
+ def deepen_dict[K1, K2, U](d: dict[tuple[K1, K2], U]) -> dict[K1, dict[K2, U]]: ...
121
120
  @overload
122
- def deepen_dict(d: dict[tuple[K1, K2, K3], U]) -> dict[K1, dict[K2, dict[K3, U]]]: ...
123
-
121
+ def deepen_dict[K1, K2, K3, U](d: dict[tuple[K1, K2, K3], U]) -> dict[K1, dict[K2, dict[K3, U]]]: ...
124
122
  @overload
125
- def deepen_dict(d: dict[tuple[K1, K2, K3, K4], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, U]]]]: ...
126
-
123
+ def deepen_dict[K1, K2, K3, K4, U](d: dict[tuple[K1, K2, K3, K4], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, U]]]]: ...
127
124
  @overload
128
- def deepen_dict(d: dict[tuple[K1, K2, K3, K4, K5], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, U]]]]]: ...
129
-
125
+ def deepen_dict[K1, K2, K3, K4, K5, U](d: dict[tuple[K1, K2, K3, K4, K5], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, U]]]]]: ...
130
126
  @overload
131
- def deepen_dict(d: dict[tuple[K1, K2, K3, K4, K5, K6], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, dict[K6, U]]]]]]: ...
132
-
127
+ def deepen_dict[K1, K2, K3, K4, K5, K6, U](d: dict[tuple[K1, K2, K3, K4, K5, K6], U]) -> dict[K1, dict[K2, dict[K3, dict[K4, dict[K5, dict[K6, U]]]]]]: ...
133
128
  def deepen_dict(d: dict[tuple[Any, ...], Any]) -> dict:
134
129
  output = {}
135
130
  if () in d:
@@ -151,31 +146,31 @@ def flatten_dict_inner(d, prefix=()):
151
146
  def flatten_dict(deep_dict: dict, prefix=()) -> dict:
152
147
  return dict(flatten_dict_inner(deep_dict, prefix))
153
148
 
154
- def group(pairs: Iterable[tuple[K, T]]) -> dict[K, list[T]]:
149
+ def group[T, K](pairs: Iterable[tuple[K, T]]) -> dict[K, list[T]]:
155
150
  values_by_key = {}
156
151
  for key, value in pairs:
157
152
  values_by_key.setdefault(key, []).append(value)
158
153
  return values_by_key
159
154
 
160
- def reversed_enumerate(l: list[T] | tuple[T, ...]) -> Iterable[tuple[int, T]]:
155
+ def reversed_enumerate[T](l: list[T] | tuple[T, ...]) -> Iterable[tuple[int, T]]:
161
156
  return zip(reversed(range(len(l))), reversed(l))
162
157
 
163
- def get_at(d: dict, keys: Iterable[Any], default: T) -> T:
158
+ def get_at[T](d: dict, keys: Iterable[Any], default: T) -> T:
164
159
  try:
165
160
  for key in keys:
166
161
  d = d[key]
167
162
  except KeyError:
168
163
  return default
169
- return cast(Any, d)
164
+ return as_any(d)
170
165
 
171
- def sized_partitions(values: Iterable[T], part_size: int) -> list[list[T]]:
166
+ def sized_partitions[T](values: Iterable[T], part_size: int) -> list[list[T]]:
172
167
  # "chunk"
173
168
  if not isinstance(values, list):
174
169
  values = list(values)
175
170
  num_parts = (len(values) / part_size).__ceil__()
176
171
  return [values[i * part_size:(i + 1) * part_size] for i in range(num_parts)]
177
172
 
178
- def num_partitions(values: Iterable[T], num_parts: int) -> list[list[T]]:
173
+ def num_partitions[T](values: Iterable[T], num_parts: int) -> list[list[T]]:
179
174
  if not isinstance(values, list):
180
175
  values = list(values)
181
176
  part_size = (len(values) / num_parts).__ceil__()
@@ -3,5 +3,5 @@ requires-python = ">=3.12"
3
3
 
4
4
  [[package]]
5
5
  name = "relib"
6
- version = "1.2.0"
6
+ version = "1.2.1"
7
7
  source = { editable = "." }
@@ -1,255 +0,0 @@
1
- """
2
- Fast cryptographic hash of Python objects, with a special case for fast
3
- hashing of numpy arrays.
4
- """
5
-
6
- # Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
7
- # Copyright (c) 2009 Gael Varoquaux
8
- # License: BSD Style, 3 clauses.
9
-
10
- import pickle
11
- import hashlib
12
- import sys
13
- import types
14
- import struct
15
- import io
16
- import decimal
17
-
18
- Pickler = pickle._Pickler
19
- _bytes_or_unicode = (bytes, str)
20
-
21
-
22
- class _ConsistentSet(object):
23
- """ Class used to ensure the hash of Sets is preserved
24
- whatever the order of its items.
25
- """
26
- def __init__(self, set_sequence):
27
- # Forces order of elements in set to ensure consistent hash.
28
- try:
29
- # Trying first to order the set assuming the type of elements is
30
- # consistent and orderable.
31
- # This fails on python 3 when elements are unorderable
32
- # but we keep it in a try as it's faster.
33
- self._sequence = sorted(set_sequence)
34
- except (TypeError, decimal.InvalidOperation):
35
- # If elements are unorderable, sorting them using their hash.
36
- # This is slower but works in any case.
37
- self._sequence = sorted((hash(e) for e in set_sequence))
38
-
39
-
40
- class _MyHash(object):
41
- """ Class used to hash objects that won't normally pickle """
42
-
43
- def __init__(self, *args):
44
- self.args = args
45
-
46
-
47
- class Hasher(Pickler):
48
- """ A subclass of pickler, to do cryptographic hashing, rather than
49
- pickling.
50
- """
51
-
52
- def __init__(self, hash_name='md5'):
53
- self.stream = io.BytesIO()
54
- # By default we want a pickle protocol that only changes with
55
- # the major python version and not the minor one
56
- protocol = pickle.HIGHEST_PROTOCOL
57
- Pickler.__init__(self, self.stream, protocol=protocol)
58
- # Initialise the hash obj
59
- self._hash = hashlib.new(hash_name)
60
-
61
- def hash(self, obj, return_digest=True):
62
- try:
63
- self.dump(obj)
64
- except pickle.PicklingError as e:
65
- e.args += ('PicklingError while hashing %r: %r' % (obj, e),)
66
- raise
67
- dumps = self.stream.getvalue()
68
- self._hash.update(dumps)
69
- if return_digest:
70
- return self._hash.hexdigest()
71
-
72
- def save(self, obj):
73
- if isinstance(obj, (types.MethodType, type({}.pop))):
74
- # the Pickler cannot pickle instance methods; here we decompose
75
- # them into components that make them uniquely identifiable
76
- if hasattr(obj, '__func__'):
77
- func_name = obj.__func__.__name__
78
- else:
79
- func_name = obj.__name__
80
- inst = obj.__self__
81
- if type(inst) == type(pickle):
82
- obj = _MyHash(func_name, inst.__name__)
83
- elif inst is None:
84
- # type(None) or type(module) do not pickle
85
- obj = _MyHash(func_name, inst)
86
- else:
87
- cls = obj.__self__.__class__
88
- obj = _MyHash(func_name, inst, cls)
89
- Pickler.save(self, obj)
90
-
91
- def memoize(self, obj):
92
- # We want hashing to be sensitive to value instead of reference.
93
- # For example we want ['aa', 'aa'] and ['aa', 'aaZ'[:2]]
94
- # to hash to the same value and that's why we disable memoization
95
- # for strings
96
- if isinstance(obj, _bytes_or_unicode):
97
- return
98
- Pickler.memoize(self, obj)
99
-
100
- # The dispatch table of the pickler is not accessible in Python
101
- # 3, as these lines are only bugware for IPython, we skip them.
102
- def save_global(self, obj, name=None, pack=struct.pack):
103
- # We have to override this method in order to deal with objects
104
- # defined interactively in IPython that are not injected in
105
- # __main__
106
- kwargs = dict(name=name, pack=pack)
107
- if sys.version_info >= (3, 4):
108
- del kwargs['pack']
109
- try:
110
- Pickler.save_global(self, obj, **kwargs)
111
- except pickle.PicklingError:
112
- Pickler.save_global(self, obj, **kwargs)
113
- module = getattr(obj, "__module__", None)
114
- if module == '__main__':
115
- my_name = name
116
- if my_name is None:
117
- my_name = obj.__name__
118
- mod = sys.modules[module]
119
- if not hasattr(mod, my_name):
120
- # IPython doesn't inject the variables defined
121
- # interactively in __main__
122
- setattr(mod, my_name, obj)
123
-
124
- dispatch = Pickler.dispatch.copy()
125
- # builtin
126
- dispatch[type(len)] = save_global
127
- # type
128
- dispatch[type(object)] = save_global
129
- # classobj
130
- dispatch[type(Pickler)] = save_global
131
- # function
132
- dispatch[type(pickle.dump)] = save_global
133
-
134
- def _batch_setitems(self, items):
135
- # forces order of keys in dict to ensure consistent hash.
136
- try:
137
- # Trying first to compare dict assuming the type of keys is
138
- # consistent and orderable.
139
- # This fails on python 3 when keys are unorderable
140
- # but we keep it in a try as it's faster.
141
- Pickler._batch_setitems(self, iter(sorted(items)))
142
- except TypeError:
143
- # If keys are unorderable, sorting them using their hash. This is
144
- # slower but works in any case.
145
- Pickler._batch_setitems(self, iter(sorted((hash(k), v)
146
- for k, v in items)))
147
-
148
- def save_set(self, set_items):
149
- # forces order of items in Set to ensure consistent hash
150
- Pickler.save(self, _ConsistentSet(set_items))
151
-
152
- dispatch[type(set())] = save_set
153
-
154
-
155
- class NumpyHasher(Hasher):
156
- """ Special case the hasher for when numpy is loaded.
157
- """
158
-
159
- def __init__(self, hash_name='md5', coerce_mmap=False):
160
- """
161
- Parameters
162
- ----------
163
- hash_name: string
164
- The hash algorithm to be used
165
- coerce_mmap: boolean
166
- Make no difference between np.memmap and np.ndarray
167
- objects.
168
- """
169
- self.coerce_mmap = coerce_mmap
170
- Hasher.__init__(self, hash_name=hash_name)
171
- # delayed import of numpy, to avoid tight coupling
172
- import numpy as np
173
- self.np = np
174
- if hasattr(np, 'getbuffer'):
175
- self._getbuffer = np.getbuffer
176
- else:
177
- self._getbuffer = memoryview
178
-
179
- def save(self, obj):
180
- """ Subclass the save method, to hash ndarray subclass, rather
181
- than pickling them. Of course, this is a total abuse of
182
- the Pickler class.
183
- """
184
- if isinstance(obj, self.np.ndarray) and not obj.dtype.hasobject:
185
- # Compute a hash of the object
186
- # The update function of the hash requires a c_contiguous buffer.
187
- if obj.shape == ():
188
- # 0d arrays need to be flattened because viewing them as bytes
189
- # raises a ValueError exception.
190
- obj_c_contiguous = obj.flatten()
191
- elif obj.flags.c_contiguous:
192
- obj_c_contiguous = obj
193
- elif obj.flags.f_contiguous:
194
- obj_c_contiguous = obj.T
195
- else:
196
- # Cater for non-single-segment arrays: this creates a
197
- # copy, and thus aleviates this issue.
198
- # XXX: There might be a more efficient way of doing this
199
- obj_c_contiguous = obj.flatten()
200
-
201
- # memoryview is not supported for some dtypes, e.g. datetime64, see
202
- # https://github.com/numpy/numpy/issues/4983. The
203
- # workaround is to view the array as bytes before
204
- # taking the memoryview.
205
- self._hash.update(
206
- self._getbuffer(obj_c_contiguous.view(self.np.uint8)))
207
-
208
- # We store the class, to be able to distinguish between
209
- # Objects with the same binary content, but different
210
- # classes.
211
- if self.coerce_mmap and isinstance(obj, self.np.memmap):
212
- # We don't make the difference between memmap and
213
- # normal ndarrays, to be able to reload previously
214
- # computed results with memmap.
215
- klass = self.np.ndarray
216
- else:
217
- klass = obj.__class__
218
- # We also return the dtype and the shape, to distinguish
219
- # different views on the same data with different dtypes.
220
-
221
- # The object will be pickled by the pickler hashed at the end.
222
- obj = (klass, ('HASHED', obj.dtype, obj.shape, obj.strides))
223
- elif isinstance(obj, self.np.dtype):
224
- # Atomic dtype objects are interned by their default constructor:
225
- # np.dtype('f8') is np.dtype('f8')
226
- # This interning is not maintained by a
227
- # pickle.loads + pickle.dumps cycle, because __reduce__
228
- # uses copy=True in the dtype constructor. This
229
- # non-deterministic behavior causes the internal memoizer
230
- # of the hasher to generate different hash values
231
- # depending on the history of the dtype object.
232
- # To prevent the hash from being sensitive to this, we use
233
- # .descr which is a full (and never interned) description of
234
- # the array dtype according to the numpy doc.
235
- klass = obj.__class__
236
- obj = (klass, ('HASHED', obj.descr))
237
- Hasher.save(self, obj)
238
-
239
-
240
- def hash(obj, hash_name='md5', coerce_mmap=False) -> str:
241
- """ Quick calculation of a hash to identify uniquely Python objects
242
- containing numpy arrays.
243
- Parameters
244
- -----------
245
- hash_name: 'md5' or 'sha1'
246
- Hashing algorithm used. sha1 is supposedly safer, but md5 is
247
- faster.
248
- coerce_mmap: boolean
249
- Make no difference between np.memmap and np.ndarray
250
- """
251
- if 'numpy' in sys.modules:
252
- hasher = NumpyHasher(hash_name=hash_name, coerce_mmap=coerce_mmap)
253
- else:
254
- hasher = Hasher(hash_name=hash_name)
255
- return hasher.hash(obj)
File without changes
File without changes
File without changes
File without changes