cdxcore 0.1.6__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cdxcore might be problematic. Click here for more details.
- cdxcore/__init__.py +1 -9
- cdxcore/config.py +1188 -521
- cdxcore/crman.py +95 -25
- cdxcore/err.py +371 -0
- cdxcore/pretty.py +468 -0
- cdxcore/pretty.py_bak.py +750 -0
- cdxcore/subdir.py +2238 -1339
- cdxcore/uniquehash.py +515 -363
- cdxcore/util.py +358 -417
- cdxcore/verbose.py +683 -248
- cdxcore/version.py +399 -140
- cdxcore-0.1.10.dist-info/METADATA +27 -0
- cdxcore-0.1.10.dist-info/RECORD +35 -0
- {cdxcore-0.1.6.dist-info → cdxcore-0.1.10.dist-info}/top_level.txt +2 -1
- docs/source/conf.py +123 -0
- tests/test_config.py +500 -0
- tests/test_crman.py +54 -0
- tests/test_err.py +86 -0
- tests/test_pretty.py +404 -0
- tests/test_subdir.py +289 -0
- tests/test_uniquehash.py +159 -144
- tests/test_util.py +122 -83
- tests/test_verbose.py +119 -0
- tests/test_version.py +153 -0
- up/git_message.py +2 -2
- cdxcore/logger.py +0 -319
- cdxcore/prettydict.py +0 -388
- cdxcore/prettyobject.py +0 -64
- cdxcore-0.1.6.dist-info/METADATA +0 -1418
- cdxcore-0.1.6.dist-info/RECORD +0 -30
- conda/conda_exists.py +0 -10
- conda/conda_modify_yaml.py +0 -42
- tests/_cdxbasics.py +0 -1086
- {cdxcore-0.1.6.dist-info → cdxcore-0.1.10.dist-info}/WHEEL +0 -0
- {cdxcore-0.1.6.dist-info → cdxcore-0.1.10.dist-info}/licenses/LICENSE +0 -0
- {cdxcore → tmp}/deferred.py +0 -0
- {cdxcore → tmp}/dynaplot.py +0 -0
- {cdxcore → tmp}/filelock.py +0 -0
- {cdxcore → tmp}/np.py +0 -0
- {cdxcore → tmp}/npio.py +0 -0
- {cdxcore → tmp}/sharedarray.py +0 -0
cdxcore/uniquehash.py
CHANGED
|
@@ -1,6 +1,54 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
Overview
|
|
3
|
+
--------
|
|
4
|
+
|
|
5
|
+
Framework for producing unique hashes for various Python elements. Hashing is key for caching strategies and managing data pipelines effectively.
|
|
6
|
+
The module contains a range of utility functions to ease implementation of pipelines and other tasks where hashes of data are required.
|
|
7
|
+
|
|
8
|
+
The functionality here follows by default important design principles which are discussed in :func:`cdxcore.uniquehash.UniqueHash.__init__`,
|
|
9
|
+
such as
|
|
10
|
+
|
|
11
|
+
* Members of objects, and elements of dictionaries which start with "_" are ignored.
|
|
12
|
+
* Member functions of objects or dictionaries are ignored.
|
|
13
|
+
* Dictionaries are assumed to be order-invariant, even though Python now
|
|
14
|
+
`maintains construction order for dictionaries <https://docs.python.org/3/whatsnew/3.6.html#whatsnew36-compactdict>`__
|
|
15
|
+
and therefore also objects.
|
|
16
|
+
|
|
17
|
+
Example::
|
|
18
|
+
|
|
19
|
+
class A(object):
|
|
20
|
+
def __init__(self, x):
|
|
21
|
+
self.x = x
|
|
22
|
+
self._y = x*2 # protected member will not be hashed by default
|
|
23
|
+
|
|
24
|
+
from cdxcore.uniquehash import UniqueHash
|
|
25
|
+
uniqueHash = UniqueHash(length=12)
|
|
26
|
+
a = A(2)
|
|
27
|
+
print( uniqueHash(a) ) # --> "2d1dc3767730"
|
|
28
|
+
|
|
29
|
+
The module contains a few pre-defined hash functions with different hash lengths:
|
|
30
|
+
|
|
31
|
+
* :func:`cdxcore.uniquehash.unique_hash8`
|
|
32
|
+
* :func:`cdxcore.uniquehash.unique_hash16`
|
|
33
|
+
* :func:`cdxcore.uniquehash.unique_hash32`
|
|
34
|
+
* :func:`cdxcore.uniquehash.unique_hash48`
|
|
35
|
+
* :func:`cdxcore.uniquehash.unique_hash64`
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
Related functionality
|
|
39
|
+
---------------------
|
|
40
|
+
|
|
41
|
+
:func:`cdxcore.subdir.SubDir.cache` implements a lightweight versioned, hash-based caching mechanism
|
|
42
|
+
using :class:`cdxcore.uniquehash.UniqueHash`.
|
|
43
|
+
|
|
44
|
+
Import
|
|
45
|
+
------
|
|
46
|
+
.. code-block:: python
|
|
47
|
+
|
|
48
|
+
import cdxcore.uniquehash as uniquehash
|
|
49
|
+
|
|
50
|
+
Documentation
|
|
51
|
+
-------------
|
|
4
52
|
"""
|
|
5
53
|
|
|
6
54
|
import datetime as datetime
|
|
@@ -12,10 +60,16 @@ from collections import OrderedDict
|
|
|
12
60
|
import numpy as np
|
|
13
61
|
import pandas as pd
|
|
14
62
|
import struct as struct
|
|
15
|
-
from .util import
|
|
16
|
-
from .
|
|
63
|
+
from .util import is_function, DEF_FILE_NAME_MAP, fmt_filename
|
|
64
|
+
from .pretty import PrettyObject
|
|
65
|
+
from .verbose import Context
|
|
17
66
|
|
|
18
67
|
def _qual_name(x, with_mod=False):
|
|
68
|
+
"""
|
|
69
|
+
Obtain a descriptive name of qualified name and module name
|
|
70
|
+
|
|
71
|
+
:meta private:
|
|
72
|
+
"""#@private
|
|
19
73
|
q = getattr(x, '__qualname__', x.__name__)
|
|
20
74
|
if with_mod:
|
|
21
75
|
m = getattr(x, "__module__", None)
|
|
@@ -23,14 +77,140 @@ def _qual_name(x, with_mod=False):
|
|
|
23
77
|
q += "@" + m
|
|
24
78
|
return q
|
|
25
79
|
|
|
80
|
+
class DebugTrace(object):
|
|
81
|
+
"""
|
|
82
|
+
Base class for tracing hashing operations.
|
|
83
|
+
|
|
84
|
+
Use either :class:`cdxcore.uniquehash.DebugTraceCollect` or
|
|
85
|
+
:class:`cdxcore.uniquehash.DebugTraceVerbose` for debugging. The latter prints out tracing during the computation
|
|
86
|
+
of a hash, while the former collects all this information in a simplistic data structure. Note that this can be quite memory intensive.
|
|
87
|
+
"""
|
|
88
|
+
def _update( self, x, msg : str = None ):
|
|
89
|
+
""" Notify processing of `x`, with an optional process `msg`
|
|
90
|
+
:meta private:
|
|
91
|
+
"""#@private
|
|
92
|
+
raise NotImplementedError()
|
|
93
|
+
def _update_topic( self, x, msg : str = None ):
|
|
94
|
+
""" Notify processing of a topic `x` with message `msg`, and return a sub-trace context
|
|
95
|
+
:meta private:
|
|
96
|
+
"""#@private
|
|
97
|
+
raise NotImplementedError()
|
|
98
|
+
def _warning( self, msg : str):
|
|
99
|
+
""" Issue warning `msg`
|
|
100
|
+
:meta private:
|
|
101
|
+
"""#@private
|
|
102
|
+
raise NotImplementedError()
|
|
103
|
+
|
|
26
104
|
# =============================================================================
|
|
27
105
|
# Hashing
|
|
28
106
|
# =============================================================================
|
|
29
107
|
|
|
30
108
|
class UniqueHash( object ):
|
|
31
109
|
"""
|
|
32
|
-
|
|
33
|
-
|
|
110
|
+
A calculator class which computes unique hashes of a fixed length.
|
|
111
|
+
|
|
112
|
+
There are a number of parameters which control the exact semantics
|
|
113
|
+
of the hashing algorithm as it iterates through collections and objects which are
|
|
114
|
+
discussed with :class:`cdxcore.uniquehash.UniqueHash`.
|
|
115
|
+
|
|
116
|
+
The base use case is to only specify the length of the unique ID string to be computed::
|
|
117
|
+
|
|
118
|
+
class A(object):
|
|
119
|
+
def __init__(self, x):
|
|
120
|
+
self.x = x
|
|
121
|
+
self._y = x*2 # protected member will not be hashed by default
|
|
122
|
+
|
|
123
|
+
from cdxcore.uniquehash import UniqueHash
|
|
124
|
+
uniqueHash = UniqueHash(length=12)
|
|
125
|
+
a = A(2)
|
|
126
|
+
print( uniqueHash(a) ) # --> "2d1dc3767730"
|
|
127
|
+
|
|
128
|
+
The callable ``uniqueHash`` can be applied to "any" Python construct.
|
|
129
|
+
|
|
130
|
+
**Private and Protected members**
|
|
131
|
+
|
|
132
|
+
When an object is passed to this functional its members are iterated using ``__dict__`` or ``__slots__``, respectively.
|
|
133
|
+
By default this process ignores any fields in objects or dictionaries which start with "_". The idea here is
|
|
134
|
+
that "functional" parameters are stored as members, but any derived data is stored in protected members.
|
|
135
|
+
This behaviour can be changed with `parse_underscore`.
|
|
136
|
+
|
|
137
|
+
Objects can optionally implement their own hashing scheme by implementing:
|
|
138
|
+
|
|
139
|
+
.. code-block:: python
|
|
140
|
+
|
|
141
|
+
__unique_hash__( self, uniqueHash : UniqueHash, debug_trace : DebugTrace )
|
|
142
|
+
|
|
143
|
+
This function may return a unique string, or any other non-None Python object which will then again be hashed.
|
|
144
|
+
A common use case is to ignore the parameters to this function and return a tuple of members of the class which are
|
|
145
|
+
pertinent for hashing.
|
|
146
|
+
|
|
147
|
+
**Dictionaries**
|
|
148
|
+
|
|
149
|
+
Since Python 3.6 `dictionaries preserve the order <https://docs.python.org/3/whatsnew/3.6.html#whatsnew36-compactdict>`__
|
|
150
|
+
in which they were constructed.
|
|
151
|
+
However, Python semantics remain otherwise order-invariant, i.e. ``{'x':1, 'y':2}`` tests equal to ``{'y':2, 'x':1}``.
|
|
152
|
+
For this reason the default behaviour here for dictionaries is to sort them before hashing their content. This also applies
|
|
153
|
+
to objects processed via their ``__dict__``.
|
|
154
|
+
|
|
155
|
+
This can be turned off with `sort_dicts`.
|
|
156
|
+
OrderedDicts or any classes derived from them (such as :class:`cdxcore.prettydict.pdct`)
|
|
157
|
+
are processed in order and not sorted in any case.
|
|
158
|
+
|
|
159
|
+
**Functions**
|
|
160
|
+
|
|
161
|
+
By default function members of objects and dictionaries (which include @properties) are
|
|
162
|
+
ignored. You can set `parse_functions` to True to parse a reduced text of the function code.
|
|
163
|
+
There are a number of additional expert settings for handling functions, see below.
|
|
164
|
+
|
|
165
|
+
**Numpy, Pandas**
|
|
166
|
+
|
|
167
|
+
Hashing of large datasets is not advised. Use hashes on the generating parameter set instead
|
|
168
|
+
where possible.
|
|
169
|
+
|
|
170
|
+
Parameters
|
|
171
|
+
----------
|
|
172
|
+
length : int, optional
|
|
173
|
+
Intended length of the hash function. Default is ``32``.
|
|
174
|
+
|
|
175
|
+
parse_underscore : bool, optional
|
|
176
|
+
How to handle object members starting with "_".
|
|
177
|
+
|
|
178
|
+
* ``"none"`` : ignore members starting with "_" (the default).
|
|
179
|
+
* ``"protected"`` : ignore 'private' members declared starting with "_" and containing "__".
|
|
180
|
+
* ``"private"`` : consider all members.
|
|
181
|
+
|
|
182
|
+
Default is ``none``.
|
|
183
|
+
|
|
184
|
+
sort_dicts : bool, optional
|
|
185
|
+
Since Python 3.6 `dictionaries are ordered <https://docs.python.org/3/whatsnew/3.6.html#whatsnew36-compactdict>`__.
|
|
186
|
+
That means that strictly speaking
|
|
187
|
+
the two dictionaries ``{'x':1, 'y':2}`` and ``{'y':2, 'x':1}`` are not identical;
|
|
188
|
+
however, Python will semantically still treat them as equal, as ``==`` between the two will return True.
|
|
189
|
+
Accordingly, by default this hash function assumes the order of dictionaries does _not_
|
|
190
|
+
matter unless they are, or are derived from, :class:`OrderedDict` (as is :class:`cdxcore.prettydict.pdct`).
|
|
191
|
+
Practically that means the function first sorts the keys of mappings before
|
|
192
|
+
hashing their items.
|
|
193
|
+
|
|
194
|
+
This can be turned off by setting `sort_dicts=False`. Default is ``True``.
|
|
195
|
+
|
|
196
|
+
parse_functions : bool, optional
|
|
197
|
+
If True, then the function will attempt to generate unique hashes for functions. Default is ``False``.
|
|
198
|
+
|
|
199
|
+
pd_ignore_column_order : bool, optional
|
|
200
|
+
(Advanced parameter).
|
|
201
|
+
Whether to ignore the order of pandas columns. The default is ``True``.
|
|
202
|
+
np_nan_equal : bool, optional
|
|
203
|
+
(Advanced parameter).
|
|
204
|
+
Whether to ignore the specific type of a NaN. The default is ``False``.
|
|
205
|
+
f_include_defaults : bool, optional
|
|
206
|
+
(Advanced parameter).
|
|
207
|
+
When parsing functions whether to include default values. Default is ``True``.
|
|
208
|
+
f_include_closure : bool, optional
|
|
209
|
+
(Advanced parameter).
|
|
210
|
+
When parsing functions whether to include the function closure. This can be expensive. Default is ``True``.
|
|
211
|
+
f_include_globals : bool, optional
|
|
212
|
+
(Advanced parameter).
|
|
213
|
+
When parsing functions whether to include globals used by the function. This can be expensive. Default is ``True``.
|
|
34
214
|
"""
|
|
35
215
|
|
|
36
216
|
def __init__(self, length : int = 32, *,
|
|
@@ -45,84 +225,8 @@ class UniqueHash( object ):
|
|
|
45
225
|
f_include_globals : bool = True,
|
|
46
226
|
):
|
|
47
227
|
"""
|
|
48
|
-
Initializes
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
Private and Protected members
|
|
52
|
-
-----------------------------
|
|
53
|
-
When an object is passed to this functional its members are iterated using __dict__ or __slots__, respectively.
|
|
54
|
-
By default this process ignores any fields in objects or dictionaries which start with "_". The idea here is
|
|
55
|
-
that 'functional' parameters are stored as members, but any derived data is stored in protected members.
|
|
56
|
-
This behaviour can be chanhed with 'parse_underscore'.
|
|
57
|
-
|
|
58
|
-
Objects can optionally implement their own hashing scheme by implementing
|
|
59
|
-
|
|
60
|
-
__unique_hash__( self, uniqueHash : UniqueHash, debug_trace : DebugTrace )
|
|
61
|
-
|
|
62
|
-
This function may return a unique string, or any other non-None Python object which will then be passed to
|
|
63
|
-
UniqueHash.__call__. A common use case is to return a tuple of the members of the class which are
|
|
64
|
-
pertinent for hashing.
|
|
65
|
-
|
|
66
|
-
Dictionaries
|
|
67
|
-
------------
|
|
68
|
-
Since Python 3.7 dictionaries preserve the order in which they were constructed https://mail.python.org/pipermail/python-dev/2017-December/151283.html.
|
|
69
|
-
However, Python semantics otherwise remain order invariant, i.e. {'x':1, 'y':2} tests equal to {'y':2',x':1}.
|
|
70
|
-
For this reasom the default behaviour for dictonaries is to sort them before hasing their content
|
|
71
|
-
(also recall that objects are typicall treated via their __dict__).
|
|
72
|
-
This can be turned off with 'sort_dicts'.
|
|
73
|
-
OrderedDicts are not sorted in any case.
|
|
74
|
-
|
|
75
|
-
Functions
|
|
76
|
-
---------
|
|
77
|
-
By default function members of objects and dictionaries (which include @properties) are
|
|
78
|
-
ignored. You can set 'parse_functions' = True to parse a reduced text of the function code.
|
|
79
|
-
There are a number of expert settings for handling functions, see below.
|
|
80
|
-
|
|
81
|
-
Numpy, Pandas
|
|
82
|
-
-------------
|
|
83
|
-
Hashing of large datasets is not advised. Use hashes on the generating parameter set instead.
|
|
84
|
-
|
|
85
|
-
Implementing custom object hashing
|
|
86
|
-
----------------------------------
|
|
87
|
-
An object may implement
|
|
88
|
-
__unique_hash__( self, uniqueHashExt : UniqueHash )
|
|
89
|
-
which is passed a unique hash object which contains all current run time parameters.
|
|
90
|
-
A good use case is:
|
|
91
|
-
|
|
92
|
-
Parameters
|
|
93
|
-
----------
|
|
94
|
-
length : int
|
|
95
|
-
Intended length of the hash function.
|
|
96
|
-
parse_underscore : bool
|
|
97
|
-
How to handle object members starting with '_'.
|
|
98
|
-
* 'none' : ignore members starting with '_' (the default)
|
|
99
|
-
* 'protected' : ignore 'private' members declared starting with '_' and containing '__' (*)
|
|
100
|
-
* 'private' : consider all members
|
|
101
|
-
sort_dicts : bool
|
|
102
|
-
From python 3.7 dictionaries are ordered. That means that strictly speaking
|
|
103
|
-
the two dictionaries {'x':1, 'y':2} and {'y':2, 'x':1} are not indentical;
|
|
104
|
-
however Python will sematicallly assume they are as == between the two will return True.
|
|
105
|
-
Accordingly, this function by default sorts first the keys of mappings before
|
|
106
|
-
hashing their items. (If dictionaries are derived from OrderedDict, the function will still process those
|
|
107
|
-
in order.)
|
|
108
|
-
This can be turned off by setting sort_dicts=False.
|
|
109
|
-
parse_functions : bool
|
|
110
|
-
If True, then the function will attempt to generate
|
|
111
|
-
unique hashes for function and property objects
|
|
112
|
-
using compress_function_code
|
|
113
|
-
|
|
114
|
-
Fine tuning
|
|
115
|
-
-----------
|
|
116
|
-
pd_ignore_column_order : bool
|
|
117
|
-
Whether to ingore the order of panda columns. The default is True
|
|
118
|
-
np_nan_equal : bool
|
|
119
|
-
Whether to ignore the specific type of a NaN. The default is False.
|
|
120
|
-
f_include_defaults : bool
|
|
121
|
-
When parsing functions whether to include default values. Default is True.
|
|
122
|
-
f_include_closure : bool
|
|
123
|
-
When parsing functions whether to include the function colusure. This can be expensive. Default is True.
|
|
124
|
-
f_include_globals : bool
|
|
125
|
-
When parsing functions whether to include globals used by the function. This can be expensicve. Default is False.
|
|
228
|
+
Initializes the hash calculator which can iteratively generate hashes of a given length for arbitrary input.
|
|
229
|
+
:meta public:
|
|
126
230
|
"""
|
|
127
231
|
self.length = int(length)
|
|
128
232
|
|
|
@@ -153,37 +257,50 @@ class UniqueHash( object ):
|
|
|
153
257
|
|
|
154
258
|
@property
|
|
155
259
|
def name(self) -> str:
|
|
260
|
+
""" Returns a descriptive name of `self`. """
|
|
156
261
|
return f"uniqueHash({self.length};{self.parse_underscore},{self.sort_dicts},{self.parse_functions})"
|
|
157
262
|
|
|
158
263
|
def clone(self):
|
|
159
|
-
""" Return copy of
|
|
264
|
+
""" Return copy of `self`. """
|
|
160
265
|
return UniqueHash( **{ k:v for k,v in self.__dict__.items() if not k[:1] == "_"} )
|
|
161
266
|
|
|
162
|
-
def __call__(self, *args, debug_trace = None, **kwargs):
|
|
267
|
+
def __call__(self, *args, debug_trace : DebugTrace = None, **kwargs) -> str:
|
|
163
268
|
"""
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
For this reason this function allows tracing all hashing activity using its debug_trace
|
|
168
|
-
parameter.
|
|
269
|
+
:meta public:
|
|
270
|
+
|
|
271
|
+
Returns a unique hash for the `args` and `kwargs` parameters passed to this function.
|
|
169
272
|
|
|
273
|
+
Example::
|
|
274
|
+
|
|
275
|
+
class A(object):
|
|
276
|
+
def __init__(self, x):
|
|
277
|
+
self.x = x
|
|
278
|
+
self._y = x*2 # protected member will not be hashed by default
|
|
279
|
+
|
|
280
|
+
from cdxcore.uniquehash import UniqueHash
|
|
281
|
+
uniqueHash = UniqueHash(12)
|
|
282
|
+
a = A(2)
|
|
283
|
+
print( uniqueHash(a) ) # --> "2d1dc3767730"
|
|
284
|
+
|
|
170
285
|
Parameters
|
|
171
286
|
----------
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
287
|
+
args, kwargs:
|
|
288
|
+
Parameters to hash.
|
|
289
|
+
|
|
290
|
+
debug_trace : :class:`cdxcore.uniquehash.DebugTrace`
|
|
291
|
+
Allows tracing of hashing activity for debugging purposes.
|
|
292
|
+
Two implementations of ``DebugTrace`` are available:
|
|
293
|
+
|
|
294
|
+
* :class:`cdxcore.uniquehash.DebugTraceVerbose` simply prints out hashing activity to stdout.
|
|
295
|
+
|
|
296
|
+
* :class:`cdxcore.uniquehash.DebugTraceCollect` collects an array of tracing information.
|
|
297
|
+
The object itself is an iterable which contains the respective tracing information
|
|
298
|
+
once the hash function has returned.
|
|
183
299
|
|
|
184
300
|
Returns
|
|
185
301
|
-------
|
|
186
|
-
|
|
302
|
+
Hash : str
|
|
303
|
+
String of at most `length` characters.
|
|
187
304
|
"""
|
|
188
305
|
h, _ = self._mk_blake( h=self.length//2 )
|
|
189
306
|
if len(args) > 0:
|
|
@@ -197,7 +314,7 @@ class UniqueHash( object ):
|
|
|
197
314
|
|
|
198
315
|
@staticmethod
|
|
199
316
|
def _mk_blake( h ):
|
|
200
|
-
""" utility function to allow passing a hash 'h' or an 'int' """
|
|
317
|
+
""" utility function to allow passing a hash 'h' or an 'int' :meta private: """
|
|
201
318
|
if not isinstance(h, int):
|
|
202
319
|
return h, False
|
|
203
320
|
h = int(h)
|
|
@@ -234,32 +351,32 @@ class UniqueHash( object ):
|
|
|
234
351
|
assert sz==8, ("Cannot handle itemsize",sz,"for numpy generic", type(x), "with value", x)
|
|
235
352
|
x = x.view(np.int64)
|
|
236
353
|
h.update(x.tobytes())
|
|
237
|
-
if not debug_trace is None: debug_trace.
|
|
354
|
+
if not debug_trace is None: debug_trace._update( x )
|
|
238
355
|
return
|
|
239
356
|
# basic elements
|
|
240
357
|
if isinstance( x, bool ):
|
|
241
358
|
h.update( x.to_bytes(1,'little', signed=True) )
|
|
242
|
-
if not debug_trace is None: debug_trace.
|
|
359
|
+
if not debug_trace is None: debug_trace._update( x )
|
|
243
360
|
return
|
|
244
361
|
if isinstance( x, int ):
|
|
245
362
|
h.update( x.to_bytes(8,'little', signed=True) )
|
|
246
|
-
if not debug_trace is None: debug_trace.
|
|
363
|
+
if not debug_trace is None: debug_trace._update( x )
|
|
247
364
|
return
|
|
248
365
|
if isinstance( x, ( float, complex ) ):
|
|
249
366
|
h.update( struct.pack('<d', x) ) # little-endian double
|
|
250
|
-
if not debug_trace is None: debug_trace.
|
|
367
|
+
if not debug_trace is None: debug_trace._update( x )
|
|
251
368
|
return
|
|
252
369
|
if isinstance( x, bytes ):
|
|
253
370
|
h.update( x )
|
|
254
|
-
if not debug_trace is None: debug_trace.
|
|
371
|
+
if not debug_trace is None: debug_trace._update( x )
|
|
255
372
|
return
|
|
256
373
|
if isinstance( x, str ):
|
|
257
374
|
h.update( x.encode('utf-8') )
|
|
258
|
-
if not debug_trace is None: debug_trace.
|
|
375
|
+
if not debug_trace is None: debug_trace._update( x )
|
|
259
376
|
return
|
|
260
377
|
# datetime etc
|
|
261
378
|
if isinstance(x,datetime.datetime):
|
|
262
|
-
if not debug_trace is None: debug_trace = debug_trace.
|
|
379
|
+
if not debug_trace is None: debug_trace = debug_trace._update_topic( x )
|
|
263
380
|
ts = float( x.timestamp() )
|
|
264
381
|
td = x.tzinfo.utcoffset(x) if not x.tzinfo is None else None
|
|
265
382
|
self._hash_any(h, ts, debug_trace=debug_trace)
|
|
@@ -274,7 +391,7 @@ class UniqueHash( object ):
|
|
|
274
391
|
else:
|
|
275
392
|
h.update( int(0).to_bytes(4,'little', signed=True) )
|
|
276
393
|
"""
|
|
277
|
-
if not debug_trace is None: debug_trace = debug_trace.
|
|
394
|
+
if not debug_trace is None: debug_trace = debug_trace._update_topic( x )
|
|
278
395
|
total_seconds = float(x.hour*60*60+x.minute*60+x.second) +\
|
|
279
396
|
float(x.microsecond) / 1000000.
|
|
280
397
|
self._hash_any(h, total_seconds, debug_trace=debug_trace)
|
|
@@ -284,17 +401,17 @@ class UniqueHash( object ):
|
|
|
284
401
|
h.update( x.second.to_bytes(2,'little', signed=True) )
|
|
285
402
|
h.update( x.microsecond.to_bytes(4,'little', signed=True))
|
|
286
403
|
if not debug_trace is None:
|
|
287
|
-
debug_trace = debug_trace.
|
|
288
|
-
debug_trace.
|
|
289
|
-
debug_trace.
|
|
290
|
-
debug_trace.
|
|
291
|
-
debug_trace.
|
|
404
|
+
debug_trace = debug_trace._update_topic( x )
|
|
405
|
+
debug_trace._update( x.hour, "hour")
|
|
406
|
+
debug_trace._update( x.minute, "minute")
|
|
407
|
+
debug_trace._update( x.second, "second")
|
|
408
|
+
debug_trace._update( x.microsecond, "microsecond")
|
|
292
409
|
if not x.tzinfo is None:
|
|
293
|
-
debug_trace.
|
|
410
|
+
debug_trace._warning( "datetime.time support for tzinfo is not working well. Use datetime.datetime")
|
|
294
411
|
"""
|
|
295
412
|
return
|
|
296
413
|
if isinstance(x,datetime.date):
|
|
297
|
-
if not debug_trace is None: debug_trace = debug_trace.
|
|
414
|
+
if not debug_trace is None: debug_trace = debug_trace._update_topic( x )
|
|
298
415
|
full = x.year * 10000 + x.month * 100 + x.day
|
|
299
416
|
self._hash_any(h, full, debug_trace=debug_trace)
|
|
300
417
|
"""
|
|
@@ -302,26 +419,26 @@ class UniqueHash( object ):
|
|
|
302
419
|
h.update( x.month.to_bytes(1,'little', signed=True) )
|
|
303
420
|
h.update( x.day.to_bytes(2,'little', signed=True) )
|
|
304
421
|
if not debug_trace is None:
|
|
305
|
-
debug_trace = debug_trace.
|
|
306
|
-
debug_trace.
|
|
307
|
-
debug_trace.
|
|
308
|
-
debug_trace.
|
|
422
|
+
debug_trace = debug_trace._update_topic( x )
|
|
423
|
+
debug_trace._update( x.year, "year" )
|
|
424
|
+
debug_trace._update( x.month, "month" )
|
|
425
|
+
debug_trace._update( x.day, "day" )
|
|
309
426
|
"""
|
|
310
427
|
return
|
|
311
428
|
if isinstance(x,datetime.timedelta):
|
|
312
|
-
if not debug_trace is None: debug_trace = debug_trace.
|
|
429
|
+
if not debug_trace is None: debug_trace = debug_trace._update_topic( x )
|
|
313
430
|
self._hash_any(h, x.total_seconds(), debug_trace=debug_trace )
|
|
314
431
|
return
|
|
315
432
|
# functions
|
|
316
|
-
if
|
|
433
|
+
if is_function(x) or isinstance(x,property):
|
|
317
434
|
if self.parse_functions:
|
|
318
435
|
self._hash_function( h, x, debug_trace=debug_trace )
|
|
319
436
|
elif not debug_trace is None:
|
|
320
|
-
debug_trace.
|
|
437
|
+
debug_trace._warning( f"Ignored function: {x.__qualname__}")
|
|
321
438
|
return
|
|
322
439
|
# slice -> tuple
|
|
323
440
|
if isinstance(x,slice):
|
|
324
|
-
if not debug_trace is None: debug_trace = debug_trace.
|
|
441
|
+
if not debug_trace is None: debug_trace = debug_trace._update_topic( x )
|
|
325
442
|
self._hash_any(h, (x.start,x.stop,x.step), debug_trace=debug_trace )
|
|
326
443
|
return
|
|
327
444
|
# test presence of __unique_hash__()
|
|
@@ -331,12 +448,12 @@ class UniqueHash( object ):
|
|
|
331
448
|
if isinstance(unique_hash, str):
|
|
332
449
|
h.update(unique_hash.encode('utf-8') )
|
|
333
450
|
if not debug_trace is None:
|
|
334
|
-
debug_trace = debug_trace.
|
|
335
|
-
debug_trace.
|
|
451
|
+
debug_trace = debug_trace._update_topic( x, msg="__unique_hash__ str" )
|
|
452
|
+
debug_trace._update( unique_hash )
|
|
336
453
|
return
|
|
337
|
-
debug_trace = None if debug_trace is None else debug_trace.
|
|
454
|
+
debug_trace = None if debug_trace is None else debug_trace._update_topic( x, msg="__unique_hash__ function" )
|
|
338
455
|
try:
|
|
339
|
-
unique_hash = unique_hash( self.clone(), debug_trace=debug_trace )
|
|
456
|
+
unique_hash = unique_hash( unique_hash=self.clone(), debug_trace=debug_trace )
|
|
340
457
|
except Exception as e:
|
|
341
458
|
raise type(e)( e, f"Exception encountered while calling '__unique_hash__' of object of type {type(x)}.")
|
|
342
459
|
if unique_hash is None:
|
|
@@ -344,10 +461,10 @@ class UniqueHash( object ):
|
|
|
344
461
|
if isinstance(unique_hash, str):
|
|
345
462
|
h.update(unique_hash.encode('utf-8') )
|
|
346
463
|
if not debug_trace is None:
|
|
347
|
-
debug_trace.
|
|
464
|
+
debug_trace._update( unique_hash )
|
|
348
465
|
else:
|
|
349
466
|
if not debug_trace is None:
|
|
350
|
-
debug_trace = debug_trace.
|
|
467
|
+
debug_trace = debug_trace._update_topic( unique_hash )
|
|
351
468
|
self._hash_any(h, unique_hash, debug_trace=debug_trace )
|
|
352
469
|
return
|
|
353
470
|
# numpy
|
|
@@ -367,7 +484,7 @@ class UniqueHash( object ):
|
|
|
367
484
|
# however, we assume here that unless they are
|
|
368
485
|
# specified as ordered, we can assume that the
|
|
369
486
|
# order does not matter.
|
|
370
|
-
if not debug_trace is None: debug_trace = debug_trace.
|
|
487
|
+
if not debug_trace is None: debug_trace = debug_trace._update_topic( x )
|
|
371
488
|
keys = sorted(x) if self.sort_dicts and not isinstance(x,OrderedDict) else list(x)
|
|
372
489
|
for k in keys:
|
|
373
490
|
if isinstance(k,str):
|
|
@@ -381,14 +498,14 @@ class UniqueHash( object ):
|
|
|
381
498
|
# lists, tuples and everything which looks like it --> lists
|
|
382
499
|
if isinstance(x, (Sequence, Iterator)):
|
|
383
500
|
assert not isinstance(x, dict)
|
|
384
|
-
if not debug_trace is None: debug_trace = debug_trace.
|
|
501
|
+
if not debug_trace is None: debug_trace = debug_trace._update_topic( x )
|
|
385
502
|
for k in x:
|
|
386
503
|
self._hash_any(h, k, debug_trace=debug_trace)
|
|
387
504
|
return
|
|
388
505
|
# all others such as sets need sorting first
|
|
389
506
|
if isinstance(x, Collection):
|
|
390
507
|
assert not isinstance(x, dict)
|
|
391
|
-
if not debug_trace is None: debug_trace = debug_trace.
|
|
508
|
+
if not debug_trace is None: debug_trace = debug_trace._update_topic( x )
|
|
392
509
|
x = sorted(x)
|
|
393
510
|
for k in x:
|
|
394
511
|
self._hash_any(h, k, debug_trace=debug_trace)
|
|
@@ -412,7 +529,7 @@ class UniqueHash( object ):
|
|
|
412
529
|
print(x.__dict__)
|
|
413
530
|
A().f() will print '{'_A__p': 1}' even though 'x.__p' is a private member to X.
|
|
414
531
|
"""
|
|
415
|
-
if not debug_trace is None: debug_trace = debug_trace.
|
|
532
|
+
if not debug_trace is None: debug_trace = debug_trace._update_topic( x, "object with __dict__" )
|
|
416
533
|
self._hash_any( h, _qual_name( type(x),False), debug_trace=debug_trace)
|
|
417
534
|
x = x.__dict__
|
|
418
535
|
keys = sorted(x) if self.sort_dicts else list(x)
|
|
@@ -429,7 +546,7 @@ class UniqueHash( object ):
|
|
|
429
546
|
self._hash_any(h, x.__dict__)
|
|
430
547
|
return
|
|
431
548
|
if hasattr(x,"__slots__"):
|
|
432
|
-
if not debug_trace is None: debug_trace = debug_trace.
|
|
549
|
+
if not debug_trace is None: debug_trace = debug_trace._update_topic( x, "object with __slots__" )
|
|
433
550
|
self._hash_any( h, _qual_name( type(x),False), debug_trace=debug_trace)
|
|
434
551
|
for k in x.__slots__:
|
|
435
552
|
if isinstance(k,str):
|
|
@@ -452,7 +569,7 @@ class UniqueHash( object ):
|
|
|
452
569
|
# Builtins: best we can do is identity by module + qualname
|
|
453
570
|
ident = _qual_name(fn,False)
|
|
454
571
|
h.update( ident.encode("utf-8") )
|
|
455
|
-
if not debug_trace is None: debug_trace.
|
|
572
|
+
if not debug_trace is None: debug_trace._update( ident, "builtin function" )
|
|
456
573
|
return
|
|
457
574
|
|
|
458
575
|
if not inspect.isfunction(fn):
|
|
@@ -460,12 +577,12 @@ class UniqueHash( object ):
|
|
|
460
577
|
obj_name = _qual_name(type(fn),False)
|
|
461
578
|
h.update( obj_name.encode("utf-8") )
|
|
462
579
|
if not debug_trace is None:
|
|
463
|
-
debug_trace = debug_trace.
|
|
464
|
-
debug_trace.
|
|
580
|
+
debug_trace = debug_trace._update_topic( fn, "using __call__" )
|
|
581
|
+
debug_trace._update( obj_name )
|
|
465
582
|
return self._hash_function(h, fn.__call__, debug_trace = debug_trace )
|
|
466
583
|
raise TypeError(f"'fn' is not a function but of type {type(fn)}.")
|
|
467
584
|
|
|
468
|
-
debug_trace = None if debug_trace is None else debug_trace.
|
|
585
|
+
debug_trace = None if debug_trace is None else debug_trace._update_topic( fn )
|
|
469
586
|
func_name = _qual_name(fn,False)
|
|
470
587
|
self._hash_any( h, func_name )
|
|
471
588
|
|
|
@@ -480,25 +597,25 @@ class UniqueHash( object ):
|
|
|
480
597
|
src = [ l.replace("\t"," ").replace(" ","").replace("\n","") for l in src ]
|
|
481
598
|
self._hash_any( h, src )
|
|
482
599
|
if not debug_trace is None:
|
|
483
|
-
debug_trace.
|
|
484
|
-
debug_trace.
|
|
600
|
+
debug_trace._update( func_name )
|
|
601
|
+
debug_trace._update( src, "reduced source code")
|
|
485
602
|
del src, func_name
|
|
486
603
|
|
|
487
604
|
if self.f_include_defaults:
|
|
488
605
|
# Defaults
|
|
489
606
|
if not fn.__defaults__ is None and len(fn.__defaults__) > 0:
|
|
490
|
-
def_debug_trace = None if debug_trace is None else debug_trace.
|
|
607
|
+
def_debug_trace = None if debug_trace is None else debug_trace._update_topic( fn.__defaults__, "position defaults")
|
|
491
608
|
self._hash_any( h, fn.__defaults__, debug_trace = def_debug_trace )
|
|
492
609
|
del def_debug_trace
|
|
493
610
|
|
|
494
611
|
if not fn.__kwdefaults__ is None and len(fn.__kwdefaults__) > 0:
|
|
495
|
-
def_debug_trace = None if debug_trace is None else debug_trace.
|
|
612
|
+
def_debug_trace = None if debug_trace is None else debug_trace._update_topic(fn.__kwdefaults__, "keyword defauls")
|
|
496
613
|
self._hash_any( h, fn.__kwdefaults__, debug_trace = def_debug_trace )
|
|
497
614
|
del def_debug_trace
|
|
498
615
|
|
|
499
616
|
if self.f_include_closure and not fn.__closure__ is None and len(fn.__closure__) > 0:
|
|
500
617
|
# Closure cells (can be large; disable if that’s a concern)
|
|
501
|
-
closure_debug_trace = None if debug_trace is None else debug_trace.
|
|
618
|
+
closure_debug_trace = None if debug_trace is None else debug_trace._update_topic( fn.__closure__, "closure" )
|
|
502
619
|
for cell in fn.__closure__:
|
|
503
620
|
self._hash_any( h, cell.cell_contents, debug_trace=closure_debug_trace )
|
|
504
621
|
del closure_debug_trace
|
|
@@ -506,7 +623,7 @@ class UniqueHash( object ):
|
|
|
506
623
|
if self.f_include_globals and len(fn.__globals__) > 0 and len(fn.__code__.co_names) > 0:
|
|
507
624
|
# Referenced globals (names actually used by the code)
|
|
508
625
|
g = fn.__globals__
|
|
509
|
-
glb_debug_trace = None if debug_trace is None else debug_trace.
|
|
626
|
+
glb_debug_trace = None if debug_trace is None else debug_trace._update_topic( fn.__code__.co_names, "linked globals" )
|
|
510
627
|
for name in sorted(fn.__code__.co_names):
|
|
511
628
|
if name in g:
|
|
512
629
|
self._hash_any( h, (name, g[name]), debug_trace=glb_debug_trace )
|
|
@@ -520,14 +637,14 @@ class UniqueHash( object ):
|
|
|
520
637
|
Does not hash attributes.
|
|
521
638
|
"""
|
|
522
639
|
assert isinstance(df, pd.DataFrame), ("DataFrame expected", type(df))
|
|
523
|
-
debug_trace = None if debug_trace is None else debug_trace.
|
|
640
|
+
debug_trace = None if debug_trace is None else debug_trace._update_topic( df )
|
|
524
641
|
if self.pd_ignore_column_order:
|
|
525
642
|
df = df.reindex(sorted(df.columns), axis=1)
|
|
526
643
|
|
|
527
644
|
# hash index
|
|
528
645
|
idx_h = pd.util.hash_pandas_object(df.index, index=False, categorize=True).values
|
|
529
646
|
h.update(idx_h.tobytes())
|
|
530
|
-
if not debug_trace is None: debug_trace.
|
|
647
|
+
if not debug_trace is None: debug_trace._update( idx_h )
|
|
531
648
|
|
|
532
649
|
# hash each column’s content + its name + dtype
|
|
533
650
|
for name, col in df.items():
|
|
@@ -536,15 +653,15 @@ class UniqueHash( object ):
|
|
|
536
653
|
col_h = pd.util.hash_pandas_object(col, index=False, categorize=True).values
|
|
537
654
|
h.update(col_h.tobytes())
|
|
538
655
|
if not debug_trace is None:
|
|
539
|
-
debug_trace.
|
|
540
|
-
debug_trace.
|
|
541
|
-
debug_trace.
|
|
656
|
+
debug_trace._update( str(name) )
|
|
657
|
+
debug_trace._update( str(col.dtype) )
|
|
658
|
+
debug_trace._update( col_h )
|
|
542
659
|
|
|
543
660
|
# attrs, if any
|
|
544
661
|
attrs = getattr(df, "attrs", None)
|
|
545
662
|
if not attrs is None:
|
|
546
663
|
self._hash_any(h, attrs)
|
|
547
|
-
if not debug_trace is None: debug_trace.
|
|
664
|
+
if not debug_trace is None: debug_trace._update( attrs, "attrs" )
|
|
548
665
|
|
|
549
666
|
def _hash_numpy( self, h, a : np.ndarray, *, debug_trace = None ):
|
|
550
667
|
"""
|
|
@@ -553,7 +670,7 @@ class UniqueHash( object ):
|
|
|
553
670
|
assert isinstance(a, np.ndarray), ("ndarray expected", type(a))
|
|
554
671
|
a = np.asarray(a)
|
|
555
672
|
|
|
556
|
-
debug_trace = None if debug_trace is None else debug_trace.
|
|
673
|
+
debug_trace = None if debug_trace is None else debug_trace._update_topic( a )
|
|
557
674
|
# Disallow arbitrary Python objects (define your own encoding first)
|
|
558
675
|
if a.dtype.kind == 'O':
|
|
559
676
|
raise TypeError("object-dtype array: map elements to bytes first (e.g., via str/utf-8).")
|
|
@@ -598,35 +715,56 @@ class UniqueHash( object ):
|
|
|
598
715
|
h.update(a.dtype.str.encode('utf-8'))
|
|
599
716
|
h.update(a.tobytes())
|
|
600
717
|
if not debug_trace is None:
|
|
601
|
-
debug_trace.
|
|
602
|
-
debug_trace.
|
|
603
|
-
debug_trace.
|
|
718
|
+
debug_trace._update( a.shape )
|
|
719
|
+
debug_trace._update( a.dtype.str )
|
|
720
|
+
debug_trace._update( a.tobytes() )
|
|
604
721
|
|
|
605
722
|
# Debugging
|
|
606
723
|
# =========
|
|
607
724
|
|
|
608
|
-
class DebugTrace(object):
|
|
609
|
-
def update( self, x, msg : str = None ):
|
|
610
|
-
""" Notify processing of 'x', with an optional process 'msg' """
|
|
611
|
-
raise NotImplementedError()
|
|
612
|
-
def update_topic( self, x, msg : str = None ):
|
|
613
|
-
""" Notify and return a sub-trace context """
|
|
614
|
-
raise NotImplementedError()
|
|
615
|
-
def warning( self, msg : str):
|
|
616
|
-
""" Issue warning """
|
|
617
|
-
raise NotImplementedError()
|
|
618
|
-
|
|
619
725
|
class DebugTraceCollect(DebugTrace):
|
|
620
726
|
"""
|
|
621
|
-
|
|
727
|
+
Keep track of everything parsed during hashing.
|
|
728
|
+
|
|
729
|
+
The result of the trace is contained in :attr:`cdxcore.uniquehash.DebugTraceCollect.trace`.
|
|
730
|
+
|
|
731
|
+
Note that `DebugTraceCollect` itself implements :class:`Collection` and :class:`Sequence` semantics
|
|
732
|
+
so you can iterate it directly.
|
|
733
|
+
|
|
734
|
+
Parameters
|
|
735
|
+
----------
|
|
736
|
+
tostr: int
|
|
737
|
+
If set to a positive integer, then any object encountered will be represented as a string with :func:`repr`,
|
|
738
|
+
and the length of the string will be limited to `tostr`. This avoids generation of large amounts
|
|
739
|
+
of data if the objects hashed are large (e.g. numpy arrays).
|
|
740
|
+
|
|
741
|
+
If set to ``None`` then the function collects the actual elements.
|
|
622
742
|
"""
|
|
623
743
|
def __init__(self, tostr : int = None ):
|
|
624
|
-
"""
|
|
744
|
+
"""
|
|
745
|
+
Initialize data collection
|
|
746
|
+
"""
|
|
625
747
|
if tostr and tostr<=0: raise ValueError("'tostr' must be None or a positive integer")
|
|
626
748
|
self.tostr = tostr
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
749
|
+
|
|
750
|
+
#: Trace of the hashing operation.
|
|
751
|
+
#: Upon completion of :meth:`cdxcore.uniquehash.UniqueHash.__call__` this list contains
|
|
752
|
+
#: elements of the following type:
|
|
753
|
+
#:
|
|
754
|
+
#: * if `tostr` is a positive integer:
|
|
755
|
+
#: * `typex`: type of the element
|
|
756
|
+
#: * `reprx`: `repr` of the element, up to `tostr` length.
|
|
757
|
+
#: * `msg`: message occurred during hashing if any
|
|
758
|
+
#: * `child`: if the element was a container or object
|
|
759
|
+
#:
|
|
760
|
+
#: * if `tostr` is ``None``:
|
|
761
|
+
#: * `x`: the element
|
|
762
|
+
#: * `msg`: message occurred during hashing if any
|
|
763
|
+
#: * `child`: if the element was a container or object
|
|
764
|
+
self.trace = []
|
|
765
|
+
def _mupdate( self, x, msg, child ):
|
|
766
|
+
""" Notify processing of 'x', with an optional process 'msg'
|
|
767
|
+
:meta private: """#@private
|
|
630
768
|
if self.tostr:
|
|
631
769
|
y = PrettyObject( typex = type(x),
|
|
632
770
|
reprx = repr(x)[:self.tostr],
|
|
@@ -637,17 +775,20 @@ class DebugTraceCollect(DebugTrace):
|
|
|
637
775
|
msg = msg,
|
|
638
776
|
child = child )
|
|
639
777
|
self.trace.append( y )
|
|
640
|
-
def
|
|
641
|
-
""" Notify processing of
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
778
|
+
def _update( self, x, msg : str = None ):
|
|
779
|
+
""" Notify processing of `x`, with an optional process `msg`
|
|
780
|
+
:meta private: """#@private
|
|
781
|
+
self._mupdate( x=x, msg=msg, child=None )
|
|
782
|
+
def _update_topic( self, x, msg : str = None ):
|
|
783
|
+
""" Notify and return a sub-trace context
|
|
784
|
+
:meta private: """#@private
|
|
645
785
|
child = DebugTraceCollect(tostr=self.tostr)
|
|
646
|
-
self.
|
|
786
|
+
self._mupdate( x=x, msg=msg, child=child )
|
|
647
787
|
return child
|
|
648
|
-
def
|
|
649
|
-
""" Issue warning
|
|
650
|
-
|
|
788
|
+
def _warning( self, msg : str):
|
|
789
|
+
""" Issue warning
|
|
790
|
+
:meta private: """
|
|
791
|
+
self._mupdate( x=None, msg=msg, child=None )
|
|
651
792
|
|
|
652
793
|
# results
|
|
653
794
|
# -------
|
|
@@ -666,27 +807,32 @@ class DebugTraceCollect(DebugTrace):
|
|
|
666
807
|
|
|
667
808
|
class DebugTraceVerbose(DebugTrace):
|
|
668
809
|
"""
|
|
669
|
-
Live printing of tracing information with
|
|
810
|
+
Live printing of tracing information with :class:`cdxcore.verbose.Context`
|
|
670
811
|
for some formatting. All objects will be reported by type and
|
|
671
812
|
their string representation, sufficiently reduced if necessary.
|
|
813
|
+
|
|
814
|
+
Parameters
|
|
815
|
+
----------
|
|
816
|
+
strsize : int, optional
|
|
817
|
+
Maximum string size when using :func:`repr` on reported objects.
|
|
818
|
+
Default is ``50``.
|
|
819
|
+
|
|
820
|
+
verbose : :class:`cdxcore.verbose.Context`, optional
|
|
821
|
+
Context object or ``None`` for a new context object
|
|
822
|
+
with full visibility (it prints everything).
|
|
672
823
|
"""
|
|
673
|
-
def __init__(self, strsize : int = 50, verbose = None ):
|
|
824
|
+
def __init__(self, strsize : int = 50, verbose : Context = None ):
|
|
674
825
|
"""
|
|
675
826
|
Initialize tracer.
|
|
676
|
-
|
|
677
|
-
Parameters
|
|
678
|
-
----------
|
|
679
|
-
strsize : int
|
|
680
|
-
Maximum string size when using repr() on reported objects.
|
|
681
|
-
verbose :
|
|
682
|
-
Context object or None for a new context object.
|
|
683
827
|
"""
|
|
684
828
|
from .verbose import Context
|
|
685
829
|
if strsize<=3: ValueError("'strsize' must exceed 3")
|
|
686
830
|
self.strsize = strsize
|
|
687
831
|
self.verbose = Context("all") if verbose is None else verbose
|
|
688
|
-
def
|
|
689
|
-
""" Notify processing of 'x', with an optional process 'msg'
|
|
832
|
+
def _update( self, x, msg : str = None ):
|
|
833
|
+
""" Notify processing of 'x', with an optional process 'msg'
|
|
834
|
+
:meta private:
|
|
835
|
+
"""#@private
|
|
690
836
|
xstr = repr(x)
|
|
691
837
|
if xstr[:1] == "'" and xstr[-1] == "'":
|
|
692
838
|
xstr = xstr[1:-1]
|
|
@@ -696,72 +842,85 @@ class DebugTraceVerbose(DebugTrace):
|
|
|
696
842
|
self.verbose.write( f"{type(x).__name__}: '{xstr}'" )
|
|
697
843
|
else:
|
|
698
844
|
self.verbose.write( f"{msg} {type(x).__name__}: '{xstr}'" )
|
|
699
|
-
def
|
|
700
|
-
""" Notify and return a sub-trace context
|
|
701
|
-
|
|
845
|
+
def _update_topic( self, x, msg : str = None ):
|
|
846
|
+
""" Notify and return a sub-trace context
|
|
847
|
+
:meta private:
|
|
848
|
+
"""#@private
|
|
849
|
+
self._update( x, msg )
|
|
702
850
|
return DebugTraceVerbose( self.strsize, self.verbose(1) )
|
|
703
|
-
def
|
|
704
|
-
""" Issue warning
|
|
851
|
+
def _warning( self, msg : str):
|
|
852
|
+
""" Issue warning
|
|
853
|
+
:meta private:"""#@private
|
|
705
854
|
self.verbose.write( msg )
|
|
706
855
|
|
|
707
856
|
# =============================================================================
|
|
708
857
|
# Utility wrappers
|
|
709
858
|
# =============================================================================
|
|
710
859
|
|
|
711
|
-
def
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
separator : str = ' ',
|
|
718
|
-
filename_by : str = None,
|
|
719
|
-
**unique_hash_arguments
|
|
720
|
-
):
|
|
860
|
+
def NamedUniqueHash( max_length : int = 60,
|
|
861
|
+
id_length : int = 16, *,
|
|
862
|
+
separator : str = ' ',
|
|
863
|
+
filename_by : str = None,
|
|
864
|
+
**unique_hash_arguments
|
|
865
|
+
) -> Callable:
|
|
721
866
|
"""
|
|
722
|
-
|
|
867
|
+
Generate user-readable unique hashes and filenames.
|
|
723
868
|
|
|
724
|
-
|
|
869
|
+
Returns a function::
|
|
870
|
+
|
|
871
|
+
f( label, *args, **kwargs )
|
|
872
|
+
|
|
873
|
+
which generates unique strings of at most a length of `max_length` of the format ``label + separator + ID``
|
|
874
|
+
where ID has length `id_length`. Since `label` heads the resulting string this function is suited for
|
|
875
|
+
use cases where a user might want an indication what a hash refers to.
|
|
876
|
+
|
|
877
|
+
This function does not assume that `label` is unique, hence the ID is prioritized.
|
|
878
|
+
See :func:`cdxcore.uniquehash.UniqueLabel` for a function which assumes the label is unique.
|
|
725
879
|
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
where ID has length id_length.
|
|
880
|
+
The maximum length of the returned string is `max_length`; if need be `label` will be truncated:
|
|
881
|
+
the returned string will always end in `ID`.
|
|
729
882
|
|
|
730
|
-
The
|
|
731
|
-
|
|
883
|
+
The function optionally makes sure that the returned string is a valid file name using
|
|
884
|
+
:func:`cdxcore.util.fmt_filename`.
|
|
732
885
|
|
|
733
|
-
|
|
734
|
-
See uniqueLabelExt() for a function which assumes the label is unique.
|
|
886
|
+
**Short Cut**
|
|
735
887
|
|
|
736
|
-
|
|
888
|
+
Consider :func:`cdxcore.uniquehash.named_unique_filename48_8` if the defaults used
|
|
889
|
+
for that function are suitable for your use case.
|
|
737
890
|
|
|
738
|
-
Important
|
|
739
|
-
|
|
740
|
-
It is strongly recommended to read the documentation for
|
|
891
|
+
**Important**
|
|
892
|
+
|
|
893
|
+
It is *strongly recommended* to read the documentation for
|
|
894
|
+
:class:`cdxcore.uniquehash.UniqueHash` for details on hashing logic
|
|
741
895
|
and the available parameters
|
|
742
896
|
|
|
743
897
|
Parameters
|
|
744
898
|
----------
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
899
|
+
max_length : int, optional
|
|
900
|
+
Total length of the returned string including the ID.
|
|
901
|
+
Defaults to ``60`` to allow file names with extensions of up to three letters.
|
|
902
|
+
|
|
903
|
+
id_length : int, optional
|
|
904
|
+
Intended length of the hash `ID`, default ``16``.
|
|
905
|
+
|
|
906
|
+
separator : str, optional
|
|
907
|
+
Separator between `label` and the `ID`.
|
|
908
|
+
Note that the separator will be included in the ID calculation, hence different separators
|
|
909
|
+
lead to different IDs. Default ``' '``.
|
|
910
|
+
|
|
911
|
+
filename_by : str, optional
|
|
912
|
+
If not ``None``, use :class:`cdxcore.util.fmt_filename` with ``by=filename_by`` to ensure the returned string is a valid
|
|
913
|
+
filename for both windows and linux, of at most `max_length` size.
|
|
914
|
+
If set to the string ``default``, use :data:`cdxcore.util.DEF_FILE_NAME_MAP`
|
|
915
|
+
as the default mapping for :func:`cdxcore.util.fmt_filename`.
|
|
916
|
+
|
|
917
|
+
** unique_hash_arguments, optional
|
|
918
|
+
Parameters passed to :class:`cdxcore.uniquehash.UniqueHash`.
|
|
760
919
|
|
|
761
920
|
Returns
|
|
762
921
|
-------
|
|
763
|
-
|
|
764
|
-
|
|
922
|
+
uniqueHash : :class:`Callable`
|
|
923
|
+
hash function with signature ``(label, *args, **kwargs)``.
|
|
765
924
|
"""
|
|
766
925
|
if id_length < 4: raise ValueError("'id_length' must be at least 4. Found {id_length}")
|
|
767
926
|
if id_length > max_length: raise ValueError(f"'max_length' must not be less than 'id_length'. Founb {max_length} and {id_length}, respectivelty")
|
|
@@ -786,48 +945,63 @@ def namedUniqueHashExt( max_length : int = 60,
|
|
|
786
945
|
return label
|
|
787
946
|
return named_unique_hash
|
|
788
947
|
|
|
789
|
-
def
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
948
|
+
def UniqueLabel( max_length : int = 60,
|
|
949
|
+
id_length : int = 8,
|
|
950
|
+
separator : str = ' ',
|
|
951
|
+
filename_by : str = None ) -> Callable:
|
|
793
952
|
"""
|
|
794
|
-
Returns a function
|
|
795
|
-
|
|
953
|
+
Returns a function::
|
|
954
|
+
|
|
796
955
|
f( unique_label )
|
|
956
|
+
|
|
957
|
+
which generates strings of at most ``max_length``
|
|
958
|
+
based on a provided ``unique_label``; essentially::
|
|
797
959
|
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
where
|
|
960
|
+
If len(unique_label) <= max_length:
|
|
961
|
+
unique_label
|
|
962
|
+
else:
|
|
963
|
+
unique_label + separator + ID
|
|
964
|
+
|
|
965
|
+
where ``ID`` is a unique hash computed from ``unique_label`` of maximum length ``id_length``.
|
|
804
966
|
|
|
805
|
-
This function assumes that
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
967
|
+
This function assumes that ``unique_label`` is unique, hence the ID is dropped if ``unique_label``
|
|
968
|
+
is less than ``max_length``.
|
|
969
|
+
Use :func:`cdxcore.uniquehash.NamedUniqueHash` if the label is not unique, and which therefore always appends the
|
|
970
|
+
dynamically calculated unique ID.
|
|
971
|
+
|
|
972
|
+
Note that if ``filename_by`` conversion is used, then this function will always attach the unique ID
|
|
973
|
+
to the filename because
|
|
974
|
+
after the conversion of the label to a filename it is no longer guaranteed that the result is unique.
|
|
975
|
+
If your label is unique as a filename, do not
|
|
976
|
+
use ``filename_by``. The function will return valid file names if ``label`` is a valid file name.
|
|
810
977
|
|
|
811
978
|
Parameters
|
|
812
979
|
----------
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
980
|
+
max_length : int
|
|
981
|
+
Total length of the returned string including the ID.
|
|
982
|
+
Defaults to 60 to allow file names with extensions with three letters.
|
|
983
|
+
|
|
984
|
+
id_length : int
|
|
985
|
+
Indicative length of the hash function, default 8.
|
|
986
|
+
id_length will be reduced to `max_length` if necessary.
|
|
987
|
+
|
|
988
|
+
separator : str
|
|
989
|
+
Separator between the label and the unique ID.
|
|
990
|
+
|
|
991
|
+
Note that the separator will be included in the ID calculation, hence different separators
|
|
992
|
+
lead to different IDs.
|
|
993
|
+
|
|
994
|
+
filename_by : str
|
|
995
|
+
If not ``None``, use :func:`cdxcore.util.fmt_filename` with ``by=filename_by``
|
|
996
|
+
to ensure the returned string is a valid
|
|
997
|
+
filename for both windows and linux, of at most ``max_length`` size.
|
|
998
|
+
If set to the string ``"default"``, use :data:`cdxcore.util.DEF_FILE_NAME_MAP`
|
|
999
|
+
as the default mapping for :func:`cdxcore.util.fmt_filename`.
|
|
827
1000
|
|
|
828
1001
|
Returns
|
|
829
1002
|
-------
|
|
830
|
-
|
|
1003
|
+
Hash function : :class:`Callable`
|
|
1004
|
+
Hash function with signature ``(unique_label)``.
|
|
831
1005
|
"""
|
|
832
1006
|
if id_length < 4: raise ValueError("'id_length' must be at least 4. Found {id_length}")
|
|
833
1007
|
if id_length > max_length: raise ValueError(f"'max_length' must not be less than 'id_length'. Founb {max_length} and {id_length}, respectivelty")
|
|
@@ -861,110 +1035,88 @@ def uniqueLabelExt( max_length : int = 60,
|
|
|
861
1035
|
# Short cuts
|
|
862
1036
|
# =============================================================================
|
|
863
1037
|
|
|
864
|
-
def
|
|
1038
|
+
def unique_hash8( *args, **kwargs ) -> str:
|
|
865
1039
|
"""
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
4) Functions and properties are ignored (*)
|
|
875
|
-
(*) you can create a hash function with different behaviour by using uniqueHashExt()
|
|
876
|
-
|
|
877
|
-
To support hashing directly in one of your objects, implement
|
|
878
|
-
|
|
879
|
-
__unique_hash__( length : int, parse_functions : bool, parse_underscore : str )
|
|
880
|
-
|
|
881
|
-
The parameters are the same as for uniqueHashExt.
|
|
882
|
-
The function is expected to return a hashable object, ideally a string.
|
|
1040
|
+
Short-cut for the hash function returned by :class:`cdxcore.uniquehash.UniqueHash`
|
|
1041
|
+
with parameter ``length=8``.
|
|
1042
|
+
|
|
1043
|
+
*Important* please make sure you are aware of the functional considerations
|
|
1044
|
+
discussed in :class:`cdxcore.uniquehash.UniqueHash` around
|
|
1045
|
+
elements starting with `_` or function members.
|
|
1046
|
+
|
|
1047
|
+
:meta private:
|
|
883
1048
|
"""
|
|
884
|
-
return UniqueHash(8)(*args,**
|
|
1049
|
+
return UniqueHash(8)(*args,**kwargs)
|
|
885
1050
|
|
|
886
|
-
def
|
|
1051
|
+
def unique_hash16( *args, **kwargs ) -> str:
|
|
887
1052
|
"""
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
3) Members with leading '_' are ignored (*)
|
|
895
|
-
4) Functions and properties are ignored (*)
|
|
896
|
-
(*) you can create a hash function with different behaviour by using uniqueHashExt()
|
|
897
|
-
|
|
898
|
-
To support hashing directly in one of your objects, implement
|
|
899
|
-
|
|
900
|
-
__unique_hash__( length : int, parse_functions : bool, parse_underscore : str )
|
|
901
|
-
|
|
902
|
-
The parameters are the same as for uniqueHashExt.
|
|
903
|
-
The function is expected to return a hashable object, ideally a string.
|
|
1053
|
+
Short-cut for the hash function returned by :class:`cdxcore.uniquehash.UniqueHash`
|
|
1054
|
+
with parameter ``length=16``.
|
|
1055
|
+
|
|
1056
|
+
*Important* please make sure you are aware of the functional considerations
|
|
1057
|
+
discussed in :class:`cdxcore.uniquehash.UniqueHash` around
|
|
1058
|
+
elements starting with `_` or function members.
|
|
904
1059
|
"""
|
|
905
|
-
return UniqueHash(16)(*args,**
|
|
1060
|
+
return UniqueHash(16)(*args,**kwargs)
|
|
906
1061
|
|
|
907
|
-
def
|
|
1062
|
+
def unique_hash32( *args, **kwargs ) -> str:
|
|
908
1063
|
"""
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
3) Members with leading '_' are ignored (*)
|
|
916
|
-
4) Functions and properties are ignored (*)
|
|
917
|
-
(*) you can create a hash function with different behaviour by using uniqueHashExt()
|
|
918
|
-
|
|
919
|
-
To support hashing directly in one of your objects, implement
|
|
920
|
-
|
|
921
|
-
__unique_hash__( length : int, parse_functions : bool, parse_underscore : str )
|
|
922
|
-
|
|
923
|
-
The parameters are the same as for uniqueHashExt.
|
|
924
|
-
The function is expected to return a hashable object, ideally a string.
|
|
1064
|
+
Short-cut for the hash function returned by :class:`cdxcore.uniquehash.UniqueHash`
|
|
1065
|
+
with parameter ``length=32``.
|
|
1066
|
+
|
|
1067
|
+
*Important* please make sure you are aware of the functional considerations
|
|
1068
|
+
discussed in :class:`cdxcore.uniquehash.UniqueHash` around
|
|
1069
|
+
elements starting with `_` or function members.
|
|
925
1070
|
"""
|
|
926
|
-
return UniqueHash(32)(*args,**
|
|
1071
|
+
return UniqueHash(32)(*args,**kwargs)
|
|
927
1072
|
|
|
928
|
-
|
|
1073
|
+
def unique_hash48( *args, **kwargs ) -> str:
|
|
1074
|
+
"""
|
|
1075
|
+
Short-cut for the hash function returned by :class:`cdxcore.uniquehash.UniqueHash`
|
|
1076
|
+
with parameter ``length=48``.
|
|
1077
|
+
|
|
1078
|
+
*Important* please make sure you are aware of the functional considerations
|
|
1079
|
+
discussed in :class:`cdxcore.uniquehash.UniqueHash` around
|
|
1080
|
+
elements starting with `_` or function members.
|
|
1081
|
+
"""
|
|
1082
|
+
return UniqueHash(48)(*args,**kwargs)
|
|
929
1083
|
|
|
930
|
-
def
|
|
1084
|
+
def unique_hash64( *args, **kwargs ) -> str:
|
|
931
1085
|
"""
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
3) Members with leading '_' are ignored (*)
|
|
939
|
-
4) Functions and properties are ignored (*)
|
|
940
|
-
(*) you can create a hash function with different behaviour by using uniqueHashExt()
|
|
941
|
-
|
|
942
|
-
To support hashing directly in one of your objects, implement
|
|
943
|
-
|
|
944
|
-
__unique_hash__( length : int, parse_functions : bool, parse_underscore : str )
|
|
945
|
-
|
|
946
|
-
The parameters are the same as for uniqueHashExt.
|
|
947
|
-
The function is expected to return a hashable object, ideally a string.
|
|
1086
|
+
Short-cut for the hash function returned by :class:`cdxcore.uniquehash.UniqueHash`
|
|
1087
|
+
with parameter ``length=64``.
|
|
1088
|
+
|
|
1089
|
+
*Important* please make sure you are aware of the functional considerations
|
|
1090
|
+
discussed in :class:`cdxcore.uniquehash.UniqueHash` around
|
|
1091
|
+
elements starting with `_` or function members.
|
|
948
1092
|
"""
|
|
949
|
-
return UniqueHash(
|
|
1093
|
+
return UniqueHash(64)(*args,**kwargs)
|
|
950
1094
|
|
|
951
|
-
def
|
|
1095
|
+
def named_unique_filename48_8( label : str, *args, **kwargs ) -> str:
|
|
952
1096
|
"""
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
1097
|
+
Returns a unique and valid filename which is composed of `label` and a unique ID
|
|
1098
|
+
computed using all of `label`, `args`, and `kwargs`.
|
|
1099
|
+
|
|
1100
|
+
Consider a use case where an experiment defined by ``definition``
|
|
1101
|
+
has produced ``results`` which we wish to :mod:`pickle` to disk.
|
|
1102
|
+
Assume further that ``str(definition)`` provides an
|
|
1103
|
+
informative user-readable but
|
|
1104
|
+
not necessarily unique description of ``definition``.
|
|
1105
|
+
|
|
1106
|
+
Pseudo-Code::
|
|
1107
|
+
|
|
1108
|
+
def store_experiment( num : int, definition : object, results : object ):
|
|
1109
|
+
label = f"Experiment {str(definition)}"
|
|
1110
|
+
filename = named_unique_filename48_8( label, (num, definition) )
|
|
1111
|
+
with open(filename, "wb") as f:
|
|
1112
|
+
pickle.dump(results, f)
|
|
1113
|
+
|
|
1114
|
+
This is the hash function returned by :class:`cdxcore.uniquehash.NamedUniqueHash`
|
|
1115
|
+
with parameters ``max_length=48, id_length=8, filename_by="default"``.
|
|
1116
|
+
|
|
1117
|
+
*Important* please make sure you are aware of the functional considerations
|
|
1118
|
+
discussed in :class:`cdxcore.uniquehash.UniqueHash` around
|
|
1119
|
+
elements starting with `_` or function members.
|
|
969
1120
|
"""
|
|
970
|
-
return
|
|
1121
|
+
return NamedUniqueHash( max_length=48, id_length=8, filename_by="default" )
|
|
1122
|
+
|