cdxcore 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cdxcore might be problematic.
- cdxcore/__init__.py +15 -0
- cdxcore/config.py +1633 -0
- cdxcore/crman.py +105 -0
- cdxcore/deferred.py +220 -0
- cdxcore/dynaplot.py +1155 -0
- cdxcore/filelock.py +430 -0
- cdxcore/jcpool.py +411 -0
- cdxcore/logger.py +319 -0
- cdxcore/np.py +1098 -0
- cdxcore/npio.py +270 -0
- cdxcore/prettydict.py +388 -0
- cdxcore/prettyobject.py +64 -0
- cdxcore/sharedarray.py +285 -0
- cdxcore/subdir.py +2963 -0
- cdxcore/uniquehash.py +970 -0
- cdxcore/util.py +1041 -0
- cdxcore/verbose.py +403 -0
- cdxcore/version.py +402 -0
- cdxcore-0.1.5.dist-info/METADATA +1418 -0
- cdxcore-0.1.5.dist-info/RECORD +30 -0
- cdxcore-0.1.5.dist-info/WHEEL +5 -0
- cdxcore-0.1.5.dist-info/licenses/LICENSE +21 -0
- cdxcore-0.1.5.dist-info/top_level.txt +4 -0
- conda/conda_exists.py +10 -0
- conda/conda_modify_yaml.py +42 -0
- tests/_cdxbasics.py +1086 -0
- tests/test_uniquehash.py +469 -0
- tests/test_util.py +329 -0
- up/git_message.py +7 -0
- up/pip_modify_setup.py +55 -0
cdxcore/subdir.py
ADDED
@@ -0,0 +1,2963 @@
"""
subdir
Simple class to keep track of directory structures and for automated caching on disk
Hans Buehler 2020
"""

import os
import os.path
import uuid
import threading
import pickle
import tempfile
import shutil
import datetime
import inspect
from collections import OrderedDict
from collections.abc import Collection, Mapping, Callable
from enum import Enum
import json as json
import platform as platform
from functools import update_wrapper
import warnings as warnings

import numpy as np
import jsonpickle as jsonpickle
import jsonpickle.ext.numpy as jsonpickle_numpy
import zlib as zlib
import gzip as gzip
import blosc as blosc

from .prettydict import pdct
from .verbose import Context
from .version import Version, version as version_decorator
from .util import fmt_list, fmt_filename, DEF_FILE_NAME_MAP, fmt as txtfmt, plain
from .uniquehash import uniqueHash48, uniqueLabelExt, namedUniqueHashExt

def error( text, *args, exception = RuntimeError, **kwargs ):
    raise exception( txtfmt(text, *args, **kwargs) )
def verify( cond, text, *args, exception = RuntimeError, **kwargs ):
    if not cond:
        error( text, *args, **kwargs, exception=exception )
def warn( text, *args, warning=RuntimeWarning, stack_level=1, **kwargs ):
    warnings.warn( txtfmt(text, *args, **kwargs), warning, stacklevel=stack_level )

"""
compression
"""
jsonpickle_numpy.register_handlers()
BLOSC_MAX_BLOCK = 2147483631
BLOSC_MAX_USE   = 1147400000 # ... blosc really cannot handle large files

"""
Hashing
"""
uniqueFileName48            = uniqueHash48
uniqueNamedFileName48_16    = namedUniqueHashExt(max_length=48,id_length=16,filename_by=DEF_FILE_NAME_MAP)
uniqueLabelledFileName48_16 = uniqueLabelExt(max_length=48,id_length=16,filename_by=DEF_FILE_NAME_MAP)

def _remove_trailing( path ):
    if len(path) > 0:
        if path[-1] in ['/', '\\']:
            return _remove_trailing(path[:-1])
    return path

class Format(Enum):
    """ File formats for SubDir """
    PICKLE = 0
    JSON_PICKLE = 1
    JSON_PLAIN = 2
    BLOSC = 3
    GZIP = 4

PICKLE = Format.PICKLE
JSON_PICKLE = Format.JSON_PICKLE
JSON_PLAIN = Format.JSON_PLAIN
BLOSC = Format.BLOSC
GZIP = Format.GZIP

"""
Use the following for config calls:
    format = subdir.mkFormat( config("format", "pickle", subdir.FORMAT_NAMES, "File format") )
"""
FORMAT_NAMES = [ s.lower() for s in Format.__members__ ]
def mkFormat( name ):
    if not name in FORMAT_NAMES:
        raise LookupError(f"Unknown format name '{name}'. Must be one of: {fmt_list(FORMAT_NAMES)}")
    return Format[name.upper()]

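
# Illustrative sketch: mapping a user-supplied name to a Format via mkFormat().
# This helper is added for illustration only; the value "json_pickle" is made up.
def _example_mkformat():
    fmt = mkFormat( "json_pickle" )
    assert fmt == Format.JSON_PICKLE
    return fmt
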
class CacheMode(object):
    """
    CacheMode
    A class which encodes standard behaviour of a caching strategy:

                                                   on   gen  off  update  clear  readonly
        load cache from disk if exists             x    x    -    -       -      x
        write updates to disk                      x    x    -    x       -      -
        delete existing object                     -    -    -    -       x      -
        delete existing object if incompatible     x    -    -    x       x      -

    See cdxbasics.subdir for functions to manage files.
    """

    ON = "on"
    GEN = "gen"
    OFF = "off"
    UPDATE = "update"
    CLEAR = "clear"
    READONLY = "readonly"

    MODES = [ ON, GEN, OFF, UPDATE, CLEAR, READONLY ]
    HELP = "'on' for standard caching; 'gen' for caching but keep existing incompatible files; 'off' to turn off; 'update' to overwrite any existing cache; 'clear' to clear existing caches; 'readonly' to read existing caches but not write new ones"

    def __init__(self, mode : str = None ):
        """
        Encodes standard behaviour of a caching strategy:

                                                       on   gen  off  update  clear  readonly
            load upon start from disk if exists        x    x    -    -       -      x
            write updates to disk                      x    x    -    x       -      -
            delete existing object upon start          -    -    -    -       x      -
            delete existing object if incompatible     x    -    -    x       x      -

        Parameters
        ----------
        mode : str
            Which mode to use.
        """
        if isinstance( mode, CacheMode ):
            return # identity copy constructor
        mode      = self.ON if mode is None else mode
        self.mode = mode.mode if isinstance(mode, CacheMode) else str(mode)
        if not self.mode in self.MODES:
            raise KeyError( self.mode, "Caching mode must be 'on', 'gen', 'off', 'update', 'clear', or 'readonly'. Found " + self.mode )
        self._read   = self.mode in [self.ON, self.READONLY, self.GEN]
        self._write  = self.mode in [self.ON, self.UPDATE, self.GEN]
        self._delete = self.mode in [self.UPDATE, self.CLEAR]
        self._del_in = self.mode in [self.UPDATE, self.CLEAR, self.ON]

    def __new__(cls, *kargs, **kwargs):
        """ Copy constructor """
        if len(kargs) == 1 and len(kwargs) == 0 and isinstance( kargs[0], CacheMode):
            return kargs[0]
        return super().__new__(cls)

    @property
    def read(self) -> bool:
        """ Whether to load any existing data when starting """
        return self._read

    @property
    def write(self) -> bool:
        """ Whether to write cache data to disk """
        return self._write

    @property
    def delete(self) -> bool:
        """ Whether to delete existing data """
        return self._delete

    @property
    def del_incomp(self) -> bool:
        """ Whether to delete existing data if it is not compatible """
        return self._del_in

    def __str__(self) -> str: # NOQA
        return self.mode
    def __repr__(self) -> str: # NOQA
        return self.mode

    def __eq__(self, other) -> bool: # NOQA
        return self.mode == other
    def __ne__(self, other) -> bool: # NOQA
        return self.mode != other

    @property
    def is_off(self) -> bool:
        """ Whether this cache mode is OFF """
        return self.mode == self.OFF

    @property
    def is_on(self) -> bool:
        """ Whether this cache mode is ON """
        return self.mode == self.ON

    @property
    def is_gen(self) -> bool:
        """ Whether this cache mode is GEN """
        return self.mode == self.GEN

    @property
    def is_update(self) -> bool:
        """ Whether this cache mode is UPDATE """
        return self.mode == self.UPDATE

    @property
    def is_clear(self) -> bool:
        """ Whether this cache mode is CLEAR """
        return self.mode == self.CLEAR

    @property
    def is_readonly(self) -> bool:
        """ Whether this cache mode is READONLY """
        return self.mode == self.READONLY

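
# Illustrative sketch of the CacheMode flags tabulated in the class docstring above.
# The helper is added for illustration only and is not used elsewhere.
def _example_cache_mode():
    cm = CacheMode("readonly")
    assert cm.read and not cm.write and not cm.delete and not cm.del_incomp
    cm2 = CacheMode(cm)      # copy construction returns the very same object
    assert cm2 is cm
    return cm
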
class CacheController( object ):
    """
    Central control for versioning.
    Enables turning caching and debugging on/off and tracks all versions
    """

    def __init__(self, *,
                 exclude_arg_types : list[type] = [Context],
                 cache_mode : CacheMode = CacheMode.ON,
                 max_filename_length: int = 48,
                 hash_length : int = 16,
                 debug_verbose : Context = None,
                 keep_last_arguments: bool = False
                 ):
        """
        Background parameters to control caching

        Parameters
        ----------
        exclude_arg_types :
            List of types to exclude from producing unique ids from function arguments. Defaults to [SubDir, Context]
        cache_mode :
            Top level cache control. Set to "OFF" to turn off all caching. Default is "ON"
        max_filename_length :
            Maximum filename length. If unique ids exceed the file name, a hash of length 'hash_length' will be integrated into the file name.
            See cdxbasics.util.namedUniqueHashExt and cdxbasics.util.uniqueLabelExt
        hash_length :
            Length of the hash used to make sure each filename is unique
            See cdxbasics.util.namedUniqueHashExt and cdxbasics.util.uniqueLabelExt
        debug_verbose :
            If non-None print caching process messages to this object.
        keep_last_arguments :
            Keep a dictionary of all parameters as string representations after each function call.
            If the function F was decorated using SubDir.cache(), you can access this information via
            F.cache_info.last_arguments
            Note that strings are limited to 100 characters per argument to avoid memory
            overload when large objects are passed.
        """
        max_filename_length = int(max_filename_length)
        hash_length         = int(hash_length)
        assert max_filename_length>0, ("'max_filename_length' must be positive")
        assert hash_length>0 and hash_length<=max_filename_length, ("'hash_length' must be positive and at most 'max_filename_length'")
        assert max_filename_length>=hash_length, ("'hash_length' must not exceed 'max_filename_length'")
        self.cache_mode             = CacheMode(cache_mode if not cache_mode is None else CacheMode.ON)
        self.debug_verbose          = debug_verbose
        self.exclude_arg_types      = set(exclude_arg_types) if not exclude_arg_types is None else None
        self.versioned              = pdct() # list
        self.uniqueNamedFileName    = namedUniqueHashExt(max_length=max_filename_length,id_length=hash_length,filename_by=DEF_FILE_NAME_MAP)
        self.uniqueLabelledFileName = uniqueLabelExt(max_length=max_filename_length,id_length=hash_length,filename_by=DEF_FILE_NAME_MAP)
        self.keep_last_arguments    = keep_last_arguments

default_cacheController = CacheController()

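
# Illustrative sketch: constructing a CacheController with non-default settings.
# The helper and its values are examples only; passing a cdxcore.verbose.Context
# as 'debug_verbose' would additionally print caching messages.
def _example_cache_controller():
    ctrl = CacheController( cache_mode="update", max_filename_length=64, hash_length=16 )
    assert ctrl.cache_mode.write and ctrl.cache_mode.delete
    return ctrl
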
class CacheTracker(object):
    """
    Utility class to track caching and be able to delete all dependent objects

    """
    def __init__(self):
        """ track cache files """
        self._files = []
    def __iadd__(self, new_file):
        """ Add a new file to the tracker """
        self._files.append( new_file )
        return self
    def delete_cache_files(self):
        """ Delete all tracked files """
        for file in self._files:
            if os.path.exists(file):
                os.remove(file)
        self._files = []
    def __str__(self) -> str: #NOQA
        return f"Tracked: {self._files}"
    def __repr__(self) -> str: #NOQA
        return f"Tracked: {self._files}"

class InitCacheInfo(object):
    pass

class CacheInfo(object):
    pass

# SubDir
# ======

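
# Illustrative usage sketch for the SubDir class defined below. The helper, the
# directory name and the stored values are examples only; the call patterns follow
# the class docstring.
def _example_subdir_usage():
    parentDir = SubDir("!/example_root")      # root under the system temp directory
    subDir    = parentDir("data")             # sub directory 'data'

    subDir['item1'] = dict(x=1)               # dictionary-style write
    subDir.item2    = [1, 2, 3]               # attribute-style write
    subDir.write("item3", "hello")            # explicit write

    item1 = subDir("item1", None)             # call operator with a default reads a file
    item2 = subDir.read("item2")              # returns None if not found
    keys  = list(subDir)                      # iterate over stored keys

    subDir.delete("item3")                    # silently ignores missing files
    parentDir.deleteAllContent()              # clean up everything
    return item1, item2, keys
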
class SubDir(object):
    """
    SubDir implements a transparent interface for storing data in files, with a common extension.
    The generic pattern is:

    1) create a root 'parentDir':
        Absolute:                      parentDir = SubDir("C:/temp/root")
        In system temp directory:      parentDir = SubDir("!/root")
        In user directory:             parentDir = SubDir("~/root")
        Relative to current directory: parentDir = SubDir("./root")

    2) Use SubDirs to transparently create hierarchies of stored data:
       assume f() will want to store some data:

        def f(parentDir, ...):

            subDir = parentDir('subdir')   <-- note that the call () operator is overloaded: if a second argument is provided, the directory will try to read the respective file.
            or
            subDir = SubDir('subdir', parentDir)
            :
            :
            Write data:

            subDir['item1'] = item1        <-- dictionary style
            subDir.item2 = item2           <-- member style
            subDir.write('item3',item3)    <-- explicit

            Note that write() can write to multiple files at the same time.

    3) Reading is similar

        def readF(parentDir,...):

            subDir = parentDir('subdir')

            item = subDir('item', 'i1')     <-- returns 'i1' if not found.
            item = subdir.read('item')      <-- returns None if not found
            item = subdir.read('item','i2') <-- returns 'i2' if not found
            item = subDir['item']           <-- throws a KeyError if not found
            item = subDir.item              <-- throws an AttributeError if not found

    4) Treating data like dictionaries

        def scanF(parentDir,...)

            subDir = parentDir('f')

            for item in subDir:
                data = subDir[item]

        Delete items:

            del subDir['item']              <-- silently fails if 'item' does not exist
            del subDir.item                 <-- silently fails if 'item' does not exist
            subDir.delete('item')           <-- silently fails if 'item' does not exist
            subDir.delete('item', True)     <-- throws a KeyError if 'item' does not exist

    5) Cleaning up

        parentDir.deleteAllContent()        <-- silently deletes all files and sub directories.

    6) As of version 0.2.59 subdir supports json file formats. Those can be controlled with the 'fmt' keyword in various functions.
       The most straightforward way is to specify the format of the directory itself:

        subdir = SubDir("!/.test", fmt=SubDir.JSON_PICKLE)

       The following formats are supported:

        SubDir.PICKLE:
            Use pickle
        SubDir.JSON_PLAIN:
            Uses cdxbasics.util.plain() to convert data into plain Python objects and writes
            this to disk as text. Loading back such files will result in plain Python objects,
            but *not* the original objects
        SubDir.JSON_PICKLE:
            Uses the jsonpickle package to load/write data in somewhat readable text formats.
            Data can be loaded back from such a file, but files may not be readable (e.g. numpy arrays
            are written in compressed form).
        SubDir.BLOSC:
            Uses https://www.blosc.org/python-blosc/ to compress data on-the-fly.
            BLOSC is much faster than GZIP or ZLIB but is limited to 2GB data, sadly.
        SubDir.GZIP:
            Uses https://docs.python.org/3/library/zlib.html to compress data on-the-fly
            using, essentially, GZIP.

    Summary of properties:

                    | Restores objects | Human readable | Speed | Compression
        PICKLE      | yes              | no             | high  | no
        JSON_PLAIN  | no               | yes            | low   | no
        JSON_PICKLE | yes              | limited        | low   | no
        BLOSC       | yes              | no             | high  | yes
        GZIP        | yes              | no             | high  | yes

    Several other operations are supported; see help()

    Hans Buehler May 2020
    """

    class __RETURN_SUB_DIRECTORY(object):
        pass

    Format = Format
    PICKLE = Format.PICKLE
    JSON_PICKLE = Format.JSON_PICKLE
    JSON_PLAIN = Format.JSON_PLAIN
    BLOSC = Format.BLOSC
    GZIP = Format.GZIP

    DEFAULT_RAISE_ON_ERROR = False
    RETURN_SUB_DIRECTORY = __RETURN_SUB_DIRECTORY
    DEFAULT_FORMAT = Format.PICKLE
    DEFAULT_CREATE_DIRECTORY = False # legacy behaviour so that self.path is a valid path
    EXT_FMT_AUTO = "*"

    MAX_VERSION_BINARY_LEN = 128

    VER_NORMAL = 0
    VER_CHECK = 1
    VER_RETURN = 2

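
    # Illustrative examples (comment only; directory names are made up): choosing a
    # format also selects a default extension unless one is given explicitly, e.g.
    #
    #   sd = SubDir("!/example", fmt=SubDir.JSON_PLAIN)   # files end in '.json'
    #   sd = SubDir("!/example;*.bin")                    # same directory, '.bin' files
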
    def __init__(self, name : str,
                 parent = None, *,
                 ext : str = None,
                 fmt : Format = None,
                 eraseEverything : bool = False,
                 createDirectory : bool = None,
                 cacheController : CacheController = None
                 ):
        """
        Instantiates a sub directory which contains pickle files with a common extension.
        By default the directory is created.

        Absolute directories
            sd  = SubDir("!/subdir")           - relative to system temp directory
            sd  = SubDir("~/subdir")           - relative to user home directory
            sd  = SubDir("./subdir")           - relative to current working directory (explicit)
            sd  = SubDir("subdir")             - relative to current working directory (implicit)
            sd  = SubDir("/tmp/subdir")        - absolute path (linux)
            sd  = SubDir("C:/temp/subdir")     - absolute path (windows)
        Short-cut
            sd  = SubDir("")                   - current working directory

        It is often desired that the user specifies a sub-directory name under some common parent directory.
        You can create sub directories if you provide a 'parent' directory:
            sd2 = SubDir("subdir2", parent=sd)            - relative to other sub directory
            sd2 = sd("subdir2")                           - using call operator
        Works with strings, too:
            sd2 = SubDir("subdir2", parent="~/my_config") - relative to ~/my_config

        All files managed by SubDir will have the same extension.
        The extension can be specified with 'ext', or as part of the directory string:
            sd  = SubDir("~/subdir;*.bin")     - set extension to 'bin'

        COPY CONSTRUCTION
        This function also allows copy construction and construction from a repr() string.

        HANDLING KEYS
        SubDirs allows reading data using the item and attribute notation, i.e. we may use
            sd = SubDir("~/subdir")
            x  = sd.x
            y  = sd['y']
        If the respective keys are not found, exceptions are thrown.

        NONE OBJECTS
        It is possible to set the directory name to 'None'. In this case the directory will behave as if:
            No files exist
            Writing fails with an EOFError.

        Parameters
        ----------
        name - Name of the directory.
                 '.' for current directory
                 '~' for home directory
                 '!' for system default temp directory
               May contain a formatting string for defining 'ext' on the fly:
                 Use "!/test;*.bin" to specify 'test' in the system temp directory as root directory with extension 'bin'
               Can be set to None, see above.
        parent - Parent directory. If provided, will also set defaults for 'ext' and 'raiseOnError'
        ext - standard file extension for data files. All files will share the same extension.
              If None, use the parent extension, or if that is not specified use an extension depending on 'fmt':
                 'pck' for the default PICKLE format
                 'json' for JSON_PLAIN
                 'jpck' for JSON_PICKLE
              Set to "" to turn off managing extensions.
        fmt - format, currently pickle or json
        eraseEverything - delete all contents in the newly defined subdir
        createDirectory - whether to create the directory.
                          Otherwise it will be created upon first write().
                          Set to None to use the setting of the parent directory
        """
        createDirectory = bool(createDirectory) if not createDirectory is None else None

        # copy constructor support
        if isinstance(name, SubDir):
            assert parent is None, "Internal error: copy construction does not accept 'parent' keyword"
            self._path  = name._path
            self._ext   = name._ext if ext is None else ext
            self._fmt   = name._fmt if fmt is None else fmt
            self._crt   = name._crt if createDirectory is None else createDirectory
            self._cctrl = name._cctrl if cacheController is None else cacheController
            if eraseEverything: raise ValueError( "Cannot use 'eraseEverything' when cloning a directory")
            return

        # reconstruction from a dictionary
        if isinstance(name, Mapping):
            assert parent is None, "Internal error: dictionary construction does not accept 'parent' keyword"
            self._path  = name['_path']
            self._ext   = name['_ext'] if ext is None else ext
            self._fmt   = name['_fmt'] if fmt is None else fmt
            self._crt   = name['_crt'] if createDirectory is None else createDirectory
            self._cctrl = name['_cctrl'] if cacheController is None else cacheController
            if eraseEverything: raise ValueError( "Cannot use 'eraseEverything' when cloning a directory")
            return

        # parent
        if isinstance(parent, str):
            parent = SubDir( parent, ext=ext, fmt=fmt, createDirectory=createDirectory, cacheController=cacheController )
        if not parent is None and not isinstance(parent, SubDir):
            raise ValueError( f"'parent' must be SubDir, str, or None. Found object of type '{type(parent)}'")

        # operational flags
        _name = name if not name is None else "(none)"

        # format
        if fmt is None:
            assert parent is None or not parent._fmt is None
            self._fmt = parent._fmt if not parent is None else self.DEFAULT_FORMAT
            assert not self._fmt is None
        else:
            self._fmt = fmt
            assert not self._fmt is None

        # extension
        if not name is None:
            if not isinstance(name, str): raise ValueError( txtfmt("'name' must be string. Found object of type %s", type(name) ))
            name = name.replace('\\','/')

            # avoid windows file names on Linux
            if platform.system() != "Windows" and name[1:3] == ":/":
                raise ValueError( txtfmt("Detected use of windows-style drive declaration %s in path %s.", name[:3], name ))

            # extract extension information
            ext_i = name.find(";*.")
            if ext_i >= 0:
                _ext = name[ext_i+3:]
                if not ext is None and ext != _ext:
                    raise ValueError( txtfmt("Cannot specify an extension both in the name string ('%s') and as 'ext' ('%s')", _name, ext))
                ext  = _ext
                name = name[:ext_i]
        if ext is None:
            self._ext = self.EXT_FMT_AUTO if parent is None else parent._ext
        else:
            self._ext = SubDir._extract_ext(ext)

        # createDirectory
        if createDirectory is None:
            self._crt = self.DEFAULT_CREATE_DIRECTORY if parent is None else parent._crt
        else:
            self._crt = bool(createDirectory)

        # cache controller
        assert cacheController is None or type(cacheController).__name__ == CacheController.__name__, ("'cacheController' should be of type 'CacheController'", type(cacheController))
        self._cctrl = cacheController

        # name
        if name is None:
            if not parent is None and not parent._path is None:
                name = parent._path[:-1]
        else:
            # expand name
            name = _remove_trailing(name)
            if name == "" and parent is None:
                name = "."
            if name[:1] in ['!', '~'] or name[:2] == "./" or name == ".":
                if len(name) > 1 and name[1] != '/':
                    raise ValueError( txtfmt("If 'name' starts with '%s', then the second character must be '/' (or '\\' on windows). Found 'name' set to '%s'", name[:1], _name ))
                if name[0] == '!':
                    name = SubDir.tempDir()[:-1] + name[1:]
                elif name[0] == ".":
                    name = SubDir.workingDir()[:-1] + name[1:]
                else:
                    assert name[0] == "~", ("Internal error", name[0] )
                    name = SubDir.userDir()[:-1] + name[1:]
            elif name == "..":
                error("Cannot use name '..'")
            elif not parent is None:
                # path relative to 'parent'
                if not parent.is_none:
                    name = os.path.join( parent._path, name )

        # create directory/clean up
        if name is None:
            self._path = None
        else:
            # expand path
            self._path = os.path.abspath(name) + '/'
            self._path = self._path.replace('\\','/')

            if eraseEverything:
                self.eraseEverything(keepDirectory=self._crt)
            if self._crt:
                self.createDirectory()

    @staticmethod
    def expandStandardRoot( name ):
        """
        Expands 'name' by a standardized root directory if provided:
        If 'name' starts with   -> return
            !                   -> tempDir()
            .                   -> workingDir()
            ~                   -> userDir()
        """
        if len(name) < 2 or name[0] not in ['.','!','~'] or name[1] not in ["\\","/"]:
            return name
        if name[0] == '!':
            return SubDir.tempDir() + name[2:]
        elif name[0] == ".":
            return SubDir.workingDir() + name[2:]
        else:
            return SubDir.userDir() + name[2:]

    def createDirectory( self ):
        """
        Creates the directory if it doesn't exist yet.
        Does not do anything if is_none.
        """
        # create directory/clean up
        if self._path is None:
            return
        # create directory
        if not os.path.exists( self._path[:-1] ):
            try:
                os.makedirs( self._path[:-1] )
                return
            except FileExistsError:
                pass
        if not os.path.isdir(self._path[:-1]):
            raise NotADirectoryError(txtfmt( "Cannot use sub directory %s: object exists but is not a directory", self._path[:-1] ))

    def pathExists(self) -> bool:
        """ Returns True if the current directory exists """
        return os.path.exists( self._path[:-1] ) if not self._path is None else False

    # -- a few basic properties --

    def __str__(self) -> str: # NOQA
        if self._path is None: return "(none)"
        ext = self.ext
        return self._path if len(ext) == 0 else self._path + ";*" + ext

    def __repr__(self) -> str: # NOQA
        if self._path is None: return "SubDir(None)"
        return "SubDir(%s)" % self.__str__()

    def __eq__(self, other) -> bool: # NOQA
        """ Tests equality between two SubDirs, or between a SubDir and a directory """
        if isinstance(other,str):
            return self._path == other
        verify( isinstance(other,SubDir), "Cannot compare SubDir to object of type '%s'", type(other).__name__, exception=TypeError )
        return self._path == other._path and self._ext == other._ext and self._fmt == other._fmt

    def __bool__(self) -> bool:
        """ Returns True if 'self' is set, or False if 'self' is a None directory """
        return not self.is_none

    def __hash__(self) -> str: #NOQA
        return hash( (self._path, self._ext, self._fmt) )

    @property
    def is_none(self) -> bool:
        """ Whether this object is 'None' or not """
        return self._path is None

    @property
    def path(self) -> str:
        """
        Return current path, including trailing '/'
        Note that the path may not exist yet. If this is required, consider using existing_path
        """
        return self._path

    @property
    def existing_path(self) -> str:
        """
        Return current path, including trailing '/'.
        In addition to self.path this property ensures that the directory structure exists (or raises an exception)
        """
        self.createDirectory()
        return self.path

    @property
    def fmt(self) -> Format:
        """ Returns current format """
        return self._fmt

    @property
    def ext(self) -> str:
        """
        Returns the common extension of the files in this directory, including leading '.'
        Resolves '*' into the extension associated with the current format.
        """
        return self._ext if self._ext != self.EXT_FMT_AUTO else self._auto_ext(self._fmt)

    def autoExt( self, ext : str = None ) -> str:
        """
        Computes the effective extension based on inputs 'ext', defaulting to the SubDir's extension.
        Resolves '*' into the extension associated with the specified format.
        This function allows setting 'ext' also as a Format.

        Returns the extension with leading '.'
        """
        if isinstance(ext, Format):
            return self._auto_ext(ext)
        else:
            ext = self._ext if ext is None else SubDir._extract_ext(ext)
            return ext if ext != self.EXT_FMT_AUTO else self._auto_ext(self._fmt)

    def autoExtFmt( self, *, ext : str = None, fmt : Format = None ) -> str:
        """
        Computes the effective extension and format based on inputs 'ext' and 'fmt', each of which defaults to the SubDir's current settings.
        Resolves '*' into the extension associated with the specified format.
        This function allows setting 'ext' also as a Format.

        Returns (ext, fmt) where 'ext' contains the leading '.'
        """
        if isinstance(ext, Format):
            verify( fmt is None or fmt == ext, "If 'ext' is a Format, then 'fmt' must match 'ext' or be None. Found '%s' and '%s', respectively.", ext, fmt, exception=ValueError )
            return self._auto_ext(ext), ext

        fmt = fmt if not fmt is None else self._fmt
        ext = self._ext if ext is None else SubDir._extract_ext(ext)
        ext = ext if ext != self.EXT_FMT_AUTO else self._auto_ext(fmt)
        return ext, fmt

    @property
    def cacheController(self):
        """ Returns the assigned CacheController, or the module default """
        return self._cctrl if not self._cctrl is None else default_cacheController

    # -- static helpers --

    @staticmethod
    def _auto_ext( fmt : Format ) -> str:
        """ Default extension for a given format, including leading '.' """
        if fmt == Format.PICKLE:
            return ".pck"
        if fmt == Format.JSON_PLAIN:
            return ".json"
        if fmt == Format.JSON_PICKLE:
            return ".jpck"
        if fmt == Format.BLOSC:
            return ".zbsc"
        if fmt == Format.GZIP:
            return ".pgz"
        error("Unknown format '%s'", str(fmt))

    @staticmethod
    def _version_to_bytes( version : str ) -> bytearray:
        """ Convert string version to byte string of at most size MAX_VERSION_BINARY_LEN + 1 """
        if version is None:
            return None
        version_ = bytearray(version,'utf-8')
        if len(version_) >= SubDir.MAX_VERSION_BINARY_LEN:
            raise ValueError(txtfmt("Cannot use version '%s': when translated into a bytearray it exceeds the maximum version lengths of '%ld' (byte string is '%s')", version, SubDir.MAX_VERSION_BINARY_LEN-1, version_ ))
        ver_ = bytearray(SubDir.MAX_VERSION_BINARY_LEN)
        l = len(version_)
        ver_[0] = l
        ver_[1:1+l] = version_
        assert len(ver_) == SubDir.MAX_VERSION_BINARY_LEN, ("Internal error", len(ver_), ver_)
        return ver_

    @staticmethod
    def _extract_ext( ext : str ) -> str:
        """
        Checks that 'ext' is an extension, and returns .ext.
        -- Accepts '.ext' and 'ext'
        -- Detects use of directories
        -- Returns '*' if ext='*'
        """
        assert not ext is None, ("'ext' should not be None here")
        verify( isinstance(ext,str), "Extension 'ext' must be a string. Found type %s", type(ext).__name__, exception=ValueError )
        # auto?
        if ext == SubDir.EXT_FMT_AUTO:
            return SubDir.EXT_FMT_AUTO
        # remove leading '.'s
        while ext[:1] == ".":
            ext = ext[1:]
        # empty extension -> match all files
        if ext == "":
            return ""
        # ensure extension has no directory information
        sub, _ = os.path.split(ext)
        verify( len(sub) == 0, "Extension '%s' contains directory information", ext)

        # remove internal characters
        verify( ext[0] != "!", "Extension '%s' cannot start with '!' (this symbol indicates the temp directory)", ext, exception=ValueError )
        verify( ext[0] != "~", "Extension '%s' cannot start with '~' (this symbol indicates the user's directory)", ext, exception=ValueError )
        return "." + ext

    # -- public utilities --

    def fullFileName(self, key : str, *, ext : str = None) -> str:
        """
        Returns fully qualified file name.
        The function tests that 'key' does not contain directory information.

        If 'self' is None, then this function returns None
        If key is None then this function returns None

        Parameters
        ----------
        key : str
            Core file name, e.g. the 'key' in a data base sense
        ext : str
            If not None, use this extension rather than self.ext

        Returns
        -------
            Fully qualified system file name

        [This function has an alias 'fullKeyName' for backward compatibility]
        """
        if self._path is None or key is None:
            return None
        key = str(key)
        verify( len(key) > 0, "'key' cannot be empty")

        sub, _ = os.path.split(key)
        verify( len(sub) == 0, "Key '%s' contains directory information", key)

        verify( key[0] != "!", "Key '%s' cannot start with '!' (this symbol indicates the temp directory)", key, exception=ValueError )
        verify( key[0] != "~", "Key '%s' cannot start with '~' (this symbol indicates the user's directory)", key, exception=ValueError )

        ext = self.autoExt( ext )
        if len(ext) > 0 and key[-len(ext):] != ext:
            return self._path + key + ext
        return self._path + key
    fullKeyName = fullFileName # backwards compatibility

    @staticmethod
    def tempDir() -> str:
        """
        Return system temp directory. Short cut to tempfile.gettempdir()
        Result contains trailing '/'
        """
        d = tempfile.gettempdir()
        assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-1", d)
        return d + "/"

    @staticmethod
    def workingDir() -> str:
        """
        Return current working directory. Short cut for os.getcwd()
        Result contains trailing '/'
        """
        d = os.getcwd()
        assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-2", d)
        return d + "/"

    @staticmethod
    def userDir() -> str:
        """
        Return the user's home directory. Short cut for os.path.expanduser('~')
        Result contains trailing '/'
        """
        d = os.path.expanduser('~')
        assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-3", d)
        return d + "/"

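
    # Illustrative examples for fullFileName() (comment only; names are made up):
    #
    #   sd = SubDir("!/example")               # default format PICKLE -> extension '.pck'
    #   sd.fullFileName("results")             # -> "<temp dir>/example/results.pck"
    #   sd.fullFileName("results", ext="csv")  # -> "<temp dir>/example/results.csv"
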
    # -- read --

    def _read_reader( self, reader, key : str, default, raiseOnError : bool, *, ext : str = None ):
        """
        Utility function for read() and readLine()

        Parameters
        ----------
        reader( key, fullFileName, default )
            A function which is called to read the file once the correct directory is identified
                key          : key (for error messages, might include '/')
                fullFileName : full file name
                default value
        key : str or list
            str: fully qualified key
            list: list of fully qualified names
        default :
            default value. None is a valid default value
            list : list of defaults for a list of keys
        raiseOnError : bool
            If True, and the file does not exist, throw exception
        ext :
            Extension or None for current extension.
            list : list of extensions for a list of keys
        """
        # vector version
        if not isinstance(key,str):
            if not isinstance(key, Collection): raise ValueError(txtfmt( "'key' must be a string, or an iterable object. Found type %s", type(key)))
            l = len(key)
            if default is None or isinstance(default,str) or not isinstance(default, Collection):
                default = [ default ] * l
            else:
                if len(default) != l: raise ValueError(txtfmt("'default' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(default), l ))
            if ext is None or isinstance(ext, str) or not isinstance(ext, Collection):
                ext = [ ext ] * l
            else:
                if len(ext) != l: raise ValueError(txtfmt("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l ))
            return [ self._read_reader(reader=reader,key=k,default=d,raiseOnError=raiseOnError,ext=e) for k, d, e in zip(key,default,ext) ]

        # deleted directory?
        if self._path is None:
            verify( not raiseOnError, "Trying to read '%s' from an empty directory object", key, exception=NotADirectoryError)
            return default

        # single key
        if len(key) == 0: raise ValueError(txtfmt("'key' missing (the filename)" ))
        sub, key_ = os.path.split(key)
        if len(sub) > 0:
            return self(sub)._read_reader(reader=reader,key=key_,default=default,raiseOnError=raiseOnError,ext=ext)
        if len(key_) == 0: raise ValueError(txtfmt("'key' %s indicates a directory, not a file", key))

        # don't try if directory doesn't exist
        fullFileName = self.fullFileName(key,ext=ext)
        if not self.pathExists():
            if raiseOnError:
                raise KeyError(key, fullFileName)
            return default

        # does file exist?
        if not os.path.exists(fullFileName):
            if raiseOnError:
                raise KeyError(key,fullFileName)
            return default
        if not os.path.isfile(fullFileName):
            raise IOError(txtfmt( "Cannot read %s: object exists, but is not a file (full path %s)", key, fullFileName ))

        # read content
        # delete existing files upon read error
        try:
            return reader( key, fullFileName, default )
        except EOFError as e:
            try:
                os.remove(fullFileName)
                warn("Cannot read %s; file deleted (full path %s).\nError: %s",key,fullFileName, str(e))
            except Exception as e:
                warn("Cannot read %s; attempt to delete file failed (full path %s): %s",key,fullFileName,str(e))
        except FileNotFoundError as e:
            if raiseOnError:
                raise KeyError(key, fullFileName, str(e)) from e
        except Exception as e:
            if raiseOnError:
                raise KeyError(key, fullFileName, str(e)) from e
        except (ImportError, BaseException) as e:
            e.add_note( key )
            e.add_note( fullFileName )
            raise e
        return default

    def _read( self, key : str,
               default = None,
               raiseOnError : bool = False,
               *,
               version : str = None,
               ext : str = None,
               fmt : Format = None,
               delete_wrong_version : bool = True,
               handle_version : int = 0
               ):
        """ See read() """
        ext, fmt = self.autoExtFmt(ext=ext, fmt=fmt)
        version  = str(version) if not version is None else None
        version  = version if handle_version != SubDir.VER_RETURN else ""
        assert not fmt == self.EXT_FMT_AUTO, ("'fmt' is '*' ...?")

        if version is None and fmt in [Format.BLOSC, Format.GZIP]:
            version = ""

        def reader( key, fullFileName, default ):
            test_version = "(unknown)"
            if fmt == Format.PICKLE or fmt == Format.BLOSC:
                with open(fullFileName,"rb") as f:
                    # handle version as byte string
                    ok = True
                    if not version is None:
                        test_len     = int( f.read( 1 )[0] )
                        test_version = f.read(test_len)
                        test_version = test_version.decode("utf-8")
                        if handle_version == SubDir.VER_RETURN:
                            return test_version
                        ok = (version == "*" or test_version == version)
                    if ok:
                        if handle_version == SubDir.VER_CHECK:
                            return True
                        if fmt == Format.PICKLE:
                            data = pickle.load(f)
                        elif fmt == Format.BLOSC:
                            if blosc is None:
                                raise ModuleNotFoundError("blosc", "'blosc' not found.")
                            nnbb       = f.read(2)
                            num_blocks = int.from_bytes( nnbb, 'big', signed=False )
                            data       = bytearray()
                            for i in range(num_blocks):
                                blockl = int.from_bytes( f.read(6), 'big', signed=False )
                                if blockl>0:
                                    bdata = blosc.decompress( f.read(blockl) )
                                    data += bdata
                                    del bdata
                            data = pickle.loads(data)
                        else:
                            raise NotImplementedError(fmt, txtfmt("Unknown format '%s'", fmt))
                        return data

            elif fmt == Format.GZIP:
                if gzip is None:
                    raise ModuleNotFoundError("gzip", "'gzip' not found.")
                with gzip.open(fullFileName,"rb") as f:
                    # handle version as byte string
                    ok           = True
                    test_len     = int( f.read( 1 )[0] )
                    test_version = f.read(test_len)
                    test_version = test_version.decode("utf-8")
                    if handle_version == SubDir.VER_RETURN:
                        return test_version
                    ok = (version == "*" or test_version == version)
                    if ok:
                        if handle_version == SubDir.VER_CHECK:
                            return True
                        data = pickle.load(f)
                        return data

            elif fmt in [Format.JSON_PLAIN, Format.JSON_PICKLE]:
                with open(fullFileName,"rt",encoding="utf-8") as f:
                    # handle versioning
                    ok = True
                    if not version is None:
                        test_version = f.readline()
                        if test_version[:2] != "# ":
                            raise EnvironmentError("Error reading '%s': file does not appear to contain a version (it should start with '# ')" % fullFileName)
                        test_version = test_version[2:]
                        if test_version[-1:] == "\n":
                            test_version = test_version[:-1]
                        if handle_version == SubDir.VER_RETURN:
                            return test_version
                        ok = (version == "*" or test_version == version)
                    if ok:
                        if handle_version == SubDir.VER_CHECK:
                            return ok
                        # read
                        if fmt == Format.JSON_PICKLE:
                            if jsonpickle is None:
                                raise ModuleNotFoundError("jsonpickle", "'jsonpickle' not found.")
                            return jsonpickle.decode( f.read() )
                        else:
                            assert fmt == Format.JSON_PLAIN, ("Internal error: unknown Format", fmt)
                            return json.loads( f.read() )
            else:
                raise NotImplementedError(fmt, txtfmt("Unknown format '%s'", fmt ))

            # arrive here if version is wrong
            # delete a wrong version
            deleted = ""
            if delete_wrong_version:
                try:
                    os.remove(fullFileName)
                    e = None
                except Exception as e_:
                    e = str(e_)
            if handle_version == SubDir.VER_CHECK:
                return False
            if not raiseOnError:
                return default
            deleted = " (file was deleted)" if e is None else " (attempt to delete file failed: %s)" % e
            raise EnvironmentError("Error reading '%s': found version '%s' not '%s'%s" % (fullFileName,str(test_version),str(version),deleted))

        return self._read_reader( reader=reader, key=key, default=default, raiseOnError=raiseOnError, ext=ext )

    def read( self, key : str,
              default = None,
              raiseOnError : bool = False,
              *,
              version : str = None,
              delete_wrong_version : bool = True,
              ext : str = None,
              fmt : Format = None
              ):
        """
        Read pickled data from 'key' if the file exists, or return 'default'
        -- Supports 'key' containing directories
        -- Supports 'key' (and default, ext) being iterable.
           In this case any iterable 'default' except strings is handled accordingly.
           In order to have a single default which is itself an iterable, you will have to wrap it in another iterable.
           E.g.:
               keys = ['file1', 'file2']

               sd.read( keys )
               --> works, both are using default None

               sd.read( keys, 1 )
               --> works, both are using default '1'

               sd.read( keys, [1,2] )
               --> works, defaults 1 and 2, respectively

               sd.read( keys, [1] )
               --> produces error as len(keys) != len(default)

           Strings are iterable but are treated as single value.
           Therefore
               sd.read( keys, '12' )
           means the default value '12' is used for both files.
           Use
               sd.read( keys, ['1','2'] )
           in case the intention was using '1' and '2', respectively.

        Returns the read object, or a list of objects if 'key' was iterable.
        If the current directory is 'None', then behaviour is as if the file did not exist.

        Parameters
        ----------
        key : str
            A core filename ("key") or a list thereof. The 'key' may contain subdirectory information '/'.
        default :
            Default value, or default values if key is a list
        raiseOnError : bool
            Whether to raise an exception if reading an existing file failed.
            By default this function fails silently and returns the default.
        version : str
            If not None, specifies the version of the current code base.
            In this case, this version will be compared to the version of the file being read.
            If they do not match, read fails (either by returning default or throwing an exception).
            You can specify version "*" to read any version. This is distinct from reading a file without version.
        delete_wrong_version : bool
            If True, and if a wrong version was found, delete the file.
        ext : str
            Extension overwrite, or a list thereof if key is a list
            Set to:
            -- None to use directory's default
            -- '*' to use the extension implied by 'fmt'
            -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
        fmt : Format
            File format or None to use the directory's default.
            Note that 'fmt' cannot be a list even if 'key' is.
            Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.

        Returns
        -------
            For a single 'key': Content of the file if successfully read, or 'default' otherwise.
            If 'key' is a list: list of contents.
        """
        return self._read( key=key,
                           default=default,
                           raiseOnError=raiseOnError,
                           version=version,
                           ext=ext,
                           fmt=fmt,
                           delete_wrong_version=delete_wrong_version,
                           handle_version=SubDir.VER_NORMAL )

    get = read # backwards compatibility

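
    # Illustrative examples for versioned reads (comment only; keys and versions are made up):
    #
    #   sd.read("model", None, version="1.2")   # returns None unless the stored version is "1.2"
    #   sd.read("model", None, version="*")     # accept any stored version
    #   sd.read(["a", "b"], default=[0, 0])     # list of keys with per-key defaults
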
    def is_version( self, key : str, version : str = None, raiseOnError : bool = False, *, ext : str = None, fmt : Format = None, delete_wrong_version : bool = True ):
        """
        Compares the version of 'key' with 'version'.

        Parameters
        ----------
        key : str
            A core filename ("key") or a list thereof. The 'key' may contain subdirectory information '/'.
        version : str
            Specifies the version of the current code base to compare with.
            You can use '*' to match any version

        raiseOnError : bool
            Whether to raise an exception if accessing an existing file failed (e.g. if it is a directory).
            By default this function fails silently and returns the default.
        delete_wrong_version : bool
            If True, and if a wrong version was found, delete the file.
        ext : str
            Extension overwrite, or a list thereof if key is a list.
            Set to:
            -- None to use directory's default
            -- '*' to use the extension implied by 'fmt'
            -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
        fmt : Format
            File format or None to use the directory's default.
            Note that 'fmt' cannot be a list even if 'key' is.
            Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.

        Returns
        -------
            Returns True only if the file exists and has the correct version.
        """
        return self._read( key=key,default=False,raiseOnError=raiseOnError,version=version,ext=ext,fmt=fmt,delete_wrong_version=delete_wrong_version,handle_version=SubDir.VER_CHECK )

    def get_version( self, key : str, raiseOnError : bool = False, *, ext : str = None, fmt : Format = None ):
        """
        Returns the version ID stored in 'key'.
        This requires that the file has previously been saved with a version.
        Otherwise this function will return unpredictable results.

        Parameters
        ----------
        key : str
            A core filename ("key") or a list thereof. The 'key' may contain subdirectory information '/'.
        raiseOnError : bool
            Whether to raise an exception if accessing an existing file failed (e.g. if it is a directory).
            By default this function fails silently and returns the default.
        ext : str
            Extension overwrite, or a list thereof if key is a list.
            Set to:
            -- None to use directory's default
            -- '*' to use the extension implied by 'fmt'
            -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
        fmt : Format
            File format or None to use the directory's default.
            Note that 'fmt' cannot be a list even if 'key' is.
            Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.

        Returns
        -------
            Version ID.
        """
        return self._read( key=key,default=None,raiseOnError=raiseOnError,version="",ext=ext,fmt=fmt,delete_wrong_version=False,handle_version=SubDir.VER_RETURN )

    def readString( self, key : str, default = None, raiseOnError : bool = False, *, ext : str = None ) -> str:
        """
        Reads text from 'key' or returns 'default'. Removes trailing EOLs
        -- Supports 'key' containing directories
        -- Supports 'key' being iterable. In this case any 'default' can be a list, too.

        Returns the read string, or a list of strings if 'key' was iterable.
        If the current directory is 'None', then behaviour is as if the file did not exist.

        Use 'ext' to specify the extension.
        You cannot use 'ext' to specify a format as the format is plain text.
        If 'ext' is '*' or if self._ext is '*' then the default extension is 'txt'.
        """
        verify( not isinstance(ext, Format), "Cannot change format when reading strings. Found extension '%s'", ext)
        ext = ext if not ext is None else self._ext
        ext = ext if ext != self.EXT_FMT_AUTO else ".txt"

        def reader( key, fullFileName, default ):
            with open(fullFileName,"rt",encoding="utf-8") as f:
                line = f.readline()
                if len(line) > 0 and line[-1] == '\n':
                    line = line[:-1]
                return line
        return self._read_reader( reader=reader, key=key, default=default, raiseOnError=raiseOnError, ext=ext )

# -- write --
|
|
1241
|
+
|
|
1242
|
+
def _write( self, writer, key : str, obj, raiseOnError : bool, *, ext : str = None ) -> bool:
|
|
1243
|
+
""" Utility function for write() and writeLine() """
|
|
1244
|
+
if self._path is None:
|
|
1245
|
+
raise EOFError("Cannot write to '%s': current directory is not specified" % key)
|
|
1246
|
+
self.createDirectory()
|
|
1247
|
+
|
|
1248
|
+
# vector version
|
|
1249
|
+
if not isinstance(key,str):
|
|
1250
|
+
if not isinstance(key, Collection): error( "'key' must be a string or an iterable object. Found type %s", type(key))
|
|
1251
|
+
l = len(key)
|
|
1252
|
+
if obj is None or isinstance(obj,str) or not isinstance(obj, Collection):
|
|
1253
|
+
obj = [ obj ] * l
|
|
1254
|
+
else:
|
|
1255
|
+
if len(obj) != l: error("'obj' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(obj), l )
|
|
1256
|
+
if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
|
|
1257
|
+
ext = [ ext ] * l
|
|
1258
|
+
else:
|
|
1259
|
+
if len(ext) != l: error("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
|
|
1260
|
+
ok = True
|
|
1261
|
+
for k,o,e in zip(key,obj,ext):
|
|
1262
|
+
ok &= self._write( writer, k, o, raiseOnError=raiseOnError, ext=e )   # all writes must succeed
|
|
1263
|
+
return ok
|
|
1264
|
+
|
|
1265
|
+
# single key
|
|
1266
|
+
if not len(key) > 0: error("'key' is empty (the filename)" )
|
|
1267
|
+
sub, key = os.path.split(key)
|
|
1268
|
+
if len(key) == 0: error("'key '%s' refers to a directory, not a file", key)
|
|
1269
|
+
if len(sub) > 0:
|
|
1270
|
+
return SubDir(sub,parent=self)._write(writer,key,obj, raiseOnError=raiseOnError,ext=ext )
|
|
1271
|
+
|
|
1272
|
+
# write to temp file, then rename into target file
|
|
1273
|
+
# this reduces collisions when I/O operations are slow
|
|
1274
|
+
fullFileName = self.fullKeyName(key,ext=ext)
|
|
1275
|
+
tmp_file = uniqueHash48( [ key, uuid.getnode(), os.getpid(), threading.get_ident(), datetime.datetime.now() ] )
|
|
1276
|
+
tmp_i = 0
|
|
1277
|
+
fullTmpFile = self.fullKeyName(tmp_file,ext="tmp" if not ext=="tmp" else "_tmp")
|
|
1278
|
+
while os.path.exists(fullTmpFile):
|
|
1279
|
+
fullTmpFile = self.fullKeyName(tmp_file) + "." + str(tmp_i) + ".tmp"
|
|
1280
|
+
tmp_i += 1
|
|
1281
|
+
if tmp_i >= 10:
|
|
1282
|
+
raise RuntimeError("Failed to generate temporary file for writing '%s': too many temporary files found. For example, this file already exists: '%s'" % ( fullFileName, fullTmpFile ) )
|
|
1283
|
+
|
|
1284
|
+
# write
|
|
1285
|
+
if not writer( key, fullTmpFile, obj ):
|
|
1286
|
+
return False
|
|
1287
|
+
assert os.path.exists(fullTmpFile), ("Internal error: file does not exist ...?", fullTmpFile, fullFileName)
|
|
1288
|
+
try:
|
|
1289
|
+
if os.path.exists(fullFileName):
|
|
1290
|
+
os.remove(fullFileName)
|
|
1291
|
+
os.rename(fullTmpFile, fullFileName)
|
|
1292
|
+
except Exception as e:
|
|
1293
|
+
os.remove(fullTmpFile)
|
|
1294
|
+
if raiseOnError:
|
|
1295
|
+
raise e
|
|
1296
|
+
return False
|
|
1297
|
+
return True
|
|
1298
|
+
|
|
1299
|
+
def write( self, key : str,
|
|
1300
|
+
obj,
|
|
1301
|
+
raiseOnError : bool = True,
|
|
1302
|
+
*,
|
|
1303
|
+
version : str = None,
|
|
1304
|
+
ext : str = None,
|
|
1305
|
+
fmt : Format = None ) -> bool:
|
|
1306
|
+
"""
|
|
1307
|
+
Pickles 'obj' into key.
|
|
1308
|
+
-- Supports 'key' containing directories
|
|
1309
|
+
-- Supports 'key' being a list.
|
|
1310
|
+
In this case, if obj is an iterable it is considered the list of values for the elements of 'keys'
|
|
1311
|
+
If 'obj' is not iterable, it will be written into all 'key's
|
|
1312
|
+
|
|
1313
|
+
keys = ['file1', 'file2']
|
|
1314
|
+
|
|
1315
|
+
sd.write( keys, 1 )
|
|
1316
|
+
--> works, writes '1' in both files.
|
|
1317
|
+
|
|
1318
|
+
sd.write( keys, [1,2] )
|
|
1319
|
+
--> works, writes 1 and 2, respectively
|
|
1320
|
+
|
|
1321
|
+
sd.read( keys, "12" )
|
|
1322
|
+
--> works, writes '12' in both files
|
|
1323
|
+
|
|
1324
|
+
sd.write( keys, [1] )
|
|
1325
|
+
--> produces error as len(keys) != len(obj)
|
|
1326
|
+
|
|
1327
|
+
If the current directory is 'None', then the function throws an EOFError exception
|
|
1328
|
+
|
|
1329
|
+
Parameters
|
|
1330
|
+
----------
|
|
1331
|
+
key : str
|
|
1332
|
+
Core filename ("key"), or list thereof
|
|
1333
|
+
obj :
|
|
1334
|
+
Object to write, or list thereof if 'key' is a list
|
|
1335
|
+
raiseOnError : bool
|
|
1336
|
+
If False, this function will return False upon failure
|
|
1337
|
+
version : str
|
|
1338
|
+
If not None, specifies the version of the code which generated 'obj'.
|
|
1339
|
+
This version will be written to the beginning of the file.
|
|
1340
|
+
ext : str
|
|
1341
|
+
Extension, or list thereof if 'key' is a list.
|
|
1342
|
+
Set to:
|
|
1343
|
+
-- None to use directory's default
|
|
1344
|
+
-- '*' to use the extension implied by 'fmt'
|
|
1345
|
+
-- for convenience 'ext' can also be a Format (in this case leave 'fmt' as None)
|
|
1346
|
+
fmt : Format
|
|
1347
|
+
File format or None to use the directory's default.
|
|
1348
|
+
Note that 'fmt' cannot be a list even if 'key' is.
|
|
1349
|
+
Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
|
|
1350
|
+
|
|
1351
|
+
Returns
|
|
1352
|
+
-------
|
|
1353
|
+
Boolean to indicate success if raiseOnError is False.
|
|
1354
|
+
"""
|
|
1355
|
+
ext, fmt = self.autoExtFmt(ext=ext, fmt=fmt)
|
|
1356
|
+
version = str(version) if not version is None else None
|
|
1357
|
+
assert ext != self.EXT_FMT_AUTO, ("'ext' is '*'...?")
|
|
1358
|
+
|
|
1359
|
+
if version=='*': error("You cannot write version '*'. Use None to write a file without version.")
|
|
1360
|
+
if version is None and fmt in [Format.BLOSC, Format.GZIP]:
|
|
1361
|
+
version = ""
|
|
1362
|
+
|
|
1363
|
+
def writer( key, fullFileName, obj ):
|
|
1364
|
+
try:
|
|
1365
|
+
if fmt == Format.PICKLE or fmt == Format.BLOSC:
|
|
1366
|
+
with open(fullFileName,"wb") as f:
|
|
1367
|
+
# handle version as byte string
|
|
1368
|
+
if not version is None:
|
|
1369
|
+
version_ = bytearray(version, "utf-8")
|
|
1370
|
+
if len(version_) > 255: error("Version '%s' is way too long: its byte encoding has length %ld which does not fit into a byte", version, len(version_))
|
|
1371
|
+
len8 = bytearray(1)
|
|
1372
|
+
len8[0] = len(version_)
|
|
1373
|
+
f.write(len8)
|
|
1374
|
+
f.write(version_)
|
|
1375
|
+
if fmt == Format.PICKLE:
|
|
1376
|
+
pickle.dump(obj,f,-1)
|
|
1377
|
+
else:
|
|
1378
|
+
assert fmt == Format.BLOSC, ("Internal error: unknown format", fmt)
|
|
1379
|
+
if blosc is None:
|
|
1380
|
+
raise ModuleNotFoundError("blosc", "'blosc' not found")
|
|
1381
|
+
pdata = pickle.dumps(obj) # returns data as a bytes object
|
|
1382
|
+
del obj
|
|
1383
|
+
len_data = len(pdata)
|
|
1384
|
+
num_blocks = max(0,len_data-1) // BLOSC_MAX_USE + 1
|
|
1385
|
+
f.write(num_blocks.to_bytes(2, 'big', signed=False))
|
|
1386
|
+
for i in range(num_blocks):
|
|
1387
|
+
start = i*BLOSC_MAX_USE
|
|
1388
|
+
end = min(len_data,start+BLOSC_MAX_USE)
|
|
1389
|
+
assert end>start, ("Internal error; nothing to write")
|
|
1390
|
+
block = blosc.compress( pdata[start:end] )
|
|
1391
|
+
blockl = len(block)
|
|
1392
|
+
f.write( blockl.to_bytes(6, 'big', signed=False) )
|
|
1393
|
+
if blockl > 0:
|
|
1394
|
+
f.write( block )
|
|
1395
|
+
del block
|
|
1396
|
+
del pdata
|
|
1397
|
+
|
|
1398
|
+
elif fmt == Format.GZIP:
|
|
1399
|
+
if gzip is None:
|
|
1400
|
+
raise ModuleNotFoundError("gzip", "'gzip' not found")
|
|
1401
|
+
with gzip.open(fullFileName,"wb") as f:
|
|
1402
|
+
# handle version as byte string
|
|
1403
|
+
if not version is None:
|
|
1404
|
+
version_ = bytearray(version, "utf-8")
|
|
1405
|
+
if len(version_) > 255: error("Version '%s' is way too long: its byte encoding has length %ld which does not fit into a byte", version, len(version_))
|
|
1406
|
+
len8 = bytearray(1)
|
|
1407
|
+
len8[0] = len(version_)
|
|
1408
|
+
f.write(len8)
|
|
1409
|
+
f.write(version_)
|
|
1410
|
+
pickle.dump(obj,f,-1)
|
|
1411
|
+
|
|
1412
|
+
elif fmt in [Format.JSON_PLAIN, Format.JSON_PICKLE]:
|
|
1413
|
+
with open(fullFileName,"wt",encoding="utf-8") as f:
|
|
1414
|
+
if not version is None:
|
|
1415
|
+
f.write("# " + version + "\n")
|
|
1416
|
+
if fmt == Format.JSON_PICKLE:
|
|
1417
|
+
if jsonpickle is None:
|
|
1418
|
+
raise ModuleNotFoundError("jsonpickle", "'jsonpickle' not found")
|
|
1419
|
+
f.write( jsonpickle.encode(obj) )
|
|
1420
|
+
else:
|
|
1421
|
+
assert fmt == Format.JSON_PLAIN, ("Internal error: invalid Format", fmt)
|
|
1422
|
+
f.write( json.dumps( plain(obj, sorted_dicts=True, native_np=True, dt_to_str=True ), default=str ) )
|
|
1423
|
+
|
|
1424
|
+
else:
|
|
1425
|
+
raise NotImplementedError(fmt, txtfmt("Internal error: invalid format '%s'", fmt))
|
|
1426
|
+
except Exception as e:
|
|
1427
|
+
if raiseOnError:
|
|
1428
|
+
raise e
|
|
1429
|
+
return False
|
|
1430
|
+
return True
|
|
1431
|
+
return self._write( writer=writer, key=key, obj=obj, raiseOnError=raiseOnError, ext=ext )
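# A minimal usage sketch (assumes Format is importable next to SubDir, as it is defined
# in this module; directory names and values are arbitrary): combining 'version', 'fmt'
# and the '*' extension rule described above.
#
#   sd = SubDir("!/data", fmt=Format.JSON_PLAIN)
#   sd.write("config", {"x": 1, "y": 2}, version="1.0")        # directory default format
#   sd.write("blob", {"x": 1}, fmt=Format.BLOSC, ext="*")      # '*' derives the extension from 'fmt'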
|
|
1432
|
+
|
|
1433
|
+
set = write
|
|
1434
|
+
|
|
1435
|
+
def writeString( self, key : str, line : str, raiseOnError : bool = True, *, ext : str = None ) -> bool:
|
|
1436
|
+
"""
|
|
1437
|
+
Writes 'line' into key. A trailing EOL will not be read back
|
|
1438
|
+
-- Supports 'key' containing directories
|
|
1439
|
+
-- Supports 'key' being a list.
|
|
1440
|
+
In this case, line can either be the same value for all keys, or a list, too.
|
|
1441
|
+
|
|
1442
|
+
If the current directory is 'None', then the function throws an EOFError exception
|
|
1443
|
+
See additional comments for write()
|
|
1444
|
+
|
|
1445
|
+
Use 'ext' to specify the extension.
|
|
1446
|
+
You cannot use 'ext' to specify a format as the format is plain text.
|
|
1447
|
+
If 'ext' is '*' or if self._ext is '*' then the default extension is 'txt'.
|
|
1448
|
+
"""
|
|
1449
|
+
verify( not isinstance(ext, Format), "Cannot change format when writing strings. Found extension '%s'", ext, exception=ValueError )
|
|
1450
|
+
ext = ext if not ext is None else self._ext
|
|
1451
|
+
ext = ext if ext != self.EXT_FMT_AUTO else ".txt"
|
|
1452
|
+
|
|
1453
|
+
if len(line) == 0 or line[-1] != '\n':
|
|
1454
|
+
line += '\n'
|
|
1455
|
+
def writer( key, fullFileName, obj ):
|
|
1456
|
+
try:
|
|
1457
|
+
with open(fullFileName,"wt",encoding="utf-8") as f:
|
|
1458
|
+
f.write(obj)
|
|
1459
|
+
except Exception as e:
|
|
1460
|
+
if raiseOnError:
|
|
1461
|
+
raise e
|
|
1462
|
+
return False
|
|
1463
|
+
return True
|
|
1464
|
+
return self._write( writer=writer, key=key, obj=line, raiseOnError=raiseOnError, ext=ext )
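# A minimal usage sketch (directory name and text are arbitrary): round-tripping a
# single line of text with writeString()/readString(); the trailing EOL added on
# write is stripped again on read.
#
#   sd = SubDir("!/notes")
#   sd.writeString("readme", "hello world")
#   sd.readString("readme", default="")     # -> "hello world"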
|
|
1465
|
+
|
|
1466
|
+
# -- iterate --
|
|
1467
|
+
|
|
1468
|
+
def files(self, *, ext : str = None) -> list:
|
|
1469
|
+
"""
|
|
1470
|
+
Returns a list of keys in this subdirectory with the current extension, or the specified extension.
|
|
1471
|
+
|
|
1472
|
+
In other words, if the extension is ".pck", and the files are "file1.pck", "file2.pck", "file3.bin"
|
|
1473
|
+
then this function will return [ "file1", "file2" ]
|
|
1474
|
+
|
|
1475
|
+
If 'ext' is
|
|
1476
|
+
-- None, the directory's default extension will be used
|
|
1477
|
+
-- "" then this function will return all files in this directory.
|
|
1478
|
+
-- a Format, then the default extension of the format will be used.
|
|
1479
|
+
|
|
1480
|
+
This function ignores directories. Use subDirs() to retrieve those.
|
|
1481
|
+
|
|
1482
|
+
[This function has an alias 'keys']
|
|
1483
|
+
"""
|
|
1484
|
+
if not self.pathExists():
|
|
1485
|
+
return []
|
|
1486
|
+
ext = self.autoExt( ext=ext )
|
|
1487
|
+
ext_l = len(ext)
|
|
1488
|
+
keys = []
|
|
1489
|
+
with os.scandir(self._path) as it:
|
|
1490
|
+
for entry in it:
|
|
1491
|
+
if not entry.is_file():
|
|
1492
|
+
continue
|
|
1493
|
+
if ext_l > 0:
|
|
1494
|
+
if len(entry.name) <= ext_l or entry.name[-ext_l:] != ext:
|
|
1495
|
+
continue
|
|
1496
|
+
keys.append( entry.name[:-ext_l] )
|
|
1497
|
+
else:
|
|
1498
|
+
keys.append( entry.name )
|
|
1499
|
+
return keys
|
|
1500
|
+
keys = files
|
|
1501
|
+
|
|
1502
|
+
def subDirs(self) -> list:
|
|
1503
|
+
"""
|
|
1504
|
+
Returns a list of all sub directories
|
|
1505
|
+
If self does not refer to an existing directory, then this function returns an empty list.
|
|
1506
|
+
"""
|
|
1507
|
+
# do not do anything if the object was deleted
|
|
1508
|
+
if not self.pathExists():
|
|
1509
|
+
return []
|
|
1510
|
+
subdirs = []
|
|
1511
|
+
with os.scandir(self._path[:-1]) as it:
|
|
1512
|
+
for entry in it:
|
|
1513
|
+
if not entry.is_dir():
|
|
1514
|
+
continue
|
|
1515
|
+
subdirs.append( entry.name )
|
|
1516
|
+
return subdirs
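# A minimal usage sketch (directory names and values are arbitrary): files()/keys()
# list keys matching the directory's extension, while subDirs() lists child directories.
#
#   sd = SubDir("!/store")
#   sd.write(["a", "b"], [1, 2])
#   sorted(sd.keys())            # -> ['a', 'b']
#   sd.write("sub1/c", 3)        # keys may contain sub directory components
#   sd.subDirs()                 # -> ['sub1']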
|
|
1517
|
+
|
|
1518
|
+
# -- delete --
|
|
1519
|
+
|
|
1520
|
+
def delete( self, key : str, raiseOnError: bool = False, *, ext : str = None ):
|
|
1521
|
+
"""
|
|
1522
|
+
Deletes 'key'; 'key' might be a list.
|
|
1523
|
+
|
|
1524
|
+
Parameters
|
|
1525
|
+
----------
|
|
1526
|
+
key :
|
|
1527
|
+
filename, or list of filenames
|
|
1528
|
+
raiseOnError :
|
|
1529
|
+
if False, do not throw KeyError if file does not exist.
|
|
1530
|
+
ext :
|
|
1531
|
+
Extension, or list thereof if 'key' is a list.
|
|
1532
|
+
Use
|
|
1533
|
+
-- None for the directory default
|
|
1534
|
+
-- "" to not use an automatic extension.
|
|
1535
|
+
-- A Format to specify the default extension for that format.
|
|
1536
|
+
"""
|
|
1537
|
+
# do not do anything if the object was deleted
|
|
1538
|
+
if self._path is None:
|
|
1539
|
+
if raiseOnError: raise EOFError("Cannot delete '%s': current directory not specified" % key)
|
|
1540
|
+
return
|
|
1541
|
+
|
|
1542
|
+
# vector version
|
|
1543
|
+
if not isinstance(key,str):
|
|
1544
|
+
if not isinstance(key, Collection): error( "'key' must be a string or an iterable object. Found type %s", type(key))
|
|
1545
|
+
l = len(key)
|
|
1546
|
+
if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
|
|
1547
|
+
ext = [ ext ] * l
|
|
1548
|
+
else:
|
|
1549
|
+
if len(ext) != l: error("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
|
|
1550
|
+
for k, e in zip(key,ext):
|
|
1551
|
+
self.delete(k, raiseOnError=raiseOnError, ext=e)
|
|
1552
|
+
return
|
|
1553
|
+
|
|
1554
|
+
# handle directories in 'key'
|
|
1555
|
+
if len(key) == 0: error( "'key' is empty" )
|
|
1556
|
+
sub, key_ = os.path.split(key)
|
|
1557
|
+
if len(key_) == 0: error("'key' %s indicates a directory, not a file", key)
|
|
1558
|
+
if len(sub) > 0: return SubDir(sub,parent=self).delete(key_,raiseOnError=raiseOnError,ext=ext)
|
|
1559
|
+
# don't try if directory doesn't exist
|
|
1560
|
+
if not self.pathExists():
|
|
1561
|
+
if raiseOnError:
|
|
1562
|
+
raise KeyError(key)
|
|
1563
|
+
return
|
|
1564
|
+
fullFileName = self.fullKeyName(key, ext=ext)
|
|
1565
|
+
if not os.path.exists(fullFileName):
|
|
1566
|
+
if raiseOnError:
|
|
1567
|
+
raise KeyError(key)
|
|
1568
|
+
else:
|
|
1569
|
+
os.remove(fullFileName)
|
|
1570
|
+
|
|
1571
|
+
def deleteAllKeys( self, raiseOnError : bool = False, *, ext : str = None ):
|
|
1572
|
+
"""
|
|
1573
|
+
Deletes all valid keys in this sub directory with the correct extension.
|
|
1574
|
+
|
|
1575
|
+
Parameters
|
|
1576
|
+
----------
|
|
1577
|
+
|
|
1578
|
+
|
|
1579
|
+
raiseOnError :
|
|
1580
|
+
if False, do not throw KeyError if file does not exist.
|
|
1581
|
+
ext :
|
|
1582
|
+
File extension to match.
|
|
1583
|
+
Use
|
|
1584
|
+
-- None for the directory default
|
|
1585
|
+
-- "" to match all files regardless of extension.
|
|
1586
|
+
-- A Format to specify the default extension for that format.
|
|
1587
|
+
"""
|
|
1588
|
+
if self._path is None:
|
|
1589
|
+
if raiseOnError: raise EOFError("Cannot delete all files: current directory not specified")
|
|
1590
|
+
return
|
|
1591
|
+
if not self.pathExists():
|
|
1592
|
+
return
|
|
1593
|
+
self.delete( self.keys(ext=ext), raiseOnError=raiseOnError, ext=ext )
|
|
1594
|
+
|
|
1595
|
+
def deleteAllContent( self, deleteSelf : bool = False, raiseOnError : bool = False, *, ext : str = None ):
|
|
1596
|
+
"""
|
|
1597
|
+
Deletes all valid keys and subdirectories in this sub directory.
|
|
1598
|
+
Does not delete files with other extensions.
|
|
1599
|
+
Use eraseEverything() if the aim is to delete everything.
|
|
1600
|
+
|
|
1601
|
+
Parameters
|
|
1602
|
+
----------
|
|
1603
|
+
deleteSelf:
|
|
1604
|
+
whether to delete the directory or only its contents
|
|
1605
|
+
raiseOnError:
|
|
1606
|
+
False for silent failure
|
|
1607
|
+
ext:
|
|
1608
|
+
Extension for keys, or None for the directory's default.
|
|
1609
|
+
You can also provide a Format for 'ext'.
|
|
1610
|
+
Use "" to match all files regardless of extension.
|
|
1611
|
+
"""
|
|
1612
|
+
# do not do anything if the object was deleted
|
|
1613
|
+
if self._path is None:
|
|
1614
|
+
if raiseOnError: raise EOFError("Cannot delete all contents: current directory not specified")
|
|
1615
|
+
return
|
|
1616
|
+
if not self.pathExists():
|
|
1617
|
+
return
|
|
1618
|
+
# delete sub directories
|
|
1619
|
+
subdirs = self.subDirs()
|
|
1620
|
+
for subdir in subdirs:
|
|
1621
|
+
SubDir(subdir, parent=self).deleteAllContent( deleteSelf=True, raiseOnError=raiseOnError, ext=ext )
|
|
1622
|
+
# delete keys
|
|
1623
|
+
self.deleteAllKeys( raiseOnError=raiseOnError,ext=ext )
|
|
1624
|
+
# delete myself
|
|
1625
|
+
if not deleteSelf:
|
|
1626
|
+
return
|
|
1627
|
+
rest = list( os.scandir(self._path[:-1]) )
|
|
1628
|
+
txt = str(rest)
|
|
1629
|
+
txt = txt if len(txt) < 50 else (txt[:47] + '...')
|
|
1630
|
+
if len(rest) > 0:
|
|
1631
|
+
if raiseOnError: error( "Cannot delete my own directory %s: directory not empty: found %ld object(s): %s", self._path,len(rest), txt)
|
|
1632
|
+
return
|
|
1633
|
+
os.rmdir(self._path[:-1]) ## does not work ????
|
|
1634
|
+
self._path = None
|
|
1635
|
+
|
|
1636
|
+
def eraseEverything( self, keepDirectory : bool = True ):
|
|
1637
|
+
"""
|
|
1638
|
+
Deletes the entire sub directory with all its contents
|
|
1639
|
+
WARNING: deletes ALL files, not just those with the present extension.
|
|
1640
|
+
Will keep the subdir itself by default.
|
|
1641
|
+
If not, it will invalidate 'self._path'
|
|
1642
|
+
|
|
1643
|
+
If self is None, do nothing. That means you can call this function several times.
|
|
1644
|
+
"""
|
|
1645
|
+
if self._path is None:
|
|
1646
|
+
return
|
|
1647
|
+
if not self.pathExists():
|
|
1648
|
+
return
|
|
1649
|
+
shutil.rmtree(self._path[:-1], ignore_errors=True)
|
|
1650
|
+
if not keepDirectory and os.path.exists(self._path[:-1]):
|
|
1651
|
+
os.rmdir(self._path[:-1])
|
|
1652
|
+
self._path = None
|
|
1653
|
+
elif keepDirectory and not os.path.exists(self._path[:-1]):
|
|
1654
|
+
os.makedirs(self._path[:-1])
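# A minimal usage sketch (directory name is arbitrary) of the three levels of cleanup:
# delete() removes a single key, deleteAllKeys() removes all keys with the matching
# extension, and eraseEverything() removes everything regardless of extension.
#
#   sd = SubDir("!/tmpstore")
#   sd.write("a", 1)
#   sd.delete("a")                            # silent if the file is already gone
#   sd.deleteAllKeys()                        # only files with the directory's extension
#   sd.eraseEverything(keepDirectory=False)   # removes the tree and invalidates the handle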
|
|
1655
|
+
|
|
1656
|
+
# -- file ops --
|
|
1657
|
+
|
|
1658
|
+
def exists(self, key : str, *, ext : str = None ) -> bool:
|
|
1659
|
+
"""
|
|
1660
|
+
Checks whether 'key' exists. Works with iterables
|
|
1661
|
+
|
|
1662
|
+
Parameters
|
|
1663
|
+
----------
|
|
1664
|
+
key :
|
|
1665
|
+
filename, or list of filenames
|
|
1666
|
+
ext :
|
|
1667
|
+
Extension, or list thereof if 'key' is a list.
|
|
1668
|
+
Use
|
|
1669
|
+
-- None for the directory default
|
|
1670
|
+
-- "" for no automatic extension
|
|
1671
|
+
-- A Format to specify the default extension for that format.
|
|
1672
|
+
|
|
1673
|
+
Returns
|
|
1674
|
+
-------
|
|
1675
|
+
If 'key' is a string, returns True or False, else it will return a list of bools.
|
|
1676
|
+
"""
|
|
1677
|
+
# vector version
|
|
1678
|
+
if not isinstance(key,str):
|
|
1679
|
+
verify( isinstance(key, Collection), "'key' must be a string or an iterable object. Found type %s", type(key))
|
|
1680
|
+
l = len(key)
|
|
1681
|
+
if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
|
|
1682
|
+
ext = [ ext ] * l
|
|
1683
|
+
else:
|
|
1684
|
+
if len(ext) != l: error("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
|
|
1685
|
+
return [ self.exists(k,ext=e) for k,e in zip(key,ext) ]
|
|
1686
|
+
# empty directory
|
|
1687
|
+
if self._path is None:
|
|
1688
|
+
return False
|
|
1689
|
+
# handle directories in 'key'
|
|
1690
|
+
if len(key) == 0: raise ValueError("'key' missing (the filename)")
|
|
1691
|
+
sub, key_ = os.path.split(key)
|
|
1692
|
+
if len(key_) == 0: raise IsADirectoryError( key, txtfmt("'key' %s indicates a directory, not a file", key) )
|
|
1693
|
+
if len(sub) > 0:
|
|
1694
|
+
return self(sub).exists(key=key_,ext=ext)
|
|
1695
|
+
# if directory doesn't exist
|
|
1696
|
+
if not self.pathExists():
|
|
1697
|
+
return False
|
|
1698
|
+
# single key
|
|
1699
|
+
fullFileName = self.fullKeyName(key, ext=ext)
|
|
1700
|
+
if not os.path.exists(fullFileName):
|
|
1701
|
+
return False
|
|
1702
|
+
if not os.path.isfile(fullFileName):
|
|
1703
|
+
raise IsADirectoryError("Structural error: key %s: exists, but is not a file (full path %s)",key,fullFileName)
|
|
1704
|
+
return True
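# A minimal usage sketch (directory name is arbitrary): exists() accepts a single key
# or a list of keys; the latter returns a list of booleans.
#
#   sd = SubDir("!/exists_demo")
#   sd.write("a", 1)
#   sd.exists("a")               # -> True
#   sd.exists(["a", "b"])        # -> [True, False]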
|
|
1705
|
+
|
|
1706
|
+
def _getFileProperty( self, *, key : str, ext : str, func ):
|
|
1707
|
+
# vector version
|
|
1708
|
+
if not isinstance(key,str):
|
|
1709
|
+
verify( isinstance(key, Collection), "'key' must be a string or an iterable object. Found type %s", type(key))
|
|
1710
|
+
l = len(key)
|
|
1711
|
+
if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
|
|
1712
|
+
ext = [ ext ] * l
|
|
1713
|
+
else:
|
|
1714
|
+
if len(ext) != l: error("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
|
|
1715
|
+
return [ self._getFileProperty(key=k,ext=e,func=func) for k,e in zip(key,ext) ]
|
|
1716
|
+
# empty directory
|
|
1717
|
+
if self._path is None:
|
|
1718
|
+
return None
|
|
1719
|
+
# handle directories in 'key'
|
|
1720
|
+
if len(key) == 0: raise ValueError("'key' missing (the filename)")
|
|
1721
|
+
sub, key_ = os.path.split(key)
|
|
1722
|
+
if len(key_) == 0: raise IsADirectoryError( key, txtfmt("'key' %s indicates a directory, not a file", key) )
|
|
1723
|
+
if len(sub) > 0: return self(sub)._getFileProperty(key=key_,ext=ext,func=func)
|
|
1724
|
+
# if directory doesn't exist
|
|
1725
|
+
if not self.pathExists():
|
|
1726
|
+
return None
|
|
1727
|
+
# single key
|
|
1728
|
+
fullFileName = self.fullKeyName(key, ext=ext)
|
|
1729
|
+
if not os.path.exists(fullFileName):
|
|
1730
|
+
return None
|
|
1731
|
+
return func(fullFileName)
|
|
1732
|
+
|
|
1733
|
+
def getCreationTime( self, key : str, *, ext : str = None ) -> datetime.datetime:
|
|
1734
|
+
"""
|
|
1735
|
+
Returns the creation time of 'key', or None if file was not found.
|
|
1736
|
+
See comments on os.path.getctime() for compatibility
|
|
1737
|
+
|
|
1738
|
+
Parameters
|
|
1739
|
+
----------
|
|
1740
|
+
key :
|
|
1741
|
+
filename, or list of filenames
|
|
1742
|
+
ext :
|
|
1743
|
+
Extension, or list thereof if 'key' is a list.
|
|
1744
|
+
Use
|
|
1745
|
+
-- None for the directory default
|
|
1746
|
+
-- "" for no automatic extension
|
|
1747
|
+
-- A Format to specify the default extension for that format.
|
|
1748
|
+
|
|
1749
|
+
Returns
|
|
1750
|
+
-------
|
|
1751
|
+
datetime.datetime if 'key' is a string, otherwise a list of datetime's
|
|
1752
|
+
"""
|
|
1753
|
+
return self._getFileProperty( key=key, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getctime(x)) )
|
|
1754
|
+
|
|
1755
|
+
def getLastModificationTime( self, key : str, *, ext : str = None ) -> datetime.datetime:
|
|
1756
|
+
"""
|
|
1757
|
+
Returns the last modification time of 'key', or None if file was not found.
|
|
1758
|
+
See comments on os.path.getmtime() for compatibility
|
|
1759
|
+
|
|
1760
|
+
Parameters
|
|
1761
|
+
----------
|
|
1762
|
+
key :
|
|
1763
|
+
filename, or list of filenames
|
|
1764
|
+
ext :
|
|
1765
|
+
Extension, or list thereof if 'key' is a list.
|
|
1766
|
+
Use
|
|
1767
|
+
-- None for the directory default
|
|
1768
|
+
-- "" for no automatic extension
|
|
1769
|
+
-- A Format to specify the default extension for that format.
|
|
1770
|
+
|
|
1771
|
+
Returns
|
|
1772
|
+
-------
|
|
1773
|
+
datetime.datetime if 'key' is a string, otherwise a list of datetime's
|
|
1774
|
+
"""
|
|
1775
|
+
return self._getFileProperty( key=key, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getmtime(x)) )
|
|
1776
|
+
|
|
1777
|
+
def getLastAccessTime( self, key : str, *, ext : str = None ) -> datetime.datetime:
|
|
1778
|
+
"""
|
|
1779
|
+
Returns the last access time of 'key', or None if file was not found.
|
|
1780
|
+
See comments on os.path.getatime() for compatibility
|
|
1781
|
+
|
|
1782
|
+
Parameters
|
|
1783
|
+
----------
|
|
1784
|
+
key :
|
|
1785
|
+
filename, or list of filenames
|
|
1786
|
+
ext :
|
|
1787
|
+
Extension, or list thereof if 'key' is a list.
|
|
1788
|
+
Use
|
|
1789
|
+
-- None for the directory default
|
|
1790
|
+
-- "" for no automatic extension
|
|
1791
|
+
-- A Format to specify the default extension for that format.
|
|
1792
|
+
|
|
1793
|
+
Returns
|
|
1794
|
+
-------
|
|
1795
|
+
datetime.datetime if 'key' is a string, otherwise a list of datetime's
|
|
1796
|
+
"""
|
|
1797
|
+
return self._getFileProperty( key=key, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getatime(x)) )
|
|
1798
|
+
|
|
1799
|
+
def getFileSize( self, key : str, *, ext : str = None ) -> int:
|
|
1800
|
+
"""
|
|
1801
|
+
Returns the file size of 'key', or None if file was not found.
|
|
1802
|
+
See comments on os.path.getsize() for compatibility
|
|
1803
|
+
|
|
1804
|
+
Parameters
|
|
1805
|
+
----------
|
|
1806
|
+
key :
|
|
1807
|
+
filename, or list of filenames
|
|
1808
|
+
ext :
|
|
1809
|
+
Extension, or list thereof if 'key' is a list.
|
|
1810
|
+
Use
|
|
1811
|
+
-- None for the directory default
|
|
1812
|
+
-- "" for no automatic extension
|
|
1813
|
+
-- A Format to specify the default extension for that format.
|
|
1814
|
+
|
|
1815
|
+
Returns
|
|
1816
|
+
-------
|
|
1817
|
+
File size if 'key' is a string, otherwise a list thereof.
|
|
1818
|
+
"""
|
|
1819
|
+
return self._getFileProperty( key=key, ext=ext, func=lambda x : os.path.getsize(x) )
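# A minimal usage sketch (directory name is arbitrary): the file-property helpers all
# return None for missing keys, so no separate exists() check is needed.
#
#   sd = SubDir("!/meta")
#   sd.write("a", list(range(10)))
#   sd.getFileSize("a")                 # size in bytes, or None
#   sd.getLastModificationTime("a")     # datetime.datetime, or None
#   sd.getFileSize("not_there")         # -> None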
|
|
1820
|
+
|
|
1821
|
+
def rename( self, source : str, target : str, *, ext : str = None ):
|
|
1822
|
+
"""
|
|
1823
|
+
Rename "source" key into "target" key.
|
|
1824
|
+
Function will raise an exception if not successful
|
|
1825
|
+
|
|
1826
|
+
Parameters
|
|
1827
|
+
----------
|
|
1828
|
+
source, target:
|
|
1829
|
+
filenames
|
|
1830
|
+
ext :
|
|
1831
|
+
Extension applied to both 'source' and 'target'.
|
|
1832
|
+
Use
|
|
1833
|
+
-- None for the directory default
|
|
1834
|
+
-- "" for no automatic extensions.
|
|
1835
|
+
-- A Format to specify the default extension for that format.
|
|
1836
|
+
"""
|
|
1837
|
+
# empty directory
|
|
1838
|
+
if self._path is None:
|
|
1839
|
+
return
|
|
1840
|
+
|
|
1841
|
+
# handle directories in 'source'
|
|
1842
|
+
if len(source) == 0: raise ValueError("'source' missing (the filename)")
|
|
1843
|
+
sub, source_ = os.path.split(source)
|
|
1844
|
+
if len(source_) == 0: raise IsADirectoryError( source, txtfmt("'source' %s indicates a directory, not a file", source ))
|
|
1845
|
+
if len(sub) > 0:
|
|
1846
|
+
src_full = self(sub).fullKeyName(key=source_,ext=ext)
|
|
1847
|
+
else:
|
|
1848
|
+
src_full = self.fullKeyName( source, ext=ext )
|
|
1849
|
+
|
|
1850
|
+
# handle directories in 'target'
|
|
1851
|
+
if len(target) == 0: raise ValueError("'target' missing (the filename)" )
|
|
1852
|
+
sub, target_ = os.path.split(target)
|
|
1853
|
+
if len(target_) == 0: raise IsADirectoryError( target, txtfmt("'target' %s indicates a directory, not a file", target))
|
|
1854
|
+
if len(sub) > 0:
|
|
1855
|
+
tar_dir = self(sub)
|
|
1856
|
+
tar_dir.createDirectory()
|
|
1857
|
+
tar_full = tar_dir.fullKeyName(key=target_,ext=ext)
|
|
1858
|
+
else:
|
|
1859
|
+
tar_full = self.fullKeyName( target, ext=ext )
|
|
1860
|
+
self.createDirectory()
|
|
1861
|
+
|
|
1862
|
+
os.rename(src_full, tar_full)
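# A minimal usage sketch (directory and key names are arbitrary): rename() handles
# sub directory components in both keys and creates the target directory if needed.
#
#   sd = SubDir("!/rename_demo")
#   sd.write("raw/a", 1)
#   sd.rename("raw/a", "clean/a")
#   sd.exists("clean/a"), sd.exists("raw/a")   # -> (True, False)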
|
|
1863
|
+
|
|
1864
|
+
# utilities
|
|
1865
|
+
|
|
1866
|
+
@staticmethod
|
|
1867
|
+
def removeBadKeyCharacters( key:str, by:str=' ' ) -> str:
|
|
1868
|
+
"""
|
|
1869
|
+
Replaces invalid characters in a filename by 'by'.
|
|
1870
|
+
See util.fmt_filename() for documentation and further options.
|
|
1871
|
+
"""
|
|
1872
|
+
return fmt_filename( key, by=by )
|
|
1873
|
+
|
|
1874
|
+
def unqiueLabelToKey( self, unique_label:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
|
|
1875
|
+
"""
|
|
1876
|
+
Converts a unique label which might contain invalid characters into a unique file name, such that the full file name does not exceed 'max_length' bytes.
|
|
1877
|
+
The returned key has the format
|
|
1878
|
+
name + separator + ID
|
|
1879
|
+
where ID has length id_length.
|
|
1880
|
+
If unique_label is already guaranteed to be a valid filename, use unqiueLongFileNameToKey() instead.
|
|
1881
|
+
"""
|
|
1882
|
+
len_ext = len(self.ext)
|
|
1883
|
+
assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
|
|
1884
|
+
uqf = uniqueLabelExt( max_length=max_length-len_ext, id_length=id_length, separator=separator, filename_by="default" )
|
|
1885
|
+
return uqf( unique_label )
|
|
1886
|
+
|
|
1887
|
+
def unqiueLongFileNameToKey( self, unique_filename:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
|
|
1888
|
+
"""
|
|
1889
|
+
Converts a unique filename which might be too long into a unique filename such that the total length plus 'ext' does not exceed 'max_length' bytes.
|
|
1890
|
+
If the filename is already short enough, no change is made.
|
|
1891
|
+
|
|
1892
|
+
If 'unique_filename' is not guaranteed to be a valid filename, use unqiueLabelToKey() instead.
|
|
1893
|
+
"""
|
|
1894
|
+
len_ext = len(self.ext)
|
|
1895
|
+
assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
|
|
1896
|
+
uqf = uniqueLabelExt( max_length=max_length-len_ext, id_length=id_length, separator=separator )
|
|
1897
|
+
return uqf( unique_filename )
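# A minimal usage sketch (label and directory are arbitrary): turning an arbitrary,
# possibly invalid or over-long label into a safe key. The result ends in a short
# hash ID so that different labels cannot collide after truncation.
#
#   sd  = SubDir("!/labels")
#   key = sd.unqiueLabelToKey("portfolio: EUR/USD 2020-2025 (daily)", max_length=32)
#   sd.write(key, {"some": "result"})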
|
|
1898
|
+
|
|
1899
|
+
# -- dict-like interface --
|
|
1900
|
+
|
|
1901
|
+
def __call__(self, keyOrSub : str,
|
|
1902
|
+
default = RETURN_SUB_DIRECTORY,
|
|
1903
|
+
raiseOnError : bool = False,
|
|
1904
|
+
*,
|
|
1905
|
+
version : str = None,
|
|
1906
|
+
ext : str = None,
|
|
1907
|
+
fmt : Format = None,
|
|
1908
|
+
delete_wrong_version : bool = True,
|
|
1909
|
+
createDirectory : bool = None ):
|
|
1910
|
+
"""
|
|
1911
|
+
Return either the value of a sub-key (file), or return a new sub directory.
|
|
1912
|
+
If only one argument is used, then this function returns a new sub directory.
|
|
1913
|
+
If two arguments are used, then this function returns read( keyOrSub, default ).
|
|
1914
|
+
|
|
1915
|
+
sd = SubDir("!/test")
|
|
1916
|
+
|
|
1917
|
+
Member access:
|
|
1918
|
+
x = sd('x', None) reads 'x' with default value None
|
|
1919
|
+
x = sd('sd/x', default=1) reads 'x' from sub directory 'sd' with default value 1
|
|
1920
|
+
x = sd('x', default=1, ext="tmp") reads 'x.tmp' from 'sd' with default value 1
|
|
1921
|
+
|
|
1922
|
+
Create sub directory:
|
|
1923
|
+
sd2 = sd("subdir") creates and returns handle to subdirectory 'subdir'
|
|
1924
|
+
sd2 = sd("subdir1/subdir2") creates and returns handle to subdirectory 'subdir1/subdir2'
|
|
1925
|
+
sd2 = sd("subdir1/subdir2", ext=".tmp") creates and returns handle to subdirectory 'subdir1/subdir2' with extension "tmp"
|
|
1926
|
+
sd2 = sd(ext=".tmp") returns handle to current subdirectory with extension "tmp"
|
|
1927
|
+
|
|
1928
|
+
Parameters
|
|
1929
|
+
----------
|
|
1930
|
+
keyOrSub : str
|
|
1931
|
+
identify the object requested. Should be a string or a list of strings.
|
|
1932
|
+
default:
|
|
1933
|
+
If specified, this function reads 'keyOrSub' with read( keyOrSub, default, *args, **kwargs )
|
|
1934
|
+
If not specified, then this function calls SubDir(keyOrSub,parent=self,ext=ext,fmt=fmt)
|
|
1935
|
+
|
|
1936
|
+
The following keywords are only relevant when reading files.
|
|
1937
|
+
They echo the parameters of read()
|
|
1938
|
+
|
|
1939
|
+
raiseOnError : bool
|
|
1940
|
+
Whether to raise an exception if reading an existing file failed.
|
|
1941
|
+
By default this function fails silently and returns the default.
|
|
1942
|
+
version : str
|
|
1943
|
+
If not None, specifies the version of the current code base.
|
|
1944
|
+
Use '*' to read any version (this is distinct from reading a file without version).
|
|
1945
|
+
If version is not '*', then this version will be compared to the version of the file being read.
|
|
1946
|
+
If they do not match, read fails (either by returning default or throwing an exception).
|
|
1947
|
+
delete_wrong_version : bool
|
|
1948
|
+
If True, and if a wrong version was found, delete the file.
|
|
1949
|
+
ext : str
|
|
1950
|
+
Extension overwrite, or a list thereof if key is a list
|
|
1951
|
+
Set to:
|
|
1952
|
+
-- None to use directory's default
|
|
1953
|
+
-- '*' to use the extension implied by 'fmt'
|
|
1954
|
+
-- for convenience 'ext' can also be a Format (in this case leave 'fmt' as None)
|
|
1955
|
+
fmt : Format
|
|
1956
|
+
File format or None to use the directory's default.
|
|
1957
|
+
Note that 'fmt' cannot be a list even if 'key' is.
|
|
1958
|
+
Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
|
|
1959
|
+
|
|
1960
|
+
The following keywords are only relevant when accessing directories
|
|
1961
|
+
They echo the parameters of __init__
|
|
1962
|
+
|
|
1963
|
+
createDirectory : bool
|
|
1964
|
+
Whether or not to create the directory. The default, None, is to inherit the behaviour from self.
|
|
1965
|
+
ext : str
|
|
1966
|
+
Set to None to inherit the parent's extension.
|
|
1967
|
+
fmt : Format
|
|
1968
|
+
Set to None to inherit the parent's format.
|
|
1969
|
+
|
|
1970
|
+
Returns
|
|
1971
|
+
-------
|
|
1972
|
+
Either the value in the file, a new sub directory, or lists thereof.
|
|
1973
|
+
Returns None if an element was not found.
|
|
1974
|
+
"""
|
|
1975
|
+
if default == SubDir.RETURN_SUB_DIRECTORY:
|
|
1976
|
+
if not isinstance(keyOrSub, str):
|
|
1977
|
+
if not isinstance(keyOrSub, Collection):
|
|
1978
|
+
raise ValueError(txtfmt("'keyOrSub' must be a string or an iterable object. Found type '%s;", type(keyOrSub)))
|
|
1979
|
+
return [ SubDir( k,parent=self,ext=ext,fmt=fmt,createDirectory=createDirectory) for k in keyOrSub ]
|
|
1980
|
+
return SubDir(keyOrSub,parent=self,ext=ext,fmt=fmt,createDirectory=createDirectory)
|
|
1981
|
+
return self.read( key=keyOrSub,
|
|
1982
|
+
default=default,
|
|
1983
|
+
raiseOnError=raiseOnError,
|
|
1984
|
+
version=version,
|
|
1985
|
+
delete_wrong_version=delete_wrong_version,
|
|
1986
|
+
ext=ext,
|
|
1987
|
+
fmt=fmt )
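# A minimal usage sketch mirroring the docstring above: the call operator reads a key
# when a default is supplied, and returns a sub directory handle otherwise.
#
#   sd  = SubDir("!/calls")
#   sub = sd("results")                           # one argument -> sub directory handle
#   x   = sd("x", default=0)                      # with a default -> read 'x'
#   y   = sd("results/y", None, version="1.0")    # versioned read; None if absent or wrong version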
|
|
1988
|
+
|
|
1989
|
+
def __getitem__( self, key ):
|
|
1990
|
+
"""
|
|
1991
|
+
Reads self[key]
|
|
1992
|
+
If 'key' does not exist, throw a KeyError
|
|
1993
|
+
"""
|
|
1994
|
+
return self.read( key=key, default=None, raiseOnError=True )
|
|
1995
|
+
|
|
1996
|
+
def __setitem__( self, key, value):
|
|
1997
|
+
""" Writes 'value' to 'key' """
|
|
1998
|
+
self.write(key,value)
|
|
1999
|
+
|
|
2000
|
+
def __delitem__(self,key):
|
|
2001
|
+
""" Silently delete self[key] """
|
|
2002
|
+
self.delete(key, False )
|
|
2003
|
+
|
|
2004
|
+
def __len__(self) -> int:
|
|
2005
|
+
""" Return the number of files (keys) in this directory """
|
|
2006
|
+
return len(self.keys())
|
|
2007
|
+
|
|
2008
|
+
def __iter__(self):
|
|
2009
|
+
""" Returns an iterator which allows traversing through all keys (files) below this directory """
|
|
2010
|
+
return self.keys().__iter__()
|
|
2011
|
+
|
|
2012
|
+
def __contains__(self, key):
|
|
2013
|
+
""" Implements 'in' operator """
|
|
2014
|
+
return self.exists(key)
|
|
2015
|
+
|
|
2016
|
+
# -- object like interface --
|
|
2017
|
+
|
|
2018
|
+
def __getattr__(self, key):
|
|
2019
|
+
"""
|
|
2020
|
+
Allow using member notation to get data
|
|
2021
|
+
This function throws an AttributeError if 'key' is not found.
|
|
2022
|
+
"""
|
|
2023
|
+
if not self.exists(key):
|
|
2024
|
+
raise AttributeError(key)
|
|
2025
|
+
return self.read( key=key, raiseOnError=True )
|
|
2026
|
+
|
|
2027
|
+
def __setattr__(self, key, value):
|
|
2028
|
+
"""
|
|
2029
|
+
Allow using member notation to write data
|
|
2030
|
+
Note: keys starting with '_' are /not/ written to disk
|
|
2031
|
+
"""
|
|
2032
|
+
if key[0] == '_':
|
|
2033
|
+
self.__dict__[key] = value
|
|
2034
|
+
else:
|
|
2035
|
+
self.write(key,value)
|
|
2036
|
+
|
|
2037
|
+
def __delattr__(self, key):
|
|
2038
|
+
""" Silently delete a key with member notation. """
|
|
2039
|
+
verify( key[:1] != "_", "Deleting protected or private members disabled. Fix __delattr__ to support this")
|
|
2040
|
+
return self.delete( key=key, raiseOnError=False )
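# A minimal usage sketch (directory name is arbitrary): the dictionary and attribute
# style accessors map directly onto read()/write()/delete()/exists().
#
#   sd      = SubDir("!/mapping")
#   sd["a"] = 1                  # __setitem__ -> write
#   sd.b    = 2                  # __setattr__ -> write; '_'-prefixed names stay in memory
#   "a" in sd                    # __contains__ -> exists, True
#   sd["a"], sd.b                # -> (1, 2)
#   del sd["a"]                  # __delitem__ -> silent delete
#   len(sd), list(sd)            # number of keys and their names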
|
|
2041
|
+
|
|
2042
|
+
# pickling
|
|
2043
|
+
# --------
|
|
2044
|
+
|
|
2045
|
+
def __getstate__(self):
|
|
2046
|
+
""" Return state to pickle """
|
|
2047
|
+
return dict( path=self._path, ext=self._ext, fmt=self._fmt, crt=self._crt )
|
|
2048
|
+
|
|
2049
|
+
def __setstate__(self, state):
|
|
2050
|
+
""" Restore pickle """
|
|
2051
|
+
self._path = state['path']
|
|
2052
|
+
self._ext = state['ext']
|
|
2053
|
+
self._fmt = state['fmt']
|
|
2054
|
+
self._crt = state['crt']
|
|
2055
|
+
|
|
2056
|
+
# caching
|
|
2057
|
+
# -------
|
|
2058
|
+
|
|
2059
|
+
def cache( self, version : str = None , *,
|
|
2060
|
+
dependencies : list = None,
|
|
2061
|
+
label : Callable = None,
|
|
2062
|
+
uid : Callable = None,
|
|
2063
|
+
name : str = None,
|
|
2064
|
+
exclude_args : list[str] = None,
|
|
2065
|
+
include_args : list[str] = None,
|
|
2066
|
+
exclude_arg_types : list[type] = None,
|
|
2067
|
+
version_auto_class : bool = True):
|
|
2068
|
+
"""
|
|
2069
|
+
Wraps a callable or a class into a cachable function.
|
|
2070
|
+
Caching is based on the following two simple principles:
|
|
2071
|
+
|
|
2072
|
+
1) Unique Call ID:
|
|
2073
|
+
When a function is called with some parameters, the wrapper identifies a unique ID based
|
|
2074
|
+
on the qualified name of the function and on its runtime functional parameters (i.e. those
|
|
2075
|
+
which alter the outcome of the function).
|
|
2076
|
+
When a function is called the first time with a given unique call ID, it will store
|
|
2077
|
+
the result of the call to disk. If the function is called with the same call ID again,
|
|
2078
|
+
the result is read from disk and returned.
|
|
2079
|
+
|
|
2080
|
+
To compute unique call IDs, cdxbasics.util.namedUniqueHashExt() is used.
|
|
2081
|
+
Please read implementation comments there:
|
|
2082
|
+
Key default features:
|
|
2083
|
+
* It hashes objects via their __dict__ or __slots__ members.
|
|
2084
|
+
This can be overwritten for a class by implementing __unique_hash__; see cdxbasics.util.namedUniqueHashExt().
|
|
2085
|
+
* Function members of objects or any members starting with '_' are not considered
|
|
2086
|
+
unless this behaviour is changed using CacheController().
|
|
2087
|
+
* Numpy arrays and pandas frames are hashed using their byte representation.
|
|
2088
|
+
That is slow and not recommended. It is better to identify numpy/panda inputs
|
|
2089
|
+
via their generating characteristic ID.
|
|
2090
|
+
|
|
2091
|
+
2) Version:
|
|
2092
|
+
Each function has a version, which includes dependencies on other functions or classes.
|
|
2093
|
+
If the version of a result on disk does not match the current version, it is deleted
|
|
2094
|
+
and the function is called again. This way you can use your code to drive updates
|
|
2095
|
+
to data generated with cached functions.
|
|
2096
|
+
Behind the scenes this is implemented using cdxbasics.version.version() which means
|
|
2097
|
+
that the version of a cached function can also depend on versions of non-cached functions
|
|
2098
|
+
or other objects.
|
|
2099
|
+
|
|
2100
|
+
Functions
|
|
2101
|
+
---------
|
|
2102
|
+
Example of caching functions:
|
|
2103
|
+
|
|
2104
|
+
Cache a simple function 'f':
|
|
2105
|
+
|
|
2106
|
+
from cdxbasics.subdir import SubDir
|
|
2107
|
+
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2108
|
+
|
|
2109
|
+
@cache.cache("0.1")
|
|
2110
|
+
def f(x,y):
|
|
2111
|
+
return x*y
|
|
2112
|
+
|
|
2113
|
+
_ = f(1,2) # function gets computed and the result cached
|
|
2114
|
+
_ = f(1,2) # restore result from cache
|
|
2115
|
+
_ = f(2,2) # different parameters: compute and store result
|
|
2116
|
+
|
|
2117
|
+
Another function g which calls f, and whose version therefore depends on f's version:
|
|
2118
|
+
|
|
2119
|
+
from cdxbasics.subdir import SubDir
|
|
2120
|
+
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2121
|
+
|
|
2122
|
+
@cache.cache("0.1", dependencies=[f])
|
|
2123
|
+
def g(x,y):
|
|
2124
|
+
return f(x,y)**2
|
|
2125
|
+
|
|
2126
|
+
A function may have non-functional parameters which do not alter the function's outcome.
|
|
2127
|
+
An example is a 'debug' flag:
|
|
2128
|
+
|
|
2129
|
+
from cdxbasics.subdir import SubDir
|
|
2130
|
+
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2131
|
+
|
|
2132
|
+
@cache.cache("0.1", dependencies=[f], exclude_args='debug')
|
|
2133
|
+
def g(x,y,debug): # <-- debug is a non-functional parameter
|
|
2134
|
+
if debug:
|
|
2135
|
+
print(f"h(x={x},y={y})")
|
|
2136
|
+
return f(x,y)**2
|
|
2137
|
+
|
|
2138
|
+
You can systematically define certain types as non-functional for *all* functions wrapped
|
|
2139
|
+
by this SubDir by specifying the respective parameter for the CacheController() in SubDir.__init__().
|
|
2140
|
+
|
|
2141
|
+
The Unique Call ID of a function is by default generated by its fully qualified name
|
|
2142
|
+
and a unique hash of its functional parameters.
|
|
2143
|
+
This can be made more readable by using id=
|
|
2144
|
+
|
|
2145
|
+
from cdxbasics.subdir import SubDir
|
|
2146
|
+
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2147
|
+
|
|
2148
|
+
@cache.cache("0.1", id="f({x},{y}") # <- using a string to be passed to str.format()
|
|
2149
|
+
def f(x,y):
|
|
2150
|
+
return x*y
|
|
2151
|
+
|
|
2152
|
+
You can also use functions:
|
|
2153
|
+
|
|
2154
|
+
from cdxbasics.subdir import SubDir
|
|
2155
|
+
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2156
|
+
|
|
2157
|
+
# Using a function 'id'. Note the **_ to catch uninteresting parameters, here 'debug'
|
|
2158
|
+
@cache.cache("0.1", id=lambda x,y,**_: f"h({x},{y})", exclude_args='debug')
|
|
2159
|
+
def h(x,y,debug=False):
|
|
2160
|
+
if debug:
|
|
2161
|
+
print(f"h(x={x},y={y})")
|
|
2162
|
+
return x*y
|
|
2163
|
+
|
|
2164
|
+
Note that by default it is not assumed that the call Id returned by id is unique,
|
|
2165
|
+
and a hash of all pertinent arguments will be appended.
|
|
2166
|
+
That is why in the previous example we still need to exclude 'debug' via exclude_args.
|
|
2167
|
+
|
|
2168
|
+
If the id you generate is guaranteed to be unique for all functional parameter values,
|
|
2169
|
+
you can add unique=True. In this case the id is used directly as the cache filename, provided it is a valid filename and short enough:
|
|
2170
|
+
|
|
2171
|
+
from cdxbasics.subdir import SubDir
|
|
2172
|
+
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2173
|
+
|
|
2174
|
+
# Using a function 'id' with 'unique' to generate a unique ID.
|
|
2175
|
+
@cache.cache("0.1", id=lambda x,y,**_: f"h({x},{y})", unique=True)
|
|
2176
|
+
def h(x,y,debug=False):
|
|
2177
|
+
if debug:
|
|
2178
|
+
print(f"h(x={x},y={y})")
|
|
2179
|
+
return x*y
|
|
2180
|
+
|
|
2181
|
+
Numpy/Panda
|
|
2182
|
+
-----------
|
|
2183
|
+
Numpy/Panda data should not be hashed for identifying unique call IDs.
|
|
2184
|
+
Instead, use the defining characteristics for generating the data frames.
|
|
2185
|
+
|
|
2186
|
+
For example:
|
|
2187
|
+
|
|
2188
|
+
from cdxbasics.subdir import SubDir
|
|
2189
|
+
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2190
|
+
|
|
2191
|
+
from cdxbasics.prettydict import pdct
|
|
2192
|
+
|
|
2193
|
+
@cache.cache("0.1")
|
|
2194
|
+
def load_src( src_def ):
|
|
2195
|
+
result = ... load ...
|
|
2196
|
+
return result
|
|
2197
|
+
|
|
2198
|
+
# ignore 'data': it is uniquely identified by 'src_def' -->
|
|
2199
|
+
@cache.cache("0.1", dependencies=[load_src], exclude_args=['data'])
|
|
2200
|
+
def statistics( stats_def, src_def, data ):
|
|
2201
|
+
stats = ... using data
|
|
2202
|
+
return stats
|
|
2203
|
+
|
|
2204
|
+
src_def = pdct()
|
|
2205
|
+
src_def.start = "2010-01-01"
|
|
2206
|
+
src_def.end = "2025-01-01"
|
|
2207
|
+
src_def.x = 0.1
|
|
2208
|
+
|
|
2209
|
+
stats_def = pdct()
|
|
2210
|
+
stats_def.lambda_ = 0.1
|
|
2211
|
+
stats_def.window = 100
|
|
2212
|
+
|
|
2213
|
+
data = load_src( src_def )
|
|
2214
|
+
stats = statistics( stats_def, src_def, data )
|
|
2215
|
+
|
|
2216
|
+
While instructive, this case is not optimal: we do not really need to load 'data'
|
|
2217
|
+
if 'stats' itself can be restored from cache (unless we need 'data' further on).
|
|
2218
|
+
|
|
2219
|
+
Consider therefore
|
|
2220
|
+
|
|
2221
|
+
@cache.cache("0.1")
|
|
2222
|
+
def load_src( src_def ):
|
|
2223
|
+
result = ... load ...
|
|
2224
|
+
return result
|
|
2225
|
+
|
|
2226
|
+
# no 'data' argument needed: load_src() is called (and cached) inside -->
|
|
2227
|
+
@cache.cache("0.1", dependencies=[load_src])
|
|
2228
|
+
def statistics_only( stats_def, src_def ):
|
|
2229
|
+
data = load_src( src_def ) # <-- embed call to load_src() here
|
|
2230
|
+
stats = ... using data
|
|
2231
|
+
return stats
|
|
2232
|
+
|
|
2233
|
+
stats = statistics_only( stats_def, src_def )
|
|
2234
|
+
|
|
2235
|
+
Member functions
|
|
2236
|
+
----------------
|
|
2237
|
+
You can cache member functions like any other function.
|
|
2238
|
+
Note that version information is by default inherited, i.e. member functions will be dependent on the version of their
|
|
2239
|
+
defining class, and class versions will be dependent on their base classes' versions.
|
|
2240
|
+
|
|
2241
|
+
from cdxbasics.subdir import SubDir, version
|
|
2242
|
+
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2243
|
+
|
|
2244
|
+
@version("0.1")
|
|
2245
|
+
class A(object):
|
|
2246
|
+
def __init__(self, x):
|
|
2247
|
+
self.x = x
|
|
2248
|
+
|
|
2249
|
+
@cache.cache("0.1")
|
|
2250
|
+
def f(self, y):
|
|
2251
|
+
return self.x*y
|
|
2252
|
+
|
|
2253
|
+
a = A(x=1)
|
|
2254
|
+
_ = a.f(y=1) # compute f and store result
|
|
2255
|
+
_ = a.f(y=1) # load result back from disk
|
|
2256
|
+
a.x = 2
|
|
2257
|
+
_ = a.f(y=1) # 'a' changed: compute f and store result
|
|
2258
|
+
b = A(x=2)
|
|
2259
|
+
_ = b.f(y=1) # same unique call ID as previous call -> restore result from disk
|
|
2260
|
+
|
|
2261
|
+
**WARNING**
|
|
2262
|
+
The hashing function used -- cdxbasics.util.uniqueHashExt() -- does by default *not* process members of objects or dictionaries
|
|
2263
|
+
which start with a "_". This behaviour can be changed using CacheController().
|
|
2264
|
+
For reasonably complex objects it is recommended to implement:
|
|
2265
|
+
__unique_hash__( self, length : int, parse_functions : bool, parse_underscore : str )
|
|
2266
|
+
(it is also possible to simply set this value to a string constant).
|
|
2267
|
+
|
|
2268
|
+
Bound Member Functions
|
|
2269
|
+
----------------------
|
|
2270
|
+
Note that the above is functionally different from decorating a bound member function:
|
|
2271
|
+
|
|
2272
|
+
from cdxbasics.subdir import SubDir, version
|
|
2273
|
+
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2274
|
+
|
|
2275
|
+
class A(object):
|
|
2276
|
+
def __init__(self,x):
|
|
2277
|
+
self.x = x
|
|
2278
|
+
def f(self,y):
|
|
2279
|
+
return self.x*y
|
|
2280
|
+
|
|
2281
|
+
a = A(x=1)
|
|
2282
|
+
f = cache.cache("0.1", id=lambda self, y : f"a.f({y})")(a.f) # <- decorate bound 'f'.
|
|
2283
|
+
r = f(y=2)
|
|
2284
|
+
|
|
2285
|
+
In this case the function 'f' is bound to 'a'. The object is added as 'self' to the function
|
|
2286
|
+
parameter list even though the bound function parameter list does not include 'self'.
|
|
2287
|
+
This, together with the comments on hashing objects above, ensures that (hashed) changes to 'a' will
|
|
2288
|
+
be reflected in the unique call ID for the member function.
|
|
2289
|
+
|
|
2290
|
+
Classes
|
|
2291
|
+
-------
|
|
2292
|
+
Classes can also be cached.
|
|
2293
|
+
This is done in two steps: first, the class itself is decorated to provide version information at its own level.
|
|
2294
|
+
Secondly, decorate __init__ which also helps to define the unique call id. You do not need to specify a version
|
|
2295
|
+
for __init__ as its version usually coincides with the version of the class.
|
|
2296
|
+
|
|
2297
|
+
Simple example:
|
|
2298
|
+
|
|
2299
|
+
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2300
|
+
|
|
2301
|
+
@cache.cache("0.1")
|
|
2302
|
+
class A(object):
|
|
2303
|
+
|
|
2304
|
+
@cache.cache(exclude_args=['debug'])
|
|
2305
|
+
def __init__(self, x, debug):
|
|
2306
|
+
if debug:
|
|
2307
|
+
print("__init__",x)
|
|
2308
|
+
self.x = x
|
|
2309
|
+
|
|
2310
|
+
__init__ does not actually return a value; for this reason the actual function decorated will be __new__.
|
|
2311
|
+
Attempting to cache decorate __new__ will lead to an exception.
|
|
2312
|
+
|
|
2313
|
+
A nuance for __init__ vs ordinary member function is that 'self' is non-functional.
|
|
2314
|
+
It is therefore automatically excluded from computing a unique call ID.
|
|
2315
|
+
Specifically, 'self' is not part of the arguments passed to 'id':
|
|
2316
|
+
|
|
2317
|
+
@cache.cache("0.1")
|
|
2318
|
+
class A(object):
|
|
2319
|
+
|
|
2320
|
+
@cache.cache("0.1", id=lambda x, debug: f"A.__init__(x={x})") # <-- 'self' is not passed to the lambda function; no need to add **_
|
|
2321
|
+
def __init__(self, x, debug):
|
|
2322
|
+
if debug:
|
|
2323
|
+
print("__init__",x)
|
|
2324
|
+
self.x = x
|
|
2325
|
+
|
|
2326
|
+
Decorating classes with __slots__ does not yet work.
|
|
2327
|
+
|
|
2328
|
+
Non-functional parameters
|
|
2329
|
+
-------------------------
|
|
2330
|
+
Often functions have parameters which do not alter the output of the function but control i/o or other aspects of the overall environment.
|
|
2331
|
+
An example is a function parameter 'debug':
|
|
2332
|
+
|
|
2333
|
+
def f(x,y,debug=False):
|
|
2334
|
+
z = x*y
|
|
2335
|
+
if debug:
|
|
2336
|
+
print(f"x={x}, y={y}, z={z}")
|
|
2337
|
+
return z
|
|
2338
|
+
|
|
2339
|
+
To specify which parameters are pertinent for identifying a unique id, use:
|
|
2340
|
+
|
|
2341
|
+
a) include_args: list of function arguments to include. If None, use all as input in the next step
|
|
2342
|
+
b) exclude_args: list of function arguments to exclude, if not None.
|
|
2343
|
+
c) exclude_arg_types: a list of types to exclude. This is helpful if control flow is managed with dedicated data types.
|
|
2344
|
+
An example of such a type is cdxbasics.verbose.Context which is used to print hierarchical output messages.
|
|
2345
|
+
Types can be globally excluded using the CacheController.
|
|
2346
|
+
|
|
2347
|
+
See also
|
|
2348
|
+
--------
|
|
2349
|
+
For project-wide use it is usually inconvenient to control caching at the level of a 'directory'.
|
|
2350
|
+
See VersionedCacheRoot(), which is a thin wrapper around a SubDir with a CacheController.
|
|
2351
|
+
|
|
2352
|
+
Parameters
|
|
2353
|
+
----------
|
|
2354
|
+
version : str, optional
|
|
2355
|
+
Version of the function.
|
|
2356
|
+
* If None then F must be decorated with cdxbasics.version.version().
|
|
2357
|
+
* If set, the function F is first decorated with cdxbasics.version.version().
|
|
2358
|
+
dependencies : list, optional
|
|
2359
|
+
List of version dependencies
|
|
2360
|
+
|
|
2361
|
+
id : str, Callable
|
|
2362
|
+
Create a call label for the function call and its parameters.
|
|
2363
|
+
See above for a description.
|
|
2364
|
+
* A plain string without {} formatting: this is the fully qualified id
|
|
2365
|
+
* A string with {} formatting: id.format( name=name, **parameters ) will be used to generate the fully qualified id
|
|
2366
|
+
* A Callable, in which case id( name=name, **parameters ) will be used to generate the fully qualified id
|
|
2367
|
+
|
|
2368
|
+
unique : bool
|
|
2369
|
+
Whether the 'id' generated by 'id' is unique for this function call with its parameters.
|
|
2370
|
+
If True, then the function will attempt to use 'id' as filename as long as it has no invalid characters and is short
|
|
2371
|
+
enough (see 'max_filename_length').
|
|
2372
|
+
If False, the function will append to the 'id' a unique hash of the qualified function name and all pertinent parameters
|
|
2373
|
+
|
|
2374
|
+
name : str
|
|
2375
|
+
The name of the function, or None for using the fully qualified function name.
|
|
2376
|
+
|
|
2377
|
+
include_args : list[str]
|
|
2378
|
+
List of arguments to include in generating a unique id, or None for all.
|
|
2379
|
+
|
|
2380
|
+
exclude_args : list[str]:
|
|
2381
|
+
List of arguments to exclude
|
|
2382
|
+
|
|
2383
|
+
exclude_arg_types : list[type]
|
|
2384
|
+
List of types to exclude.
|
|
2385
|
+
|
|
2386
|
+
version_auto_class : bool
If True (the default), versions of classes and their member functions automatically include the version of the defining class and its base classes, cf. cdxbasics.version.version().
|
|
2387
|
+
|
|
2388
|
+
|
|
2389
|
+
|
|
2390
|
+
Returns
|
|
2391
|
+
-------
|
|
2392
|
+
A callable to execute F if need be.
|
|
2393
|
+
This callable has a member 'cache_info' which can be used to access information on caching activity.
|
|
2394
|
+
|
|
2395
|
+
Information available at any time after decoration:
|
|
2396
|
+
F.cache_info.name : qualified name of the function
|
|
2397
|
+
F.cache_info.signature : signature of the function
|
|
2398
|
+
|
|
2399
|
+
Additional information available during a call to a decorated function F, and thereafter:
|
|
2400
|
+
F.cache_info.version : unique version string reflecting all dependencies.
|
|
2401
|
+
F.cache_info.uid : unique call ID.
|
|
2402
|
+
F.cache_info.label : last id generated, or None (if id was a string and unique was True)
|
|
2403
|
+
F.cache_info.arguments : arguments parsed to create a unique call ID, or None (if id was a string and unique was True)
|
|
2404
|
+
|
|
2405
|
+
Additional information available after a call to F:
|
|
2406
|
+
F.cache_info.last_cached : whether the last function call returned a cached object
|
|
2407
|
+
|
|
2408
|
+
The function F has additional function parameters
|
|
2409
|
+
override_cache_mode : allows overriding the caching mode temporarily, in particular "off"
|
|
2410
|
+
track_cached_files : pass a CacheTracker object to keep track of all files used (loaded from or saved to).
|
|
2411
|
+
This can be used to delete intermediary files when a large operation was completed.
|
|
2412
|
+
"""
|
|
2413
|
+
return CacheCallable(subdir = self,
|
|
2414
|
+
version = version,
|
|
2415
|
+
dependencies = dependencies,
|
|
2416
|
+
label = label,
|
|
2417
|
+
uid = uid,
|
|
2418
|
+
name = name,
|
|
2419
|
+
exclude_args = exclude_args,
|
|
2420
|
+
include_args = include_args,
|
|
2421
|
+
exclude_arg_types = exclude_arg_types,
|
|
2422
|
+
version_auto_class = version_auto_class )
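# A minimal usage sketch following the docstring above (function bodies and parameter
# names are made up for illustration): a small cached pipeline with explicit versions,
# a version dependency, and a non-functional argument excluded from the call ID.
#
#   cache = SubDir("!/.cache")
#
#   @cache.cache("0.1")
#   def load(src_def):
#       return {"rows": 100, "src": dict(src_def)}
#
#   @cache.cache("0.2", dependencies=[load], exclude_args=["verbose"])
#   def analyse(stats_def, src_def, verbose=False):
#       data = load(src_def)                      # nested cached call
#       return {"window": stats_def["window"], "n": data["rows"]}
#
#   stats = analyse({"window": 20}, {"start": "2020-01-01"})   # computed once, then read from disk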
|
|
2423
|
+
|
|
2424
|
+
def cache_class( self,
|
|
2425
|
+
version : str = None , *,
|
|
2426
|
+
name : str = None,
|
|
2427
|
+
dependencies : list = None,
|
|
2428
|
+
version_auto_class : bool = True
|
|
2429
|
+
):
|
|
2430
|
+
"""
|
|
2431
|
+
Short cut for SubDir.cache() for classes
|
|
2432
|
+
See SubDir.cache() for documentation.
|
|
2433
|
+
"""
|
|
2434
|
+
return self.cache( name=name,
|
|
2435
|
+
version=version,
|
|
2436
|
+
dependencies=dependencies,
|
|
2437
|
+
version_auto_class=version_auto_class)
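# --- Illustrative class-caching sketch (editor's note; not part of the original source) ---
# Both the class and its __init__ must be decorated; 'cache_dir', 'Model' and 'x' are hypothetical:
#
#   @cache_dir.cache_class("1.0")
#   class Model:
#       @cache_dir.cache("1.0", label="Model {x}")
#       def __init__(self, x):
#           self.x = x
#
#   m = Model(1)    # first call constructs the instance and writes it to the cache
#   m = Model(1)    # later calls restore the instance from disk and skip __init__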
|
|
2438
|
+
|
|
2439
|
+
|
|
2440
|
+
def _ensure_has_version( F,
|
|
2441
|
+
version : str = None,
|
|
2442
|
+
dependencies : list = None,
|
|
2443
|
+
auto_class : bool = True,
|
|
2444
|
+
allow_default: bool = False):
|
|
2445
|
+
"""
|
|
2446
|
+
Sets a version if requested, or ensures one is present
|
|
2447
|
+
"""
|
|
2448
|
+
if version is None and not dependencies is None:
|
|
2449
|
+
raise ValueError(f"'{F.__qualname__}: you cannot specify version 'dependencies' without specifying also a 'version'")
|
|
2450
|
+
|
|
2451
|
+
version_info = getattr(F,"version", None)
|
|
2452
|
+
if not version_info is None and type(version_info).__name__ != Version.__name__:
|
|
2453
|
+
raise RuntimeError(f"'{F.__qualname__}: has a 'version' member, but it is not of class 'Version'. Found '{type(version_info).__name__}'")
|
|
2454
|
+
|
|
2455
|
+
if version is None:
|
|
2456
|
+
if not version_info is None:
|
|
2457
|
+
return F
|
|
2458
|
+
if allow_default:
|
|
2459
|
+
version = "0"
|
|
2460
|
+
else:
|
|
2461
|
+
raise ValueError(f"'{F.__qualname__}': cannot determine version. Specify 'version'")
|
|
2462
|
+
elif not version_info is None:
|
|
2463
|
+
raise ValueError(f"'{F.__qualname__}: function already has version information; cannot set version '{version}' again")
|
|
2464
|
+
return version_decorator( version=version,
|
|
2465
|
+
dependencies=dependencies,
|
|
2466
|
+
auto_class=auto_class)(F)
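# (Editor's note) Summary of the logic above: an explicit 'version' decorates F via the version
# decorator; without one, an existing F.version is accepted, or "0" is used if allow_default is True;
# specifying a 'version' when F already carries version information is an error.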
|
|
2467
|
+
|
|
2468
|
+
def _qualified_name( F, name ):
|
|
2469
|
+
"""
|
|
2470
|
+
Return qualified name including module name, robustly
|
|
2471
|
+
"""
|
|
2472
|
+
if name is None:
|
|
2473
|
+
try:
|
|
2474
|
+
name = F.__qualname__
|
|
2475
|
+
except:
|
|
2476
|
+
try:
|
|
2477
|
+
name = F.__name__
|
|
2478
|
+
except:
|
|
2479
|
+
pass
|
|
2480
|
+
verify( not name is None, "Cannot determine qualified name for 'F': it has neither __qualname__ nor __name__. Please specify 'name'", exception=RuntimeError)
|
|
2481
|
+
try:
|
|
2482
|
+
name = name + "@" + F.__module__
|
|
2483
|
+
except:
|
|
2484
|
+
warn( f"Cannot determine module name for '{name}' of {type(F)}" )
|
|
2485
|
+
return name
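# (Editor's note, illustrative) e.g. for a method 'f' of class 'A' defined in module 'pkg.mod',
# _qualified_name returns "A.f@pkg.mod".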
|
|
2486
|
+
|
|
2487
|
+
class CacheCallable(object):
|
|
2488
|
+
"""
|
|
2489
|
+
Utility class for SubDir.cache().
|
|
2490
|
+
See documentation for that function.
|
|
2491
|
+
"""
|
|
2492
|
+
|
|
2493
|
+
def __init__(self,
|
|
2494
|
+
subdir : SubDir, *,
|
|
2495
|
+
version : str = None,
|
|
2496
|
+
dependencies : list,
|
|
2497
|
+
label : Callable = None,
|
|
2498
|
+
uid : Callable = None,
|
|
2499
|
+
name : str = None,
|
|
2500
|
+
exclude_args : set[str] = None,
|
|
2501
|
+
include_args : set[str] = None,
|
|
2502
|
+
exclude_arg_types : set[type] = None,
|
|
2503
|
+
version_auto_class : bool = True,
|
|
2504
|
+
name_of_name_arg : str = "name"):
|
|
2505
|
+
"""
|
|
2506
|
+
Utility class for SubDir.cache().
|
|
2507
|
+
See documentation for that function.
|
|
2508
|
+
"""
|
|
2509
|
+
if not label is None and not uid is None:
|
|
2510
|
+
error("Cannot specify both 'label' and 'uid'.")
|
|
2511
|
+
|
|
2512
|
+
self._subdir = SubDir(subdir)
|
|
2513
|
+
self._version = str(version) if not version is None else None
|
|
2514
|
+
self._dependencies = list(dependencies) if not dependencies is None else None
|
|
2515
|
+
self._label = label
|
|
2516
|
+
self._uid = uid
|
|
2517
|
+
self._name = str(name) if not name is None else None
|
|
2518
|
+
self._exclude_args = set(exclude_args) if not exclude_args is None and len(exclude_args) > 0 else None
|
|
2519
|
+
self._include_args = set(include_args) if not include_args is None and len(include_args) > 0 else None
|
|
2520
|
+
self._exclude_arg_types = set(exclude_arg_types) if not exclude_arg_types is None and len(exclude_arg_types) > 0 else None
|
|
2521
|
+
self._version_auto_class = bool(version_auto_class)
|
|
2522
|
+
self._name_of_name_arg = str(name_of_name_arg)
|
|
2523
|
+
|
|
2524
|
+
@property
|
|
2525
|
+
def uid_or_label(self) -> Callable:
|
|
2526
|
+
return self._uid if self._label is None else self._label
|
|
2527
|
+
@property
|
|
2528
|
+
def unique(self) -> bool:
|
|
2529
|
+
return not self._uid is None
|
|
2530
|
+
|
|
2531
|
+
@property
|
|
2532
|
+
def cacheController(self) -> CacheController:
|
|
2533
|
+
""" Returns the cache controller """
|
|
2534
|
+
return self._subdir.cacheController
|
|
2535
|
+
@property
|
|
2536
|
+
def cache_mode(self) -> CacheMode:
|
|
2537
|
+
return self.cacheController.cache_mode
|
|
2538
|
+
@property
|
|
2539
|
+
def debug_verbose(self) -> Context:
|
|
2540
|
+
return self.cacheController.debug_verbose
|
|
2541
|
+
@property
|
|
2542
|
+
def uniqueNamedFileName(self) -> Callable:
|
|
2543
|
+
return self.cacheController.uniqueNamedFileName
|
|
2544
|
+
@property
|
|
2545
|
+
def uniqueLabelledFileName(self) -> Callable:
|
|
2546
|
+
return self.cacheController.uniqueLabelledFileName
|
|
2547
|
+
@property
|
|
2548
|
+
def global_exclude_arg_types(self) -> list[type]:
|
|
2549
|
+
return self.cacheController.exclude_arg_types
|
|
2550
|
+
|
|
2551
|
+
def __call__(self, F : Callable):
|
|
2552
|
+
"""
|
|
2553
|
+
Decorate 'F' as a cacheable callable. Classes are handled via _wrap_class().
|
|
2554
|
+
See SubDir.cache() for documentation.
|
|
2555
|
+
"""
|
|
2556
|
+
if inspect.isclass(F):
|
|
2557
|
+
if not self._label is None: raise ValueError(f"'{F.__qualname__}': when decorating a class specify 'label' for __init__, not the class")
|
|
2558
|
+
if not self._uid is None: raise ValueError(f"'{F.__qualname__}': when decorating a class specify 'uid' for __init__, not the class")
|
|
2559
|
+
if not self._exclude_args is None: raise ValueError(f"'{F.__qualname__}': when decorating a class specify 'exclude_args' for __init__, not the class")
|
|
2560
|
+
if not self._include_args is None: raise ValueError(f"'{F.__qualname__}': when decorating a class specify 'include_args' for __init__, not the class")
|
|
2561
|
+
if not self._exclude_arg_types is None: raise ValueError(f"'{F.__qualname__}': when decorating a class specify 'exclude_arg_types' for __init__, not the class")
|
|
2562
|
+
return self._wrap_class(F)
|
|
2563
|
+
|
|
2564
|
+
return self._wrap( F )
|
|
2565
|
+
|
|
2566
|
+
def _wrap_class(self, C : type):
|
|
2567
|
+
"""
|
|
2568
|
+
Wrap class
|
|
2569
|
+
This wrapper:
|
|
2570
|
+
1) Assigns a cdxbasics.version.version() for the class (if not yet present)
|
|
2571
|
+
2) Extracts from __init__ the wrapper to decorate __new__
|
|
2572
|
+
"""
|
|
2573
|
+
debug_verbose = self.cacheController.debug_verbose
|
|
2574
|
+
|
|
2575
|
+
assert inspect.isclass(C), ("Not a class", C)
|
|
2576
|
+
|
|
2577
|
+
# apply decorator provided for __init__ to __new__
|
|
2578
|
+
C__init__ = getattr(C, "__init__", None)
|
|
2579
|
+
if C__init__ is None:
|
|
2580
|
+
raise RuntimeError("'{F.__qualname__}': define and decorate __init__")
|
|
2581
|
+
init_cache_callable = getattr(C__init__, "init_cache_callable", None)
|
|
2582
|
+
if init_cache_callable is None:
|
|
2583
|
+
raise RuntimeError("'{F.__qualname__}': must also decorate __init__")
|
|
2584
|
+
assert type(init_cache_callable).__name__ == CacheCallable.__name__, (f"*** Internal error: '{C.__qualname__}': __init__ has wrong type for 'init_cache_callable': {type(init_cache_callable)} ?")
|
|
2585
|
+
|
|
2586
|
+
C__init__.init_cache_callable = None # tell the __init__ wrapper we have processed this information
|
|
2587
|
+
|
|
2588
|
+
C__new__ = C.__new__
|
|
2589
|
+
class_parameter = list(inspect.signature(C__new__).parameters)[0]
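# (Editor's note) 'class_parameter' is the name of the first parameter of __new__, usually 'cls';
# it is added to the excluded arguments below so it does not enter the unique call id.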
|
|
2590
|
+
init_cache_callable._exclude_args = {class_parameter} if init_cache_callable._exclude_args is None else ( init_cache_callable._exclude_args | {class_parameter})
|
|
2591
|
+
init_cache_callable._name = _qualified_name( C, self._name ) if init_cache_callable._name is None else init_cache_callable._name
|
|
2592
|
+
|
|
2593
|
+
C.__new__ = init_cache_callable._wrap( C__new__, is_new = True )
|
|
2594
|
+
C.__new__.cache_info.signature = inspect.signature(C__init__) # signature of the function
|
|
2595
|
+
|
|
2596
|
+
# apply version
|
|
2597
|
+
# this also ensures that __init__ picks up a version dependency on the class itself
|
|
2598
|
+
# (provided 'auto_class' is true)
|
|
2599
|
+
C = _ensure_has_version( C, version=self._version,
|
|
2600
|
+
dependencies=self._dependencies,
|
|
2601
|
+
auto_class=self._version_auto_class)
|
|
2602
|
+
|
|
2603
|
+
if not debug_verbose is None:
|
|
2604
|
+
debug_verbose.write(f"cache_class({C.__qualname__}): class wrapped; class parameter '{class_parameter}' to __new__ will be ignored.")
|
|
2605
|
+
|
|
2606
|
+
return C
|
|
2607
|
+
|
|
2608
|
+
def _wrap(self, F : Callable, is_new : bool = False):
|
|
2609
|
+
"""
|
|
2610
|
+
Decorate callable 'F'.
|
|
2611
|
+
"""
|
|
2612
|
+
|
|
2613
|
+
debug_verbose = self.cacheController.debug_verbose
|
|
2614
|
+
assert not inspect.isclass(F), ("Internal error")
|
|
2615
|
+
|
|
2616
|
+
# check validity
|
|
2617
|
+
# --------------
|
|
2618
|
+
# Cannot currently decorate classes.
|
|
2619
|
+
|
|
2620
|
+
|
|
2621
|
+
is_method = inspect.ismethod(F)
|
|
2622
|
+
if is_method:
|
|
2623
|
+
assert not getattr(F, "__self__", None) is None, ("Method type must have __self__...?", F.__qualname__ )
|
|
2624
|
+
elif not inspect.isfunction(F):
|
|
2625
|
+
# if F is neither a function nor a class, attempt to decorate its (bound) __call__
|
|
2626
|
+
if not callable(F):
|
|
2627
|
+
raise ValueError(f"{F.__qualname__}' is not callable")
|
|
2628
|
+
F_ = getattr(F, "__call__", None)
|
|
2629
|
+
if F_ is None:
|
|
2630
|
+
raise ValueError(f"{F.__qualname__}' is callable, but has no '__call__'. F is of type {type(F)}")
|
|
2631
|
+
if not debug_verbose is None:
|
|
2632
|
+
debug_verbose.write(f"cache({F.__qualname__}): 'F' is an object; will use bound __call__")
|
|
2633
|
+
F = F_
|
|
2634
|
+
del F_
|
|
2635
|
+
else:
|
|
2636
|
+
# __new__ should not be decorated manually
|
|
2637
|
+
if not is_new and F.__name__ == "__new__":
|
|
2638
|
+
raise ValueError(f"You cannot decorate __new__ of '{F.__qualname__}'.")
|
|
2639
|
+
|
|
2640
|
+
# handle __init__
|
|
2641
|
+
# ---------------
|
|
2642
|
+
|
|
2643
|
+
if F.__name__ == "__init__":
|
|
2644
|
+
# the decorated __init__ has two purposes
|
|
2645
|
+
# 1) at decoration time, keep hold of 'self', which will in fact be used to decorate __new__
|
|
2646
|
+
# 2) at runtime, decide based upon __new__'s caching status whether to run the original __init__
|
|
2647
|
+
|
|
2648
|
+
def execute_init( self, *args, **kwargs ):
|
|
2649
|
+
"""
|
|
2650
|
+
Overwriting __init__ directly does not work as __init__ does not return anything.
|
|
2651
|
+
"""
|
|
2652
|
+
# ensure '__new__' was processed.
|
|
2653
|
+
# this will happen when the class is wrapped
|
|
2654
|
+
if not execute_init.init_cache_callable is None:
|
|
2655
|
+
raise RuntimeError(f"Class '{type(self).__qualname__}': __init__ was decorated for caching but it seems the class '{type(self).__qualname__}' was not decorated, too.")
|
|
2656
|
+
|
|
2657
|
+
__magic_cache_call_init__ = getattr(self, "__magic_cache_call_init__", None)
|
|
2658
|
+
assert not __magic_cache_call_init__ is None, ("*** Internal error: __init__ called illegally")
|
|
2659
|
+
|
|
2660
|
+
if __magic_cache_call_init__:
|
|
2661
|
+
# call __init__
|
|
2662
|
+
F( self, *args, **kwargs )
|
|
2663
|
+
#if not debug_verbose is None:
|
|
2664
|
+
# debug_verbose.write(f"cache({type(self).__qualname__}): __init__ called")
|
|
2665
|
+
else:
|
|
2666
|
+
pass
|
|
2667
|
+
# do not call __init__
|
|
2668
|
+
#if not debug_verbose is None:
|
|
2669
|
+
# debug_verbose.write(f"cache({type(self).__qualname__}): __init__ skipped")
|
|
2670
|
+
self.__magic_cache_call_init__ = None
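# (Editor's note) Protocol for __magic_cache_call_init__: None means initialization is complete;
# the wrapped __new__ sets it to True when the original __init__ should run, and to False when
# the instance was restored from cache and __init__ must be skipped.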
|
|
2671
|
+
|
|
2672
|
+
update_wrapper( wrapper=execute_init, wrapped=F )
|
|
2673
|
+
|
|
2674
|
+
# for class decorator to pick up.
|
|
2675
|
+
# _wrap_class() will set this to None before execute_init
|
|
2676
|
+
# is called (ie before the first object is created)
|
|
2677
|
+
execute_init.init_cache_callable = self
|
|
2678
|
+
return execute_init
|
|
2679
|
+
|
|
2680
|
+
# version
|
|
2681
|
+
# -------
|
|
2682
|
+
# Decorate now or pick up existing @version
|
|
2683
|
+
|
|
2684
|
+
F = _ensure_has_version( F, version=self._version,
|
|
2685
|
+
dependencies=self._dependencies,
|
|
2686
|
+
auto_class=self._version_auto_class,
|
|
2687
|
+
allow_default=is_new )
|
|
2688
|
+
|
|
2689
|
+
# name
|
|
2690
|
+
# ----
|
|
2691
|
+
|
|
2692
|
+
name = _qualified_name( F, self._name )
|
|
2693
|
+
|
|
2694
|
+
# any other function
|
|
2695
|
+
# ------------------
|
|
2696
|
+
|
|
2697
|
+
exclude_types = ( self._exclude_arg_types if not self._exclude_arg_types is None else set() )\
|
|
2698
|
+
| ( self.global_exclude_arg_types if not self.global_exclude_arg_types is None else set())
|
|
2699
|
+
|
|
2700
|
+
def execute( *args, override_cache_mode : CacheMode = None,
|
|
2701
|
+
track_cached_files : CacheTracker = None,
|
|
2702
|
+
**kwargs ):
|
|
2703
|
+
"""
|
|
2704
|
+
Cached execution of the wrapped function
|
|
2705
|
+
"""
|
|
2706
|
+
|
|
2707
|
+
if is_new:
|
|
2708
|
+
# if 'F' is __new__ then we might need to turn off all caching when deserializing cached objects from disk
|
|
2709
|
+
if execute.__new_during_read:
|
|
2710
|
+
return F(*args, **kwargs)
|
|
2711
|
+
|
|
2712
|
+
# determine unique id_ for this function call
|
|
2713
|
+
# -------------------------------------------
|
|
2714
|
+
|
|
2715
|
+
label = None
|
|
2716
|
+
uid = None
|
|
2717
|
+
uid_or_label = self.uid_or_label
|
|
2718
|
+
if isinstance(uid_or_label, str) and self.unique:
|
|
2719
|
+
# if 'id' does not contain formatting codes, and the result is 'unique' then do not bother collecting
|
|
2720
|
+
# function arguments
|
|
2721
|
+
try:
|
|
2722
|
+
uid = uid_or_label.format() # throws a KeyError if 'id' contains formatting information
|
|
2723
|
+
except KeyError:
|
|
2724
|
+
pass
|
|
2725
|
+
|
|
2726
|
+
if not uid is None:
|
|
2727
|
+
# generate name with the unique string provided by the user
|
|
2728
|
+
label = uid
|
|
2729
|
+
uid = self.uniqueLabelledFileName( label )
|
|
2730
|
+
arguments = None
|
|
2731
|
+
|
|
2732
|
+
else:
|
|
2733
|
+
# get dictionary of named arguments
|
|
2734
|
+
arguments = execute.cache_info.signature.bind(*args,**kwargs)
|
|
2735
|
+
arguments.apply_defaults()
|
|
2736
|
+
arguments = arguments.arguments # ordered dict
|
|
2737
|
+
|
|
2738
|
+
if is_new:
|
|
2739
|
+
# delete 'cls' from argument list
|
|
2740
|
+
assert len(arguments) >= 1, ("*** Internal error", F.__qualname__, is_new, arguments)
|
|
2741
|
+
del arguments[list(arguments)[0]]
|
|
2742
|
+
argus = set(arguments)
|
|
2743
|
+
|
|
2744
|
+
# filter dictionary
|
|
2745
|
+
if not self._exclude_args is None or not self._include_args is None:
|
|
2746
|
+
excl = set(self._exclude_args) if not self._exclude_args is None else set()
|
|
2747
|
+
if not self._exclude_args is None:
|
|
2748
|
+
if not self._exclude_args <= argus:
|
|
2749
|
+
raise ValueError(f"{name}: 'exclude_args' contains unknown argument names: exclude_args {sorted(self._exclude_args)} while argument names are {sorted(argus)}.")
|
|
2750
|
+
if not self._include_args is None:
|
|
2751
|
+
if not self._include_args <= argus:
|
|
2752
|
+
raise ValueError(f"{name}: 'include_args' contains unknown argument names: include_args {sorted(self._iinclude_args)} while argument names are {sorted(argus)}.")
|
|
2753
|
+
excl = argus - self._include_args
|
|
2754
|
+
if not self._exclude_args is None:
|
|
2755
|
+
excl |= self._exclude_args
|
|
2756
|
+
for arg in excl:
|
|
2757
|
+
if arg in arguments:
|
|
2758
|
+
del arguments[arg]
|
|
2759
|
+
del excl
|
|
2760
|
+
|
|
2761
|
+
if len(exclude_types) > 0:
|
|
2762
|
+
excl = []
|
|
2763
|
+
for k, v in arguments.items():
|
|
2764
|
+
if type(v) in exclude_types or type(v).__name__ in exclude_types:
|
|
2765
|
+
excl.append( k )
|
|
2766
|
+
for arg in excl:
|
|
2767
|
+
if arg in arguments:
|
|
2768
|
+
del arguments[arg]
|
|
2769
|
+
|
|
2770
|
+
# apply logic
|
|
2771
|
+
if uid_or_label is None:
|
|
2772
|
+
label = name
|
|
2773
|
+
|
|
2774
|
+
else:
|
|
2775
|
+
if self._name_of_name_arg in arguments:
|
|
2776
|
+
error(f"{name}: '{self._name_of_name_arg}' is a reserved keyword and used as parameter name for the function name. Found it also in the function parameter list. Use 'name_of_name_arg' to change the internal parameter name used.")
|
|
2777
|
+
|
|
2778
|
+
# add standard arguments
|
|
2779
|
+
full_arguments = OrderedDict()
|
|
2780
|
+
if is_method:
|
|
2781
|
+
assert not 'self' in set(arguments), ("__self__ found in bound method argument list...?", F.__qualname__, execute.cache_info.signature.bind(*args,**kwargs).arguments )
|
|
2782
|
+
full_arguments['self'] = F.__self__
|
|
2783
|
+
full_arguments[self._name_of_name_arg] = name
|
|
2784
|
+
for k,v in arguments.items():
|
|
2785
|
+
full_arguments[k] = v
|
|
2786
|
+
arguments = full_arguments
|
|
2787
|
+
del full_arguments, k, v
|
|
2788
|
+
|
|
2789
|
+
# call format or function
|
|
2790
|
+
if isinstance( uid_or_label, str ):
|
|
2791
|
+
try:
|
|
2792
|
+
label = str.format( uid_or_label, **arguments )
|
|
2793
|
+
except KeyError as e:
|
|
2794
|
+
raise KeyError(e, f"Error while generating id for '{name}' using format string '{uid_or_label}': {e}. Available arguments: {list(arguments)}")
|
|
2795
|
+
|
|
2796
|
+
else:
|
|
2797
|
+
which = 'uid' if not self._uid is None else 'label'
|
|
2798
|
+
try:
|
|
2799
|
+
label = uid_or_label(**arguments)
|
|
2800
|
+
except TypeError as e:
|
|
2801
|
+
raise TypeError(e, f"Error while generating '{which}' for '{name}' using a function: {e}. Available arguments: {list(arguments)}")
|
|
2802
|
+
except Exception as e:
|
|
2803
|
+
raise type(e)(f"Error while generating '{which}' for '{name}': attempt to call '{which}' of type {type(uid_or_label)} failed: {e}")
|
|
2804
|
+
assert isinstance(label, str), ("Error:", which,"callable must return a string. Found",type(label))
|
|
2805
|
+
|
|
2806
|
+
if self.unique:
|
|
2807
|
+
uid = self.uniqueLabelledFileName( label )
|
|
2808
|
+
else:
|
|
2809
|
+
uid = self.uniqueNamedFileName( label, **arguments )
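# (Editor's note, illustrative) e.g. a format-string label "f {x}" with x=1 yields the label "f 1";
# with 'uid' the label is used as the filename directly (hashed only if too long or invalid), while
# with 'label' a hash of the arguments (which include the function name) is appended to keep the
# filename unique, as described in the docstring of SubDir.cache().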
|
|
2810
|
+
|
|
2811
|
+
# determine version, cache mode
|
|
2812
|
+
# ------------------
|
|
2813
|
+
|
|
2814
|
+
version_ = self._version if not self._version is None else F.version.unique_id64
|
|
2815
|
+
cache_mode = CacheMode(override_cache_mode) if not override_cache_mode is None else self.cache_mode
|
|
2816
|
+
del override_cache_mode
|
|
2817
|
+
|
|
2818
|
+
# store process information
|
|
2819
|
+
# -------------------------
|
|
2820
|
+
|
|
2821
|
+
execute.cache_info.label = str(label) if not label is None else None
|
|
2822
|
+
execute.cache_info.uid = uid
|
|
2823
|
+
execute.cache_info.version = version_
|
|
2824
|
+
|
|
2825
|
+
if self.cacheController.keep_last_arguments:
|
|
2826
|
+
info_arguments = OrderedDict()
|
|
2827
|
+
for argname, argvalue in arguments.items():
|
|
2828
|
+
info_arguments[argname] = str(argvalue)[:100]
|
|
2829
|
+
execute.cache_info.arguments = info_arguments
|
|
2830
|
+
del argname, argvalue
|
|
2831
|
+
|
|
2832
|
+
# execute caching
|
|
2833
|
+
# ---------------
|
|
2834
|
+
|
|
2835
|
+
if cache_mode.delete:
|
|
2836
|
+
self._subdir.delete( uid )
|
|
2837
|
+
elif cache_mode.read:
|
|
2838
|
+
class Tag:
|
|
2839
|
+
pass
|
|
2840
|
+
tag = Tag()
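# (Editor's note) 'tag' is a fresh sentinel object passed as the default to read():
# if read() returns it, the cache missed; anything else (including None) is a cached value.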
|
|
2841
|
+
if not is_new:
|
|
2842
|
+
r = self._subdir.read( uid, tag, version=version_ )
|
|
2843
|
+
else:
|
|
2844
|
+
try:
|
|
2845
|
+
execute.__new_during_read = True
|
|
2846
|
+
r = self._subdir.read( uid, tag, version=version_ )
|
|
2847
|
+
finally:
|
|
2848
|
+
execute.__new_during_read = False
|
|
2849
|
+
|
|
2850
|
+
if not r is tag:
|
|
2851
|
+
if not track_cached_files is None:
|
|
2852
|
+
track_cached_files += self._subdir.fullFileName(uid)
|
|
2853
|
+
execute.cache_info.last_cached = True
|
|
2854
|
+
if not debug_verbose is None:
|
|
2855
|
+
debug_verbose.write(f"cache({name}): read '{label}' version 'version {version_}' from cache '{self._subdir.fullFileName(uid)}'.")
|
|
2856
|
+
if is_new:
|
|
2857
|
+
assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__", F.__qualname__, label)
|
|
2858
|
+
r.__magic_cache_call_init__ = False # since we called __new__, __init__ will be called next
|
|
2859
|
+
|
|
2860
|
+
return r
|
|
2861
|
+
|
|
2862
|
+
r = F(*args, **kwargs)
|
|
2863
|
+
|
|
2864
|
+
if is_new:
|
|
2865
|
+
# __new__ created the object, but __init__ was not called yet to initialize it
|
|
2866
|
+
# we simulate this here
|
|
2867
|
+
cls = args[0]
|
|
2868
|
+
assert not cls is None and inspect.isclass(cls), ("*** Internal error", cls)
|
|
2869
|
+
r.__magic_cache_call_init__ = True
|
|
2870
|
+
cls.__init__( r, *args[1:], **kwargs )
|
|
2871
|
+
assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__")
|
|
2872
|
+
|
|
2873
|
+
if cache_mode.write:
|
|
2874
|
+
self._subdir.write(uid,r,version=version_)
|
|
2875
|
+
if not track_cached_files is None:
|
|
2876
|
+
track_cached_files += self._subdir.fullFileName(uid)
|
|
2877
|
+
execute.cache_info.last_cached = False
|
|
2878
|
+
|
|
2879
|
+
if is_new:
|
|
2880
|
+
assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__")
|
|
2881
|
+
r.__magic_cache_call_init__ = False # since we called __new__, __init__ will be called next
|
|
2882
|
+
#debug_verbose.write(f"cache({name}): called __init__ after __new__ with: {args[1:]} / {kwargs}")
|
|
2883
|
+
|
|
2884
|
+
if not debug_verbose is None:
|
|
2885
|
+
if cache_mode.write:
|
|
2886
|
+
debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' and wrote result into '{self._subdir.fullFileName(uid)}'.")
|
|
2887
|
+
else:
|
|
2888
|
+
debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' but did *not* write into '{self._subdir.fullFileName(uid)}'.")
|
|
2889
|
+
return r
|
|
2890
|
+
|
|
2891
|
+
update_wrapper( wrapper=execute, wrapped=F )
|
|
2892
|
+
execute.cache_info = CacheInfo()
|
|
2893
|
+
|
|
2894
|
+
execute.cache_info.name = name # qualified name of the function
|
|
2895
|
+
execute.cache_info.signature = inspect.signature(F) # signature of the function
|
|
2896
|
+
|
|
2897
|
+
execute.cache_info.uid = None # last function call ID
|
|
2898
|
+
execute.cache_info.label = None # last label generated
|
|
2899
|
+
execute.cache_info.version = None # last version used
|
|
2900
|
+
|
|
2901
|
+
execute.cache_info.last_cached = None # last function call restored from disk?
|
|
2902
|
+
|
|
2903
|
+
if self.cacheController.keep_last_arguments:
|
|
2904
|
+
execute.cache_info.arguments = None # last function call arguments dictionary of strings
|
|
2905
|
+
|
|
2906
|
+
if is_new:
|
|
2907
|
+
execute.__new_during_read = False
|
|
2908
|
+
|
|
2909
|
+
if not debug_verbose is None:
|
|
2910
|
+
debug_verbose.write(f"cache({name}): {'function' if not is_new else 'class constructor function'} registered for caching into '{self._subdir.path}'.")
|
|
2911
|
+
self.cacheController.versioned[name] = execute
|
|
2912
|
+
return execute
|
|
2913
|
+
|
|
2914
|
+
def VersionedCacheRoot( directory : str, *,
|
|
2915
|
+
ext : str = None,
|
|
2916
|
+
fmt : Format = None,
|
|
2917
|
+
createDirectory : bool = None,
|
|
2918
|
+
**controller_kwargs
|
|
2919
|
+
):
|
|
2920
|
+
"""
|
|
2921
|
+
Create a root directory for versioned caching on disk
|
|
2922
|
+
|
|
2923
|
+
Usage:
|
|
2924
|
+
In a central file, define a root directory
|
|
2925
|
+
vroot = VersionedCacheRoot("!/cache")
|
|
2926
|
+
|
|
2927
|
+
and a sub-directory
|
|
2928
|
+
vtest = vroot("test")
|
|
2929
|
+
|
|
2930
|
+
@vtest.cache("1.0")
|
|
2931
|
+
def f1( x=1, y=2 ):
|
|
2932
|
+
print(x,y)
|
|
2933
|
+
|
|
2934
|
+
@vtest.cache("1.0", dps=[f1])
|
|
2935
|
+
def f2( x=1, y=2, z=3 ):
|
|
2936
|
+
f1( x,y )
|
|
2937
|
+
print(z)
|
|
2938
|
+
|
|
2939
|
+
Parameters
|
|
2940
|
+
----------
|
|
2941
|
+
directory : name of the directory. Using SubDir, the following shortcuts are supported:
|
|
2942
|
+
"!/dir" creates 'dir' in the temporary directory
|
|
2943
|
+
"~/dir" creates 'dir' in the home directory
|
|
2944
|
+
"./dir" created 'dir' relative to the current directory
|
|
2945
|
+
ext : extension, which will automatically be appended to file names (see SubDir). Default depends on format. For Format.PICKLE it is 'pck'
|
|
2946
|
+
fmt : format, see SubDir.Format. Default is Format.PICKLE
|
|
2947
|
+
createDirectory : whether to create the directory immediately. Default is no.
|
|
2948
|
+
controller_kwargs: parameters passed to CacheController, for example:
|
|
2949
|
+
exclude_arg_types : list of types or names of types to exclude when auto-generating function signatures from function arguments.
|
|
2950
|
+
A standard example from cdxbasics is "Context" as it is used to print progress messages.
|
|
2951
|
+
max_filename_length : maximum filename length
|
|
2952
|
+
hash_length: length used for hashes, see cdxbasics.util.uniqueHash()
|
|
2953
|
+
|
|
2954
|
+
Returns
|
|
2955
|
+
-------
|
|
2956
|
+
A root cache directory
|
|
2957
|
+
"""
|
|
2958
|
+
controller = CacheController(**controller_kwargs) if len(controller_kwargs) > 0 else None
|
|
2959
|
+
return SubDir( directory=directory, ext=ext, fmt=fmt, createDirectory=createDirectory, controller=controller )
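# (Editor's note) Illustrative sketch of passing the controller options documented in the
# docstring above; the value 64 is hypothetical:
#
#   vroot = VersionedCacheRoot( "!/cache", exclude_arg_types=["Context"], max_filename_length=64 )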
|
|
2960
|
+
|
|
2961
|
+
version = version_decorator
|
|
2962
|
+
|
|
2963
|
+
|