cdxcore 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


cdxcore/subdir.py ADDED
@@ -0,0 +1,2963 @@
1
+ """
2
+ subdir
3
+ Simple class to keep track of directory structures and for automated caching on disk
4
+ Hans Buehler 2020
5
+ """
6
+
7
+
8
+ import os
9
+ import os.path
10
+ import uuid
11
+ import threading
12
+ import pickle
13
+ import tempfile
14
+ import shutil
15
+ import datetime
16
+ import inspect
17
+ from collections import OrderedDict
18
+ from collections.abc import Collection, Mapping, Callable
19
+ from enum import Enum
20
+ import json as json
21
+ import platform as platform
22
+ from functools import update_wrapper
23
+ import warnings as warnings
24
+
25
+ import numpy as np
26
+ import jsonpickle as jsonpickle
27
+ import jsonpickle.ext.numpy as jsonpickle_numpy
28
+ import zlib as zlib
29
+ import gzip as gzip
30
+ import blosc as blosc
31
+
32
+ from .prettydict import pdct
33
+ from .verbose import Context
34
+ from .version import Version, version as version_decorator
35
+ from .util import fmt_list, fmt_filename, DEF_FILE_NAME_MAP, fmt as txtfmt, plain
36
+ from .uniquehash import uniqueHash48, uniqueLabelExt, namedUniqueHashExt
37
+
38
+ def error( text, *args, exception = RuntimeError, **kwargs ):
39
+ raise exception( txtfmt(text, *args, **kwargs) )
40
+ def verify( cond, text, *args, exception = RuntimeError, **kwargs ):
41
+ if not cond:
42
+ error( text, *args, **kwargs, exception=exception )
43
+ def warn( text, *args, warning=RuntimeWarning, stack_level=1, **kwargs ):
44
+ warnings.warn( txtfmt(text, *args, **kwargs), warning, stacklevel=stack_level )
45
+
46
+ """
47
+ compression
48
+ """
49
+ jsonpickle_numpy.register_handlers()
50
+ BLOSC_MAX_BLOCK = 2147483631
51
+ BLOSC_MAX_USE = 1147400000 # ... blosc really cannot handle large files
52
+
53
+ """
54
+ Hashing
55
+ """
56
+ uniqueFileName48 = uniqueHash48
57
+ uniqueNamedFileName48_16 = namedUniqueHashExt(max_length=48,id_length=16,filename_by=DEF_FILE_NAME_MAP)
58
+ uniqueLabelledFileName48_16 = uniqueLabelExt(max_length=48,id_length=16,filename_by=DEF_FILE_NAME_MAP)
59
+
60
+ def _remove_trailing( path ):
61
+ if len(path) > 0:
62
+ if path[-1] in ['/', '\\']:
63
+ return _remove_trailing(path[:-1])
64
+ return path
65
+
66
+ class Format(Enum):
67
+ """ File formats for SubDir """
68
+ PICKLE = 0
69
+ JSON_PICKLE = 1
70
+ JSON_PLAIN = 2
71
+ BLOSC = 3
72
+ GZIP = 4
73
+
74
+ PICKLE = Format.PICKLE
75
+ JSON_PICKLE = Format.JSON_PICKLE
76
+ JSON_PLAIN = Format.JSON_PLAIN
77
+ BLOSC = Format.BLOSC
78
+ GZIP = Format.GZIP
79
+
80
+ """
81
+ Use the following for config calls:
82
+ format = subdir.mkFormat( config("format", "pickle", subdir.FORMAT_NAMES, "File format") )
83
+ """
84
+ FORMAT_NAMES = [ s.lower() for s in Format.__members__ ]
85
+ def mkFormat( name ):
86
+ if not name in FORMAT_NAMES:
87
+ raise LookupError(f"Unknown format name '{name}'. Must be one of: {fmt_list(FORMAT_NAMES)}")
88
+ return Format[name.upper()]
89
+
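+ # Illustrative sketch of mkFormat(): maps a config string to a Format value
+ # (the names below are examples only):
+ # mkFormat( "json_plain" ) # -> Format.JSON_PLAIN
+ # mkFormat( "pickle" ) # -> Format.PICKLE
+ # mkFormat( "parquet" ) # -> raises LookupError (not in FORMAT_NAMES)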
90
+ class CacheMode(object):
91
+ """
92
+ CacheMode
93
+ A class which encodes standard behaviour of a caching strategy:
94
+
95
+ on gen off update clear readonly
96
+ load cache from disk if exists x x - - - x
97
+ write updates to disk x x - x - -
98
+ delete existing object - - - - x -
99
+ delete existing object if incompatible x - - x x -
100
+
101
+ See cdxbasics.subdir for functions to manage files.
102
+ """
103
+
104
+ ON = "on"
105
+ GEN = "gen"
106
+ OFF = "off"
107
+ UPDATE = "update"
108
+ CLEAR = "clear"
109
+ READONLY = "readonly"
110
+
111
+ MODES = [ ON, GEN, OFF, UPDATE, CLEAR, READONLY ]
112
+ HELP = "'on' for standard caching; 'gen' for caching but keep existing incompatible files; 'off' to turn off; 'update' to overwrite any existing cache; 'clear' to clear existing caches; 'readonly' to read existing caches but not write new ones"
113
+
114
+ def __init__(self, mode : str = None ):
115
+ """
116
+ Encodes standard behaviour of a caching strategy:
117
+
118
+ on gen off update clear readonly
119
+ load upon start from disk if exists x x - - - x
120
+ write updates to disk x x - x - -
121
+ delete existing object upon start - - - - x -
122
+ delete existing object if incompatible x - - x x -
123
+
124
+ Parameters
125
+ ----------
126
+ mode : str
127
+ Which mode to use.
128
+ """
129
+ if isinstance( mode, CacheMode ):
130
+ return # identity copy constructor (handled in __new__)
131
+ mode = self.ON if mode is None else mode
132
+ self.mode = mode.mode if isinstance(mode, CacheMode) else str(mode)
133
+ if not self.mode in self.MODES:
134
+ raise KeyError( self.mode, "Caching mode must be 'on', 'gen', 'off', 'update', 'clear', or 'readonly'. Found " + self.mode )
135
+ self._read = self.mode in [self.ON, self.READONLY, self.GEN]
136
+ self._write = self.mode in [self.ON, self.UPDATE, self.GEN]
137
+ self._delete = self.mode in [self.UPDATE, self.CLEAR]
138
+ self._del_in = self.mode in [self.UPDATE, self.CLEAR, self.ON]
139
+
140
+ def __new__(cls, *kargs, **kwargs):
141
+ """ Copy constructor """
142
+ if len(kargs) == 1 and len(kwargs) == 0 and isinstance( kargs[0], CacheMode):
143
+ return kargs[0]
144
+ return super().__new__(cls)
145
+
146
+ @property
147
+ def read(self) -> bool:
148
+ """ Whether to load any existing data when starting """
149
+ return self._read
150
+
151
+ @property
152
+ def write(self) -> bool:
153
+ """ Whether to write cache data to disk """
154
+ return self._write
155
+
156
+ @property
157
+ def delete(self) -> bool:
158
+ """ Whether to delete existing data """
159
+ return self._delete
160
+
161
+ @property
162
+ def del_incomp(self) -> bool:
163
+ """ Whether to delete existing data if it is not compatible """
164
+ return self._del_in
165
+
166
+ def __str__(self) -> str:# NOQA
167
+ return self.mode
168
+ def __repr__(self) -> str:# NOQA
169
+ return self.mode
170
+
171
+ def __eq__(self, other) -> bool:# NOQA
172
+ return self.mode == other
173
+ def __neq__(self, other) -> bool:# NOQA
174
+ return self.mode != other
175
+
176
+ @property
177
+ def is_off(self) -> bool:
178
+ """ Whether this cache mode is OFF """
179
+ return self.mode == self.OFF
180
+
181
+ @property
182
+ def is_on(self) -> bool:
183
+ """ Whether this cache mode is ON """
184
+ return self.mode == self.ON
185
+
186
+ @property
187
+ def is_gen(self) -> bool:
188
+ """ Whether this cache mode is GEN """
189
+ return self.mode == self.GEN
190
+
191
+ @property
192
+ def is_update(self) -> bool:
193
+ """ Whether this cache mode is UPDATE """
194
+ return self.mode == self.UPDATE
195
+
196
+ @property
197
+ def is_clear(self) -> bool:
198
+ """ Whether this cache mode is CLEAR """
199
+ return self.mode == self.CLEAR
200
+
201
+ @property
202
+ def is_readonly(self) -> bool:
203
+ """ Whether this cache mode is READONLY """
204
+ return self.mode == self.READONLY
205
+
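+ # Illustrative sketch: how the mode string maps to the read/write/delete flags above.
+ # cm = CacheMode("readonly")
+ # cm.read, cm.write, cm.delete, cm.del_incomp # -> True, False, False, False
+ # cm = CacheMode("update")
+ # cm.read, cm.write, cm.delete, cm.del_incomp # -> False, True, True, True
+ # CacheMode(cm) is cm # -> True (copy construction via __new__)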
206
+ class CacheController( object ):
207
+ """
208
+ Central control for versioning.
209
+ Enables turning caching and debugging on or off, and tracks all versions
210
+ """
211
+
212
+ def __init__(self, *,
213
+ exclude_arg_types : list[type] = [Context],
214
+ cache_mode : CacheMode = CacheMode.ON,
215
+ max_filename_length: int = 48,
216
+ hash_length : int = 16,
217
+ debug_verbose : Context = None,
218
+ keep_last_arguments: bool = False
219
+ ):
220
+ """
221
+ Background parameters to control caching
222
+
223
+ Parameters
224
+ ----------
225
+ exclude_arg_types :
226
+ List of types to exclude from producing unique ids from function arguments. Defaults to [Context]
227
+ cache_mode :
228
+ Top level cache control. Set to "OFF" to turn off all caching. Default is "ON"
229
+ max_filename_length :
230
+ Maximum filename length. If unique ids exceed this length, a hash of length 'hash_length' will be integrated into the file name.
231
+ See cdxbasics.util.namedUniqueHashExt and cdxbasics.util.uniqueLabelExt
232
+ hash_length :
233
+ Length of the hash used to make sure each filename is unique
234
+ See cdxbasics.util.namedUniqueHashExt and cdxbasics.util.uniqueLabelExt
235
+ debug_verbose :
236
+ If non-None print caching process messages to this object.
237
+ keep_last_arguments :
238
+ keep a dictionary of all parameters as string representations after each function call.
239
+ If the function F was decorated using SubDir.cache(), you can access this information via
240
+ F.cache_info.last_arguments
241
+ Note that strings are limited to 100 characters per argument to avoid memory
242
+ overload when large objects are passed.
243
+ """
244
+ max_filename_length = int(max_filename_length)
245
+ hash_length = int(hash_length)
246
+ assert max_filename_length>0, ("'max_filename_length' must be positive")
247
+ assert hash_length>0 and hash_length<=max_filename_length, ("'hash_length' must be positive and at most 'max_filename_length'")
248
+ assert max_filename_length>=hash_length, ("'hash_length' must not exceed 'max_filename_length'")
249
+ self.cache_mode = CacheMode(cache_mode if not cache_mode is None else CacheMode.ON)
250
+ self.debug_verbose = debug_verbose
251
+ self.exclude_arg_types = set(exclude_arg_types) if not exclude_arg_types is None else None
252
+ self.versioned = pdct() # list
253
+ self.uniqueNamedFileName = namedUniqueHashExt(max_length=max_filename_length,id_length=hash_length,filename_by=DEF_FILE_NAME_MAP)
254
+ self.uniqueLabelledFileName = uniqueLabelExt(max_length=max_filename_length,id_length=hash_length,filename_by=DEF_FILE_NAME_MAP)
255
+ self.keep_last_arguments = keep_last_arguments
256
+
257
+ default_cacheController = CacheController()
258
+
259
+
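+ # Illustrative sketch: a non-default controller can be passed to SubDir to change
+ # caching behaviour globally ('!/cache' below is a hypothetical directory):
+ # cc = CacheController( cache_mode="off", max_filename_length=64, hash_length=16 )
+ # sd = SubDir("!/cache", cacheController=cc)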
260
+ class CacheTracker(object):
261
+ """
262
+ Utility class to track caching and be able to delete all dependent objects
263
+
264
+ """
265
+ def __init__(self):
266
+ """ track cache files """
267
+ self._files = []
268
+ def __iadd__(self, new_file):
269
+ """ Add a new file to the tracker """
270
+ self._files.append( new_file )
+ return self
271
+ def delete_cache_files(self):
272
+ """ Delete all tracked files """
273
+ for file in self._files:
274
+ if os.path.exists(file):
275
+ os.remove(file)
276
+ self._files = []
277
+ def __str__(self) -> str:#NOQA
278
+ return f"Tracked: {self._files}"
279
+ def __repr__(self) -> str:#NOQA
280
+ return f"Tracked: {self._files}"
281
+
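+ # Illustrative sketch of CacheTracker (the file path below is hypothetical):
+ # tracker = CacheTracker()
+ # tracker += "/tmp/demo_cache.pck" # relies on __iadd__ returning self, see above
+ # tracker.delete_cache_files() # removes the tracked files that exist on disk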
282
+ class InitCacheInfo(object):
283
+ pass
284
+
285
+ class CacheInfo(object):
286
+ pass
287
+
288
+ # SubDir
289
+ # ======
290
+
291
+ class SubDir(object):
292
+ """
293
+ SubDir implements a transparent interface for storing data in files, with a common extension.
294
+ The generic pattern is:
295
+
296
+ 1) create a root 'parentDir':
297
+ Absolute: parentDir = SubDir("C:/temp/root")
298
+ In system temp directory: parentDir = SubDir("!/root")
299
+ In user directory: parentDir = SubDir("~/root")
300
+ Relative to current directory: parentDir = SubDir("./root")
301
+
302
+ 2) Use SubDirs to transparently create hierarchies of stored data:
303
+ assume f() will want to store some data:
304
+
305
+ def f(parentDir, ...):
306
+
307
+ subDir = parentDir('subdir') <-- note that the call () operator is overloaded: if a second argument is provided, the directory will try to read the respective file.
308
+ or
309
+ subDir = SubDir('subdir', parentDir)
310
+ :
311
+ :
312
+ Write data:
313
+
314
+ subDir['item1'] = item1 <-- dictionary style
315
+ subDir.item2 = item2 <-- member style
316
+ subDir.write('item3',item3) <-- explicit
317
+
318
+ Note that write() can write to multiple files at the same time.
319
+
320
+ 3) Reading is similar
321
+
322
+ def readF(parentDir,...):
323
+
324
+ subDir = parentDir('subdir')
325
+
326
+ item = subDir('item', 'i1') <-- returns 'i1' if not found.
327
+ item = subdir.read('item') <-- returns None if not found
328
+ item = subdir.read('item','i2') <-- returns 'i2' if not found
329
+ item = subDir['item'] <-- throws a KeyError if not found
330
+ item = subDir.item <-- throws an AttributeError if not found
331
+
332
+ 4) Treating data like dictionaries
333
+
334
+ def scanF(parentDir,...)
335
+
336
+ subDir = parentDir('f')
337
+
338
+ for item in subDir:
339
+ data = subDir[item]
340
+
341
+ Delete items:
342
+
343
+ del subDir['item'] <-- silently fails if 'item' does not exist
344
+ del subDir.item <-- silently fails if 'item' does not exist
345
+ subDir.delete('item') <-- silently fails if 'item' does not exist
346
+ subDir.delete('item', True) <-- throw a KeyError if 'item' does not exist
347
+
348
+ 5) Cleaning up
349
+
350
+ parentDir.deleteAllContent() <-- silently deletes all files and sub directories.
351
+
352
+ 6) As of version 0.2.59 subdir supports json file formats. Those can be controlled with the 'fmt' keyword in various functions.
353
+ The most straightforward way is to specify the format of the directory itself:
354
+
355
+ subdir = SubDir("!/.test", fmt=SubDir.JSON_PICKLE)
356
+
357
+ The following formats are supported:
358
+
359
+ SubDir.PICKLE:
360
+ Use pickle
361
+ SubDir.JSON_PLAIN:
362
+ Uses cdxbasics.util.plain() to convert data into plain Python objects and writes
363
+ this to disk as text. Loading back such files will result in plain Python objects,
364
+ but *not* the original objects
365
+ SubDir.JSON_PICKLE:
366
+ Uses the jsonpickle package to load/write data in somewhat readable text formats.
367
+ Data can be loaded back from such a file, but files may not be readable (e.g. numpy arrays
368
+ are written in compressed form).
369
+ SubDir.BLOSC:
370
+ Uses https://www.blosc.org/python-blosc/ to compress data on-the-fly.
371
+ BLOSC is much faster than GZIP or ZLIB but is limited to 2GB data, sadly.
372
+ SubDir.GZIP:
373
+ Uses https://docs.python.org/3/library/gzip.html to compress data on-the-fly
374
+ (zlib-based gzip compression).
375
+
376
+ Summary of properties:
377
+
378
+ | Restores objects | Human readable | Speed | Compression
379
+ PICKLE | yes | no | high | no
380
+ JSON_PLAIN | no | yes | low | no
381
+ JSON_PICKLE | yes | limited | low | no
382
+ BLOSC | yes | no | high | yes
383
+ GZIP | yes | no | high | yes
384
+
385
+ Several other operations are supported; see help()
386
+
387
+ Hans Buehler May 2020
388
+ """
389
+
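+ # Illustrative sketch of the basic pattern above, assuming a hypothetical temp directory '!/demo':
+ # sd = SubDir("!/demo", fmt=SubDir.JSON_PLAIN) # files receive the '.json' extension
+ # sd['config'] = dict(lr=0.01) # dictionary-style write
+ # cfg = sd('config', None) # read back; returns None if missing
+ # sd.delete('config') # silently ignores missing files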
390
+ class __RETURN_SUB_DIRECTORY(object):
391
+ pass
392
+
393
+ Format = Format
394
+ PICKLE = Format.PICKLE
395
+ JSON_PICKLE = Format.JSON_PICKLE
396
+ JSON_PLAIN = Format.JSON_PLAIN
397
+ BLOSC = Format.BLOSC
398
+ GZIP = Format.GZIP
399
+
400
+ DEFAULT_RAISE_ON_ERROR = False
401
+ RETURN_SUB_DIRECTORY = __RETURN_SUB_DIRECTORY
402
+ DEFAULT_FORMAT = Format.PICKLE
403
+ DEFAULT_CREATE_DIRECTORY = False # legacy behaviour so that self.path is a valid path
404
+ EXT_FMT_AUTO = "*"
405
+
406
+ MAX_VERSION_BINARY_LEN = 128
407
+
408
+ VER_NORMAL = 0
409
+ VER_CHECK = 1
410
+ VER_RETURN = 2
411
+
412
+ def __init__(self, name : str,
413
+ parent = None, *,
414
+ ext : str = None,
415
+ fmt : Format = None,
416
+ eraseEverything : bool = False,
417
+ createDirectory : bool = None,
418
+ cacheController : CacheController = None
419
+ ):
420
+ """
421
+ Instantiates a sub directory which contains pickle files with a common extension.
422
+ By default the directory is created.
423
+
424
+ Absolute directories
425
+ sd = SubDir("!/subdir") - relative to system temp directory
426
+ sd = SubDir("~/subdir") - relative to user home directory
427
+ sd = SubDir("./subdir") - relative to current working directory (explicit)
428
+ sd = SubDir("subdir") - relative to current working directory (implicit)
429
+ sd = SubDir("/tmp/subdir") - absolute path (linux)
430
+ sd = SubDir("C:/temp/subdir") - absolute path (windows)
431
+ Short-cut
432
+ sd = SubDir("") - current working directory
433
+
434
+ It is often desired that the user specifies a sub-directory name under some common parent directory.
435
+ You can create sub directories if you provide a 'parent' directory:
436
+ sd2 = SubDir("subdir2", parent=sd) - relative to other sub directory
437
+ sd2 = sd("subdir2") - using call operator
438
+ Works with strings, too:
439
+ sd2 = SubDir("subdir2", parent="~/my_config") - relative to ~/my_config
440
+
441
+ All files managed by SubDir will have the same extension.
442
+ The extension can be specified with 'ext', or as part of the directory string:
443
+ sd = SubDir("~/subdir;*.bin") - set extension to 'bin'
444
+
445
+ COPY CONSTRUCTION
446
+ This function also allows copy construction and construction from a repr() string.
447
+
448
+ HANDLING KEYS
449
+ SubDirs allows reading data using the item and attribute notation, i.e. we may use
450
+ sd = SubDir("~/subdir")
451
+ x = sd.x
452
+ y = sd['y']
453
+ If the respective keys are not found, exceptions are thrown.
454
+
455
+ NONE OBJECTS
456
+ It is possible to set the directory name to 'None'. In this case the directory will behave as if:
457
+ No files exist
458
+ Writing fails with an EOFError.
459
+
460
+ Parameters
461
+ ----------
462
+ name - Name of the directory.
463
+ '.' for current directory
464
+ '~' for home directory
465
+ '!' for system default temp directory
466
+ May contain a formatting string for defining 'ext' on the fly:
467
+ Use "!/test;*.bin" to specify 'test' in the system temp directory as root directory with extension 'bin'
468
+ Can be set to None, see above.
469
+ parent - Parent directory. If provided, will also set defaults for 'ext' and 'raiseOnError'
470
+ ext - standard file extension for data files. All files will share the same extension.
471
+ If None, use the parent extension, or if that is not specified use an extension depending on 'fmt':
472
+ 'pck' for the default PICKLE format
473
+ 'json' for JSON_PLAIN
474
+ 'jpck' for JSON_PICKLE
475
+ Set to "" to turn off managing extensions.
476
+ fmt - file format; one of the Format values (pickle, json_plain, json_pickle, blosc, gzip)
477
+ eraseEverything - delete all contents in the newly defined subdir
478
+ createDirectory - whether to create the directory.
479
+ Otherwise it will be created upon first write().
480
+ Set to None to use the setting of the parent directory
481
+ """
482
+ createDirectory = bool(createDirectory) if not createDirectory is None else None
483
+
484
+ # copy constructor support
485
+ if isinstance(name, SubDir):
486
+ assert parent is None, "Internal error: copy construction does not accept 'parent' keyword"
487
+ self._path = name._path
488
+ self._ext = name._ext if ext is None else ext
489
+ self._fmt = name._fmt if fmt is None else fmt
490
+ self._crt = name._crt if createDirectory is None else createDirectory
491
+ self._cctrl = name._cctrl if cacheController is None else cacheController
492
+ if eraseEverything: raise ValueError( "Cannot use 'eraseEverything' when cloning a directory")
493
+ return
494
+
495
+ # reconstruction from a dictionary
496
+ if isinstance(name, Mapping):
497
+ assert parent is None, "Internal error: dictionary construction does not accept 'parent keyword"
498
+ self._path = name['_path']
499
+ self._ext = name['_ext'] if ext is None else ext
500
+ self._fmt = name['_fmt'] if fmt is None else fmt
501
+ self._crt = name['_crt'] if createDirectory is None else createDirectory
502
+ self._cctrl = name['_cctrl'] if cacheController is None else cacheController
503
+ if eraseEverything: raise ValueError( "Cannot use 'eraseEverything' when cloning a directory")
504
+ return
505
+
506
+ # parent
507
+ if isinstance(parent, str):
508
+ parent = SubDir( parent, ext=ext, fmt=fmt, createDirectory=createDirectory, cacheController=cacheController )
509
+ if not parent is None and not isinstance(parent, SubDir):
510
+ raise ValueError( "'parent' must be SubDir, str, or None. Found object of type '{type(parent)}'")
511
+
512
+ # operational flags
513
+ _name = name if not name is None else "(none)"
514
+
515
+ # format
516
+ if fmt is None:
517
+ assert parent is None or not parent._fmt is None
518
+ self._fmt = parent._fmt if not parent is None else self.DEFAULT_FORMAT
519
+ assert not self._fmt is None
520
+ else:
521
+ self._fmt = fmt
522
+ assert not self._fmt is None
523
+
524
+ # extension
525
+ if not name is None:
526
+ if not isinstance(name, str): raise ValueError( txtfmt("'name' must be string. Found object of type %s", type(name) ))
527
+ name = name.replace('\\','/')
528
+
529
+ # avoid windows file names on Linux
530
+ if platform.system() != "Windows" and name[1:3] == ":/":
531
+ raise ValueError( txtfmt("Detected use of windows-style drive declaration %s in path %s.", name[:3], name ))
532
+
533
+ # extract extension information
534
+ ext_i = name.find(";*.")
535
+ if ext_i >= 0:
536
+ _ext = name[ext_i+3:]
537
+ if not ext is None and ext != _ext:
538
+ raise ValueError( txtfmt("Canot specify an extension both in the name string ('%s') and as 'ext' ('%s')", _name, ext))
539
+ ext = _ext
540
+ name = name[:ext_i]
541
+ if ext is None:
542
+ self._ext = self.EXT_FMT_AUTO if parent is None else parent._ext
543
+ else:
544
+ self._ext = SubDir._extract_ext(ext)
545
+
546
+ # createDirectory
547
+ if createDirectory is None:
548
+ self._crt = self.DEFAULT_CREATE_DIRECTORY if parent is None else parent._crt
549
+ else:
550
+ self._crt = bool(createDirectory)
551
+
552
+ # cache controller
553
+ assert cacheController is None or type(cacheController).__name__ == CacheController.__name__, ("'cacheController' should be None or of type 'CacheController'", type(cacheController))
554
+ self._cctrl = cacheController
555
+
556
+ # name
557
+ if name is None:
558
+ if not parent is None and not parent._path is None:
559
+ name = parent._path[:-1]
560
+ else:
561
+ # expand name
562
+ name = _remove_trailing(name)
563
+ if name == "" and parent is None:
564
+ name = "."
565
+ if name[:1] in ['!', '~'] or name[:2] == "./" or name == ".":
566
+ if len(name) > 1 and name[1] != '/':
567
+ raise ValueError( txtfmt("If 'name' starts with '%s', then the second character must be '/' (or '\\' on windows). Found 'name' set to '%s'", name[:1], _name ))
568
+ if name[0] == '!':
569
+ name = SubDir.tempDir()[:-1] + name[1:]
570
+ elif name[0] == ".":
571
+ name = SubDir.workingDir()[:-1] + name[1:]
572
+ else:
573
+ assert name[0] == "~", ("Internal error", name[0] )
574
+ name = SubDir.userDir()[:-1] + name[1:]
575
+ elif name == "..":
576
+ error("Cannot use name '..'")
577
+ elif not parent is None:
578
+ # path relative to 'parent'
579
+ if not parent.is_none:
580
+ name = os.path.join( parent._path, name )
581
+
582
+ # create directory/clean up
583
+ if name is None:
584
+ self._path = None
585
+ else:
586
+ # expand path
587
+ self._path = os.path.abspath(name) + '/'
588
+ self._path = self._path.replace('\\','/')
589
+
590
+ if eraseEverything:
591
+ self.eraseEverything(keepDirectory=self._crt)
592
+ if self._crt:
593
+ self.createDirectory()
594
+
595
+ @staticmethod
596
+ def expandStandardRoot( name ):
597
+ """
598
+ Expands 'name' by a standardized root directory if provided:
599
+ If 'name' starts with -> return
600
+ ! -> tempDir()
601
+ . -> workingDir()
602
+ ~ -> userDir()
603
+ """
604
+ if len(name) < 2 or name[0] not in ['.','!','~'] or name[1] not in ["\\","/"]:
605
+ return name
606
+ if name[0] == '!':
607
+ return SubDir.tempDir() + name[2:]
608
+ elif name[0] == ".":
609
+ return SubDir.workingDir() + name[2:]
610
+ else:
611
+ return SubDir.userDir() + name[2:]
612
+
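+ # Illustrative sketch (the paths are examples only):
+ # SubDir.expandStandardRoot("!/cache") # -> tempDir() + "cache"
+ # SubDir.expandStandardRoot("~/cache") # -> userDir() + "cache"
+ # SubDir.expandStandardRoot("data/run") # -> unchanged (no standard root prefix)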
613
+ def createDirectory( self ):
614
+ """
615
+ Creates the directory if it doesn't exist yet.
616
+ Does not do anything if is_none.
617
+ """
618
+ # create directory/clean up
619
+ if self._path is None:
620
+ return
621
+ # create directory
622
+ if not os.path.exists( self._path[:-1] ):
623
+ try:
624
+ os.makedirs( self._path[:-1] )
625
+ return
626
+ except FileExistsError:
627
+ pass
628
+ if not os.path.isdir(self._path[:-1]):
629
+ raise NotADirectoryError(txtfmt( "Cannot use sub directory %s: object exists but is not a directory", self._path[:-1] ))
630
+
631
+ def pathExists(self) -> bool:
632
+ """ Returns True if the current directory exists """
633
+ return os.path.exists( self._path[:-1] ) if not self._path is None else False
634
+
635
+ # -- a few basic properties --
636
+
637
+ def __str__(self) -> str: # NOQA
638
+ if self._path is None: return "(none)"
639
+ ext = self.ext
640
+ return self._path if len(ext) == 0 else self._path + ";*" + ext
641
+
642
+ def __repr__(self) -> str: # NOQA
643
+ if self._path is None: return "SubDir(None)"
644
+ return "SubDir(%s)" % self.__str__()
645
+
646
+ def __eq__(self, other) -> bool: # NOQA
647
+ """ Tests equality between to SubDirs, or between a SubDir and a directory """
648
+ if isinstance(other,str):
649
+ return self._path == other
650
+ verify( isinstance(other,SubDir), "Cannot compare SubDir to object of type '%s'", type(other).__name__, exception=TypeError )
651
+ return self._path == other._path and self._ext == other._ext and self._fmt == other._fmt
652
+
653
+ def __bool__(self) -> bool:
654
+ """ Returns True if 'self' is set, or False if 'self' is a None directory """
655
+ return not self.is_none
656
+
657
+ def __hash__(self) -> int: #NOQA
658
+ return hash( (self._path, self._ext, self._fmt) )
659
+
660
+ @property
661
+ def is_none(self) -> bool:
662
+ """ Whether this object is 'None' or not """
663
+ return self._path is None
664
+
665
+ @property
666
+ def path(self) -> str:
667
+ """
668
+ Return current path, including trailing '/'
669
+ Note that the path may not exist yet. If this is required, consider using existing_path
670
+ """
671
+ return self._path
672
+
673
+ @property
674
+ def existing_path(self) -> str:
675
+ """
676
+ Return current path, including trailing '/'.
677
+ In addition to self.path this property ensures that the directory structure exists (or raises an exception)
678
+ """
679
+ self.createDirectory()
680
+ return self.path
681
+
682
+ @property
683
+ def fmt(self) -> Format:
684
+ """ Returns current format """
685
+ return self._fmt
686
+
687
+ @property
688
+ def ext(self) -> str:
689
+ """
690
+ Returns the common extension of the files in this directory, including leading '.'
691
+ Resolves '*' into the extension associated with the current format.
692
+ """
693
+ return self._ext if self._ext != self.EXT_FMT_AUTO else self._auto_ext(self._fmt)
694
+
695
+ def autoExt( self, ext : str = None ) -> str:
696
+ """
697
+ Computes the effective extension based on inputs 'ext', defaulting to the SubDir's extension.
698
+ Resolves '*' into the extension associated with the specified format.
699
+ This function allows setting 'ext' also as a Format.
700
+
701
+ Returns the extension with leading '.'
702
+ """
703
+ if isinstance(ext, Format):
704
+ return self._auto_ext(ext)
705
+ else:
706
+ ext = self._ext if ext is None else SubDir._extract_ext(ext)
707
+ return ext if ext != self.EXT_FMT_AUTO else self._auto_ext(self._fmt)
708
+
709
+ def autoExtFmt( self, *, ext : str = None, fmt : Format = None ) -> str:
710
+ """
711
+ Computes the effective extension and format based on inputs 'ext' and 'fmt', each of which defaults to the SubDir's current settings.
712
+ Resolves '*' into the extension associated with the specified format.
713
+ This function allows setting 'ext' also as a Format.
714
+
715
+ Returns (ext, fmt) where 'ext' contains the leading '.'
716
+ """
717
+ if isinstance(ext, Format):
718
+ verify( fmt is None or fmt == ext, "If 'ext' is a Format, then 'fmt' must match 'ext' or be None. Found '%s' and '%s', respectively.", ext, fmt, exception=ValueError )
719
+ return self._auto_ext(ext), ext
720
+
721
+ fmt = fmt if not fmt is None else self._fmt
722
+ ext = self._ext if ext is None else SubDir._extract_ext(ext)
723
+ ext = ext if ext != self.EXT_FMT_AUTO else self._auto_ext(fmt)
724
+ return ext, fmt
725
+
726
+ @property
727
+ def cacheController(self):
728
+ """ Returns an assigned CacheController, or None """
729
+ return self._cctrl if not self._cctrl is None else default_cacheController
730
+
731
+ # -- static helpers --
732
+
733
+ @staticmethod
734
+ def _auto_ext( fmt : Format ) -> str:
735
+ """ Default extension for a given format, including leading '.' """
736
+ if fmt == Format.PICKLE:
737
+ return ".pck"
738
+ if fmt == Format.JSON_PLAIN:
739
+ return ".json"
740
+ if fmt == Format.JSON_PICKLE:
741
+ return ".jpck"
742
+ if fmt == Format.BLOSC:
743
+ return ".zbsc"
744
+ if fmt == Format.GZIP:
745
+ return ".pgz"
746
+ error("Unknown format '%s'", str(fmt))
747
+
748
+ @staticmethod
749
+ def _version_to_bytes( version : str ) -> bytearray:
750
+ """ Convert string version to byte string of at most size MAX_VERSION_BINARY_LEN + 1 """
751
+ if version is None:
752
+ return None
753
+ version_ = bytearray(version,'utf-8')
754
+ if len(version_) >= SubDir.MAX_VERSION_BINARY_LEN:
755
+ raise ValueError(txtfmt("Cannot use version '%s': when translated into a bytearray it exceeds the maximum version lengths of '%ld' (byte string is '%s')", version, SubDir.MAX_VERSION_BINARY_LEN-1, version_ ))
756
+ ver_ = bytearray(SubDir.MAX_VERSION_BINARY_LEN)
757
+ l = len(version_)
758
+ ver_[0] = l
759
+ ver_[1:1+l] = version_
760
+ assert len(ver_) == SubDir.MAX_VERSION_BINARY_LEN, ("Internal error", len(ver_), ver_)
761
+ return ver_
762
+
763
+ @staticmethod
764
+ def _extract_ext( ext : str ) -> str:
765
+ """
766
+ Checks that 'ext' is an extension, and returns .ext.
767
+ -- Accepts '.ext' and 'ext'
768
+ -- Detects use of directories
769
+ -- Returns '*' if ext='*'
770
+ """
771
+ assert not ext is None, ("'ext' should not be None here")
772
+ verify( isinstance(ext,str), "Extension 'ext' must be a string. Found type %s", type(ext).__name__, exception=ValueError )
773
+ # auto?
774
+ if ext == SubDir.EXT_FMT_AUTO:
775
+ return SubDir.EXT_FMT_AUTO
776
+ # remove leading '.'s
777
+ while ext[:1] == ".":
778
+ ext = ext[1:]
779
+ # empty extension -> match all files
780
+ if ext == "":
781
+ return ""
782
+ # ensure extension has no directory information
783
+ sub, _ = os.path.split(ext)
784
+ verify( len(sub) == 0, "Extension '%s' contains directory information", ext)
785
+
786
+ # remove internal characters
787
+ verify( ext[0] != "!", "Extension '%s' cannot start with '!' (this symbol indicates the temp directory)", ext, exception=ValueError )
788
+ verify( ext[0] != "~", "Extension '%s' cannot start with '~' (this symbol indicates the user's directory)", ext, exception=ValueError )
789
+ return "." + ext
790
+
791
+ # -- public utilities --
792
+
793
+ def fullFileName(self, key : str, *, ext : str = None) -> str:
794
+ """
795
+ Returns fully qualified file name.
796
+ The function tests that 'key' does not contain directory information.
797
+
798
+ If 'self' is None, then this function returns None
799
+ If key is None then this function returns None
800
+
801
+ Parameters
802
+ ----------
803
+ key : str
804
+ Core file name, e.g. the 'key' in a data base sense
805
+ ext : str
806
+ If not None, use this extension rather than self.ext
807
+
808
+ Returns
809
+ -------
810
+ Fully qualified system file name
811
+
812
+ [This function has an alias 'fullKeyName' for backward compatibility]
813
+ """
814
+ if self._path is None or key is None:
815
+ return None
816
+ key = str(key)
817
+ verify( len(key) > 0, "'key' cannot be empty")
818
+
819
+ sub, _ = os.path.split(key)
820
+ verify( len(sub) == 0, "Key '%s' contains directory information", key)
821
+
822
+ verify( key[0] != "!", "Key '%s' cannot start with '!' (this symbol indicates the temp directory)", key, exception=ValueError )
823
+ verify( key[0] != "~", "Key '%s' cannot start with '~' (this symbol indicates the user's directory)", key, exception=ValueError )
824
+
825
+ ext = self.autoExt( ext )
826
+ if len(ext) > 0 and key[-len(ext):] != ext:
827
+ return self._path + key + ext
828
+ return self._path + key
829
+ fullKeyName = fullFileName # backwards compatibility
830
+
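+ # Illustrative sketch, assuming the default PICKLE format and a hypothetical directory:
+ # sd = SubDir("!/demo")
+ # sd.fullFileName("results") # -> "<temp>/demo/results.pck"
+ # sd.fullFileName("results", ext="csv") # -> "<temp>/demo/results.csv"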
831
+ @staticmethod
832
+ def tempDir() -> str:
833
+ """
834
+ Return system temp directory. Short cut to tempfile.gettempdir()
835
+ Result contains trailing '/'
836
+ """
837
+ d = tempfile.gettempdir()
838
+ assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-1", d)
839
+ return d + "/"
840
+
841
+ @staticmethod
842
+ def workingDir() -> str:
843
+ """
844
+ Return current working directory. Short cut for os.getcwd()
845
+ Result contains trailing '/'
846
+ """
847
+ d = os.getcwd()
848
+ assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-2", d)
849
+ return d + "/"
850
+
851
+ @staticmethod
852
+ def userDir() -> str:
853
+ """
854
+ Return the user's home directory. Short cut for os.path.expanduser('~')
855
+ Result contains trailing '/'
856
+ """
857
+ d = os.path.expanduser('~')
858
+ assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-3", d)
859
+ return d + "/"
860
+
861
+ # -- read --
862
+
863
+ def _read_reader( self, reader, key : str, default, raiseOnError : bool, *, ext : str = None ):
864
+ """
865
+ Utility function for read() and readLine()
866
+
867
+ Parameters
868
+ ----------
869
+ reader( key, fullFileName, default )
870
+ A function which is called to read the file once the correct directory is identified
871
+ key : key (for error messages, might include '/')
872
+ fullFileName : full file name
873
+ default value
874
+ key : str or list
875
+ str: fully qualified key
876
+ list: list of fully qualified names
877
+ default :
878
+ default value. None is a valid default value
879
+ list : list of defaults for a list of keys
880
+ raiseOnError : bool
881
+ If True, and the file does not exist, throw exception
882
+ ext :
883
+ Extension or None for current extension.
884
+ list : list of extensions for a list of keys
885
+ """
886
+ # vector version
887
+ if not isinstance(key,str):
888
+ if not isinstance(key, Collection): raise ValueError(txtfmt( "'key' must be a string, or an iterable object. Found type %s", type(key)))
889
+ l = len(key)
890
+ if default is None or isinstance(default,str) or not isinstance(default, Collection):
891
+ default = [ default ] * l
892
+ else:
893
+ if len(default) != l: raise ValueError(txtfmt("'default' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(default), l ))
894
+ if ext is None or isinstance(ext, str) or not isinstance(ext, Collection):
895
+ ext = [ ext ] * l
896
+ else:
897
+ if len(ext) != l: raise ValueError(txtfmt("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l ))
898
+ return [ self._read_reader(reader=reader,key=k,default=d,raiseOnError=raiseOnError,ext=e) for k, d, e in zip(key,default,ext) ]
899
+
900
+ # deleted directory?
901
+ if self._path is None:
902
+ verify( not raiseOnError, "Trying to read '%s' from an empty directory object", key, exception=NotADirectoryError)
903
+ return default
904
+
905
+ # single key
906
+ if len(key) == 0: raise ValueError(txtfmt("'key' missing (the filename)" ))
907
+ sub, key_ = os.path.split(key)
908
+ if len(sub) > 0:
909
+ return self(sub)._read_reader(reader=reader,key=key_,default=default,raiseOnError=raiseOnError,ext=ext)
910
+ if len(key_) == 0: raise ValueError(txtfmt("'key' %s indicates a directory, not a file", key))
911
+
912
+ # don't try if directory doesn't exist
913
+ fullFileName = self.fullFileName(key,ext=ext)
914
+ if not self.pathExists():
915
+ if raiseOnError:
916
+ raise KeyError(key, fullFileName)
917
+ return default
918
+
919
+ # does the file exist?
920
+ if not os.path.exists(fullFileName):
921
+ if raiseOnError:
922
+ raise KeyError(key,fullFileName)
923
+ return default
924
+ if not os.path.isfile(fullFileName):
925
+ raise IOError(txtfmt( "Cannot read %s: object exists, but is not a file (full path %s)", key, fullFileName ))
926
+
927
+ # read content
928
+ # delete existing files upon read error
929
+ try:
930
+ return reader( key, fullFileName, default )
931
+ except EOFError as e:
932
+ try:
933
+ os.remove(fullFileName)
934
+ warn("Cannot read %s; file deleted (full path %s).\nError: %s",key,fullFileName, str(e))
935
+ except Exception as e:
936
+ warn("Cannot read %s; attempt to delete file failed (full path %s): %s",key,fullFileName,str(e))
937
+ except FileNotFoundError as e:
938
+ if raiseOnError:
939
+ raise KeyError(key, fullFileName, str(e)) from e
940
+ except Exception as e:
941
+ if raiseOnError:
942
+ raise KeyError(key, fullFileName, str(e)) from e
943
+ except (ImportError, BaseException) as e:
944
+ e.add_note( key )
945
+ e.add_note( fullFileName )
946
+ raise e
947
+ return default
948
+
949
+ def _read( self, key : str,
950
+ default = None,
951
+ raiseOnError : bool = False,
952
+ *,
953
+ version : str = None,
954
+ ext : str = None,
955
+ fmt : Format = None,
956
+ delete_wrong_version : bool = True,
957
+ handle_version : int = 0
958
+ ):
959
+ """ See read() """
960
+ ext, fmt = self.autoExtFmt(ext=ext, fmt=fmt)
961
+ version = str(version) if not version is None else None
962
+ version = version if handle_version != SubDir.VER_RETURN else ""
963
+ assert not fmt == self.EXT_FMT_AUTO, ("'fmt' is '*' ...?")
964
+
965
+ if version is None and fmt in [Format.BLOSC, Format.GZIP]:
966
+ version = ""
967
+
968
+ def reader( key, fullFileName, default ):
969
+ test_version = "(unknown)"
970
+ if fmt == Format.PICKLE or fmt == Format.BLOSC:
971
+ with open(fullFileName,"rb") as f:
972
+ # handle version as byte string
973
+ ok = True
974
+ if not version is None:
975
+ test_len = int( f.read( 1 )[0] )
976
+ test_version = f.read(test_len)
977
+ test_version = test_version.decode("utf-8")
978
+ if handle_version == SubDir.VER_RETURN:
979
+ return test_version
980
+ ok = (version == "*" or test_version == version)
981
+ if ok:
982
+ if handle_version == SubDir.VER_CHECK:
983
+ return True
984
+ if fmt == Format.PICKLE:
985
+ data = pickle.load(f)
986
+ elif fmt == Format.BLOSC:
987
+ if blosc is None:
988
+ raise ModuleNotFoundError("blosc", "'blosc' not found.")
989
+ nnbb = f.read(2)
990
+ num_blocks = int.from_bytes( nnbb, 'big', signed=False )
991
+ data = bytearray()
992
+ for i in range(num_blocks):
993
+ blockl = int.from_bytes( f.read(6), 'big', signed=False )
994
+ if blockl>0:
995
+ bdata = blosc.decompress( f.read(blockl) )
996
+ data += bdata
997
+ del bdata
998
+ data = pickle.loads(data)
999
+ else:
1000
+ raise NotImplementedError(fmt, txtfmt("Unkown format '%s'", fmt))
1001
+ return data
1002
+
1003
+ elif fmt == Format.GZIP:
1004
+ if gzip is None:
1005
+ raise ModuleNotFoundError("gzip", "'gzip' not found'")
1006
+ with gzip.open(fullFileName,"rb") as f:
1007
+ # handle version as byte string
1008
+ ok = True
1009
+ test_len = int( f.read( 1 )[0] )
1010
+ test_version = f.read(test_len)
1011
+ test_version = test_version.decode("utf-8")
1012
+ if handle_version == SubDir.VER_RETURN:
1013
+ return test_version
1014
+ ok = (version == "*" or test_version == version)
1015
+ if ok:
1016
+ if handle_version == SubDir.VER_CHECK:
1017
+ return True
1018
+ data = pickle.load(f)
1019
+ return data
1020
+
1021
+ elif fmt in [Format.JSON_PLAIN, Format.JSON_PICKLE]:
1022
+ with open(fullFileName,"rt",encoding="utf-8") as f:
1023
+ # handle versioning
1024
+ ok = True
1025
+ if not version is None:
1026
+ test_version = f.readline()
1027
+ if test_version[:2] != "# ":
1028
+ raise EnvironmentError("Error reading '%s': file does not appear to contain a version (it should start with '# ')" % fullFileName)
1029
+ test_version = test_version[2:]
1030
+ if test_version[-1:] == "\n":
1031
+ test_version = test_version[:-1]
1032
+ if handle_version == SubDir.VER_RETURN:
1033
+ return test_version
1034
+ ok = (version == "*" or test_version == version)
1035
+ if ok:
1036
+ if handle_version == SubDir.VER_CHECK:
1037
+ return ok
1038
+ # read
1039
+ if fmt == Format.JSON_PICKLE:
1040
+ if jsonpickle is None:
1041
+ raise ModuleNotFoundError("jsonpickle", "'jsonpickle' not found'")
1042
+ return jsonpickle.decode( f.read() )
1043
+ else:
1044
+ assert fmt == Format.JSON_PLAIN, ("Internal error: unknown Format", fmt)
1045
+ return json.loads( f.read() )
1046
+ else:
1047
+ raise NotImplementedError(fmt, txtfmt("Unknown format '%s'", fmt ))
1048
+
1049
+ # arrive here if version is wrong
1050
+ # delete a wrong version
1051
+ deleted = ""
1052
+ if delete_wrong_version:
1053
+ try:
1054
+ os.remove(fullFileName)
1055
+ e = None
1056
+ except Exception as e_:
1057
+ e = str(e_)
1058
+ if handle_version == SubDir.VER_CHECK:
1059
+ return False
1060
+ if not raiseOnError:
1061
+ return default
1062
+ deleted = " (file was deleted)" if e is None else " (attempt to delete file failed: %s)" % e
1063
+ raise EnvironmentError("Error reading '%s': found version '%s' not '%s'%s" % (fullFileName,str(test_version),str(version),deleted))
1064
+
1065
+ return self._read_reader( reader=reader, key=key, default=default, raiseOnError=raiseOnError, ext=ext )
1066
+
1067
+ def read( self, key : str,
1068
+ default = None,
1069
+ raiseOnError : bool = False,
1070
+ *,
1071
+ version : str = None,
1072
+ delete_wrong_version : bool = True,
1073
+ ext : str = None,
1074
+ fmt : Format = None
1075
+ ):
1076
+ """
1077
+ Read pickled data from 'key' if the file exists, or return 'default'
1078
+ -- Supports 'key' containing directories
1079
+ -- Supports 'key' (and default, ext) being iterable.
1080
+ In this case any iterable 'default' (except strings) is interpreted element-wise.
1081
+ To use a single default value which is itself an iterable, wrap it in another iterable, e.g.
1082
+ E.g.:
1083
+ keys = ['file1', 'file2']
1084
+
1085
+ sd.read( keys )
1086
+ --> works, both are using default None
1087
+
1088
+ sd.read( keys, 1 )
1089
+ --> works, both are using default '1'
1090
+
1091
+ sd.read( keys, [1,2] )
1092
+ --> works, defaults 1 and 2, respectively
1093
+
1094
+ sd.read( keys, [1] )
1095
+ --> produces error as len(keys) != len(default)
1096
+
1097
+ Strings are iterable but are treated as single value.
1098
+ Therefore
1099
+ sd.read( keys, '12' )
1100
+ means the default value '12' is used for both files.
1101
+ Use
1102
+ sd.read( keys, ['1','2'] )
1103
+ in case the intention was using '1' and '2', respectively.
1104
+
1105
+ Returns the read object, or a list of objects if 'key' was iterable.
1106
+ If the current directory is 'None', then behaviour is as if the file did not exist.
1107
+
1108
+ Parameters
1109
+ ----------
1110
+ key : str
1111
+ A core filename ("key") or a list thereof. The 'key' may contain subdirectory information '/'.
1112
+ default :
1113
+ Default value, or default values if key is a list
1114
+ raiseOnError : bool
1115
+ Whether to raise an exception if reading an existing file failed.
1116
+ By default this function fails silently and returns the default.
1117
+ version : str
1118
+ If not None, specifies the version of the current code base.
1119
+ In this case, this version will be compared to the version of the file being read.
1120
+ If they do not match, read fails (either by returning default or throwing an exception).
1121
+ You can specify version "*" to read any version. This is distinct from reading a file without a version.
1122
+ delete_wrong_version : bool
1123
+ If True, and if a wrong version was found, delete the file.
1124
+ ext : str
1125
+ Extension overwrite, or a list thereof if key is a list
1126
+ Set to:
1127
+ -- None to use directory's default
1128
+ -- '*' to use the extension implied by 'fmt'
1129
+ -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1130
+ fmt : Format
1131
+ File format or None to use the directory's default.
1132
+ Note that 'fmt' cannot be a list even if 'key' is.
1133
+ Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
1134
+
1135
+ Returns
1136
+ -------
1137
+ For a single 'key': Content of the file if successfully read, or 'default' otherwise.
1138
+ If 'key' is a list: list of contents.
1139
+ """
1140
+ return self._read( key=key,
1141
+ default=default,
1142
+ raiseOnError=raiseOnError,
1143
+ version=version,
1144
+ ext=ext,
1145
+ fmt=fmt,
1146
+ delete_wrong_version=delete_wrong_version,
1147
+ handle_version=SubDir.VER_NORMAL )
1148
+
1149
+ get = read # backwards compatibility
1150
+
1151
+ def is_version( self, key : str, version : str = None, raiseOnError : bool = False, *, ext : str = None, fmt : Format = None, delete_wrong_version : bool = True ):
1152
+ """
1153
+ Compares the version of 'key' with 'version'.
1154
+
1155
+ Parameters
1156
+ ----------
1157
+ key : str
1158
+ A core filename ("key") or a list thereof. The 'key' may contain subdirectory information '/'.
1159
+ version : str
1160
+ Specifies the version of the current code base to compare with.
1161
+ You can use '*' to match any version
1162
+
1163
+ raiseOnError : bool
1164
+ Whether to raise an exception if accessing an existing file failed (e.g. if it is a directory).
1165
+ By default this function fails silently and returns the default.
1166
+ delete_wrong_version : bool
1167
+ If True, and if a wrong version was found, delete the file.
1168
+ ext : str
1169
+ Extension overwrite, or a list thereof if key is a list.
1170
+ Set to:
1171
+ -- None to use directory's default
1172
+ -- '*' to use the extension implied by 'fmt'
1173
+ -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1174
+ fmt : Format
1175
+ File format or None to use the directory's default.
1176
+ Note that 'fmt' cannot be a list even if 'key' is.
1177
+ Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
1178
+
1179
+ Returns
1180
+ -------
1181
+ Returns True only if the file exists and has the correct version.
1182
+ """
1183
+ return self._read( key=key,default=False,raiseOnError=raiseOnError,version=version,ext=ext,fmt=fmt,delete_wrong_version=delete_wrong_version,handle_version=SubDir.VER_CHECK )
1184
+
1185
+ def get_version( self, key : str, raiseOnError : bool = False, *, ext : str = None, fmt : Format = None ):
1186
+ """
1187
+ Returns the version ID stored in 'key'.
1188
+ This requires that the file has previously been saved with a version.
1189
+ Otherwise this function will return unpredictable results.
1190
+
1191
+ Parameters
1192
+ ----------
1193
+ key : str
1194
+ A core filename ("key") or a list thereof. The 'key' may contain subdirectory information '/'.
1195
+ raiseOnError : bool
1196
+ Whether to raise an exception if accessing an existing file failed (e.g. if it is a directory).
1197
+ By default this function fails silently and returns the default.
1198
+ ext : str
1199
+ Extension overwrite, or a list thereof if key is a list.
1200
+ Set to:
1201
+ -- None to use directory's default
1202
+ -- '*' to use the extension implied by 'fmt'
1203
+ -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1204
+ fmt : Format
1205
+ File format or None to use the directory's default.
1206
+ Note that 'fmt' cannot be a list even if 'key' is.
1207
+ Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
1208
+
1209
+ Returns
1210
+ -------
1211
+ Version ID.
1212
+ """
1213
+ return self._read( key=key,default=None,raiseOnError=raiseOnError,version="",ext=ext,fmt=fmt,delete_wrong_version=False,handle_version=SubDir.VER_RETURN )
1214
+
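+ # Illustrative sketch of versioned caching; 'sd', 'weights' and the key 'model' are hypothetical:
+ # sd.write("model", weights, version="1.2") # the version string is stored in the file
+ # sd.get_version("model") # -> "1.2"
+ # sd.is_version("model", "1.2") # -> True
+ # sd.read("model", None, version="1.2") # -> the stored object
+ # sd.read("model", None, version="1.3") # -> None; stale file deleted (delete_wrong_version=True)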
1215
+ def readString( self, key : str, default = None, raiseOnError : bool = False, *, ext : str = None ) -> str:
1216
+ """
1217
+ Reads text from 'key' or returns 'default'. Removes trailing EOLs
1218
+ -- Supports 'key' containing directories
1219
+ -- Supports 'key' being iterable. In this case any 'default' can be a list, too.
1220
+
1221
+ Returns the read string, or a list of strings if 'key' was iterable.
1222
+ If the current directory is 'None', then behaviour is as if the file did not exist.
1223
+
1224
+ Use 'ext' to specify the extension.
1225
+ You cannot use 'ext' to specify a format as the format is plain text.
1226
+ If 'ext' is '*' or if self._ext is '*' then the default extension is 'txt'.
1227
+ """
1228
+ verify( not isinstance(ext, Format), "Cannot change format when reading strings. Found extension '%s'", ext)
1229
+ ext = ext if not ext is None else self._ext
1230
+ ext = ext if ext != self.EXT_FMT_AUTO else ".txt"
1231
+
1232
+ def reader( key, fullFileName, default ):
1233
+ with open(fullFileName,"rt",encoding="utf-8") as f:
1234
+ line = f.readline()
1235
+ if len(line) > 0 and line[-1] == '\n':
1236
+ line = line[:-1]
1237
+ return line
1238
+ return self._read_reader( reader=reader, key=key, default=default, raiseOnError=raiseOnError, ext=ext )
1239
+
1240
+ # -- write --
1241
+
1242
+ def _write( self, writer, key : str, obj, raiseOnError : bool, *, ext : str = None ) -> bool:
1243
+ """ Utility function for write() and writeLine() """
1244
+ if self._path is None:
1245
+ raise EOFError("Cannot write to '%s': current directory is not specified" % key)
1246
+ self.createDirectory()
1247
+
1248
+ # vector version
1249
+ if not isinstance(key,str):
1250
+ if not isinstance(key, Collection): error( "'key' must be a string or an iterable object. Found type %s", type(key))
1251
+ l = len(key)
1252
+ if obj is None or isinstance(obj,str) or not isinstance(obj, Collection):
1253
+ obj = [ obj ] * l
1254
+ else:
1255
+ if len(obj) != l: error("'obj' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(obj), l )
1256
+ if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
1257
+ ext = [ ext ] * l
1258
+ else:
1259
+ if len(ext) != l: error("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
1260
+ ok = True
1261
+ for k,o,e in zip(key,obj,ext):
1262
+ ok &= self._write( writer, k, o, raiseOnError=raiseOnError, ext=e )
1263
+ return ok
1264
+
1265
+ # single key
1266
+ if not len(key) > 0: error("'key' is empty (the filename)" )
1267
+ sub, key = os.path.split(key)
1268
+ if len(key) == 0: error("'key '%s' refers to a directory, not a file", key)
1269
+ if len(sub) > 0:
1270
+ return SubDir(sub,parent=self)._write(writer,key,obj, raiseOnError=raiseOnError,ext=ext )
1271
+
1272
+ # write to temp file, then rename into target file
1273
+ # this reduces collision when i/o operations are slow
1274
+ fullFileName = self.fullKeyName(key,ext=ext)
1275
+ tmp_file = uniqueHash48( [ key, uuid.getnode(), os.getpid(), threading.get_ident(), datetime.datetime.now() ] )
1276
+ tmp_i = 0
1277
+ fullTmpFile = self.fullKeyName(tmp_file,ext="tmp" if not ext=="tmp" else "_tmp")
1278
+ while os.path.exists(fullTmpFile):
1279
+ fullTmpFile = self.fullKeyName(tmp_file) + "." + str(tmp_i) + ".tmp"
1280
+ tmp_i += 1
1281
+ if tmp_i >= 10:
1282
+ raise RuntimeError("Failed to generate temporary file for writing '%s': too many temporary files found. For example, this file already exists: '%s'" % ( fullFileName, fullTmpFile ) )
1283
+
1284
+ # write
1285
+ if not writer( key, fullTmpFile, obj ):
1286
+ return False
1287
+ assert os.path.exists(fullTmpFile), ("Internal error: file does not exist ...?", fullTmpFile, fullFileName)
1288
+ try:
1289
+ if os.path.exists(fullFileName):
1290
+ os.remove(fullFileName)
1291
+ os.rename(fullTmpFile, fullFileName)
1292
+ except Exception as e:
1293
+ os.remove(fullTmpFile)
1294
+ if raiseOnError:
1295
+ raise e
1296
+ return False
1297
+ return True
1298
+
1299
+ def write( self, key : str,
1300
+ obj,
1301
+ raiseOnError : bool = True,
1302
+ *,
1303
+ version : str = None,
1304
+ ext : str = None,
1305
+ fmt : Format = None ) -> bool:
1306
+ """
1307
+ Pickles 'obj' into key.
1308
+ -- Supports 'key' containing directories
1309
+ -- Supports 'key' being a list.
1310
+ In this case, if obj is an iterable it is considered the list of values for the elements of 'keys'
1311
+ If 'obj' is not iterable, it will be written into all 'key's
1312
+
1313
+ keys = ['file1', 'file2']
1314
+
1315
+ sd.write( keys, 1 )
1316
+ --> works, writes '1' in both files.
1317
+
1318
+ sd.write( keys, [1,2] )
1319
+ --> works, writes 1 and 2, respectively
1320
+
1321
+ sd.read( keys, "12" )
1322
+ --> works, writes '12' in both files
1323
+
1324
+ sd.write( keys, [1] )
1325
+ --> produces error as len(keys) != len(obj)
1326
+
1327
+ If the current directory is 'None', then the function throws an EOFError exception
1328
+
1329
+ Parameters
1330
+ ----------
1331
+ key : str
1332
+ Core filename ("key"), or list thereof
1333
+ obj :
1334
+ Object to write, or list thereof if 'key' is a list
1335
+ raiseOnError : bool
1336
+ If False, this function will return False upon failure
1337
+ version : str
1338
+ If not None, specifies the version of the code which generated 'obj'.
1339
+ This version will be written to the beginning of the file.
1340
+ ext : str
1341
+ Extension, or list thereof if 'key' is a list.
1342
+ Set to:
1343
+ -- None to use directory's default
1344
+ -- '*' to use the extension implied by 'fmt'
1345
+ -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1346
+ fmt : Format
1347
+ File format or None to use the directory's default.
1348
+ Note that 'fmt' cannot be a list even if 'key' is.
1349
+ Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
1350
+
1351
+ Returns
1352
+ -------
1353
+ Boolean to indicate success if raiseOnError is False.
1354
+ """
1355
+ ext, fmt = self.autoExtFmt(ext=ext, fmt=fmt)
1356
+ version = str(version) if not version is None else None
1357
+ assert ext != self.EXT_FMT_AUTO, ("'ext' is '*'...?")
1358
+
1359
+ if version=='*': error("You cannot write version '*'. Use None to write a file without version.")
1360
+ if version is None and fmt in [Format.BLOSC, Format.GZIP]:
1361
+ version = ""
1362
+
1363
+ def writer( key, fullFileName, obj ):
1364
+ try:
1365
+ if fmt == Format.PICKLE or fmt == Format.BLOSC:
1366
+ with open(fullFileName,"wb") as f:
1367
+ # handle version as byte string
1368
+ if not version is None:
1369
+ version_ = bytearray(version, "utf-8")
1370
+ if len(version_) > 255: error("Version '%s' is way too long: its byte encoding has length %ld which does not fit into a byte", version, len(version_))
1371
+ len8 = bytearray(1)
1372
+ len8[0] = len(version_)
1373
+ f.write(len8)
1374
+ f.write(version_)
1375
+ if fmt == Format.PICKLE:
1376
+ pickle.dump(obj,f,-1)
1377
+ else:
1378
+ assert fmt == fmt.BLOSC, ("Internal error: unknown format", fmt)
1379
+ if blosc is None:
1380
+ raise ModuleNotFoundError("blosc", "'blosc' not found")
1381
+ pdata = pickle.dumps(obj) # returns data as a bytes object
1382
+ del obj
1383
+ len_data = len(pdata)
1384
+ num_blocks = max(0,len_data-1) // BLOSC_MAX_USE + 1
1385
+ f.write(num_blocks.to_bytes(2, 'big', signed=False))
1386
+ for i in range(num_blocks):
1387
+ start = i*BLOSC_MAX_USE
1388
+ end = min(len_data,start+BLOSC_MAX_USE)
1389
+ assert end>start, ("Internal error; nothing to write")
1390
+ block = blosc.compress( pdata[start:end] )
1391
+ blockl = len(block)
1392
+ f.write( blockl.to_bytes(6, 'big', signed=False) )
1393
+ if blockl > 0:
1394
+ f.write( block )
1395
+ del block
1396
+ del pdata
1397
+
1398
+ elif fmt == Format.GZIP:
1399
+ if gzip is None:
1400
+ raise ModuleNotFoundError("gzip", "'gzip' not found")
1401
+ with gzip.open(fullFileName,"wb") as f:
1402
+ # handle version as byte string
1403
+ if not version is None:
1404
+ version_ = bytearray(version, "utf-8")
1405
+ if len(version_) > 255: error("Version '%s' is way too long: its byte encoding has length %ld which does not fit into a byte", version, len(version_))
1406
+ len8 = bytearray(1)
1407
+ len8[0] = len(version_)
1408
+ f.write(len8)
1409
+ f.write(version_)
1410
+ pickle.dump(obj,f,-1)
1411
+
1412
+ elif fmt in [Format.JSON_PLAIN, Format.JSON_PICKLE]:
1413
+ with open(fullFileName,"wt",encoding="utf-8") as f:
1414
+ if not version is None:
1415
+ f.write("# " + version + "\n")
1416
+ if fmt == Format.JSON_PICKLE:
1417
+ if jsonpickle is None:
1418
+ raise ModuleNotFoundError("jsonpickle", "'jsonpickle' not found")
1419
+ f.write( jsonpickle.encode(obj) )
1420
+ else:
1421
+ assert fmt == Format.JSON_PLAIN, ("Internal error: invalid Format", fmt)
1422
+ f.write( json.dumps( plain(obj, sorted_dicts=True, native_np=True, dt_to_str=True ), default=str ) )
1423
+
1424
+ else:
1425
+ raise NotImplementedError(fmt, txtfmt("Internal error: invalid format '%s'", fmt))
1426
+ except Exception as e:
1427
+ if raiseOnError:
1428
+ raise e
1429
+ return False
1430
+ return True
1431
+ return self._write( writer=writer, key=key, obj=obj, raiseOnError=raiseOnError, ext=ext )
1432
+
1433
+ set = write
1434
+
1435
+ def writeString( self, key : str, line : str, raiseOnError : bool = True, *, ext : str = None ) -> bool:
1436
+ """
1437
+ Writes 'line' into key. A trailing EOL will not be read back
1438
+ -- Supports 'key' containing directories
1439
+ -- Supports 'key' being a list.
1440
+ In this case, 'line' can either be the same value for all keys or a list, too.
1441
+
1442
+ If the current directory is 'None', then the function throws an EOFError exception
1443
+ See additional comments for write()
1444
+
1445
+ Use 'ext' to specify the extension.
1446
+ You cannot use 'ext' to specify a format as the format is plain text.
1447
+ If 'ext' is '*' or if self._ext is '*' then the default extension is 'txt'.
1448
+ """
1449
+ verify( not isinstance(ext, Format), "Cannot change format when writing strings. Found extension '%s'", ext, exception=ValueError )
1450
+ ext = ext if not ext is None else self._ext
1451
+ ext = ext if ext != self.EXT_FMT_AUTO else ".txt"
1452
+
1453
+ if len(line) == 0 or line[-1] != '\n':
1454
+ line += '\n'
1455
+ def writer( key, fullFileName, obj ):
1456
+ try:
1457
+ with open(fullFileName,"wt",encoding="utf-8") as f:
1458
+ f.write(obj)
1459
+ except Exception as e:
1460
+ if raiseOnError:
1461
+ raise e
1462
+ return False
1463
+ return True
1464
+ return self._write( writer=writer, key=key, obj=line, raiseOnError=raiseOnError, ext=ext )
1465
+
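+ # Illustrative sketch (not part of the API documentation above): writing a plain text line.
+ # 'sd' is assumed to be a SubDir such as SubDir("!/test").
+ #
+ # sd.writeString("readme", "hello world") # a trailing '\n' is appended if missing;
+ # # stored with extension 'txt' if the directory's extension is '*'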
1466
+ # -- iterate --
1467
+
1468
+ def files(self, *, ext : str = None) -> list:
1469
+ """
1470
+ Returns a list of keys in this subdirectory with the current extension, or the specified extension.
1471
+
1472
+ In other words, if the extension is ".pck", and the files are "file1.pck", "file2.pck", "file3.bin"
1473
+ then this function will return [ "file1", "file2" ]
1474
+
1475
+ If 'ext' is
1476
+ -- None, the directory's default extension will be used
1477
+ -- "" then this function will return all files in this directory.
1478
+ -- a Format, then the default extension of the format will be used.
1479
+
1480
+ This function ignores directories. Use subDirs() to retrieve those.
1481
+
1482
+ [This function has an alias 'keys']
1483
+ """
1484
+ if not self.pathExists():
1485
+ return []
1486
+ ext = self.autoExt( ext=ext )
1487
+ ext_l = len(ext)
1488
+ keys = []
1489
+ with os.scandir(self._path) as it:
1490
+ for entry in it:
1491
+ if not entry.is_file():
1492
+ continue
1493
+ if ext_l > 0:
1494
+ if len(entry.name) <= ext_l or entry.name[-ext_l:] != ext:
1495
+ continue
1496
+ keys.append( entry.name[:-ext_l] )
1497
+ else:
1498
+ keys.append( entry.name )
1499
+ return keys
1500
+ keys = files
1501
+
1502
+ def subDirs(self) -> list:
1503
+ """
1504
+ Returns a list of all sub directories
1505
+ If self does not refer to an existing directory, then this function returns an empty list.
1506
+ """
1507
+ # do not do anything if the object was deleted
1508
+ if not self.pathExists():
1509
+ return []
1510
+ subdirs = []
1511
+ with os.scandir(self._path[:-1]) as it:
1512
+ for entry in it:
1513
+ if not entry.is_dir():
1514
+ continue
1515
+ subdirs.append( entry.name )
1516
+ return subdirs
1517
+
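+ # Illustrative sketch: listing keys and sub directories, assuming the directory's default
+ # extension (e.g. ".pck") matches the files written.
+ #
+ # sd = SubDir("!/test")
+ # sd.write("file1", 1); sd.write("file2", 2)
+ # sd.files() # -> [ "file1", "file2" ] (keys without the extension)
+ # sd.subDirs() # -> names of sub directories, e.g. [] here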
1518
+ # -- delete --
1519
+
1520
+ def delete( self, key : str, raiseOnError: bool = False, *, ext : str = None ):
1521
+ """
1522
+ Deletes 'key'; 'key' might be a list.
1523
+
1524
+ Parameters
1525
+ ----------
1526
+ key :
1527
+ filename, or list of filenames
1528
+ raiseOnError :
1529
+ if False, do not throw KeyError if file does not exist.
1530
+ ext :
1531
+ Extension, or list thereof if 'key' is a list.
1532
+ Use
1533
+ -- None for the directory default
1534
+ -- "" to not use an automatic extension.
1535
+ -- A Format to specify the default extension for that format.
1536
+ """
1537
+ # do not do anything if the object was deleted
1538
+ if self._path is None:
1539
+ if raiseOnError: raise EOFError("Cannot delete '%s': current directory not specified" % key)
1540
+ return
1541
+
1542
+ # vector version
1543
+ if not isinstance(key,str):
1544
+ if not isinstance(key, Collection): error( "'key' must be a string or an iterable object. Found type %s", type(key))
1545
+ l = len(key)
1546
+ if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
1547
+ ext = [ ext ] * l
1548
+ else:
1549
+ if len(ext) != l: error("'ext' must have the same length as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
1550
+ for k, e in zip(key,ext):
1551
+ self.delete(k, raiseOnError=raiseOnError, ext=e)
1552
+ return
1553
+
1554
+ # handle directories in 'key'
1555
+ if len(key) == 0: error( "'key' is empty" )
1556
+ sub, key_ = os.path.split(key)
1557
+ if len(key_) == 0: error("'key' %s indicates a directory, not a file", key)
1558
+ if len(sub) > 0: return SubDir(sub,parent=self).delete(key_,raiseOnError=raiseOnError,ext=ext)
1559
+ # don't try if the directory doesn't exist
1560
+ if not self.pathExists():
1561
+ if raiseOnError:
1562
+ raise KeyError(key)
1563
+ return
1564
+ fullFileName = self.fullKeyName(key, ext=ext)
1565
+ if not os.path.exists(fullFileName):
1566
+ if raiseOnError:
1567
+ raise KeyError(key)
1568
+ else:
1569
+ os.remove(fullFileName)
1570
+
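+ # Illustrative sketch: deleting keys.
+ #
+ # sd.delete("file1") # silently ignores missing files by default
+ # sd.delete(["file1","file2"], raiseOnError=True) # list version; raises KeyError if a file is missing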
1571
+ def deleteAllKeys( self, raiseOnError : bool = False, *, ext : str = None ):
1572
+ """
1573
+ Deletes all valid keys in this sub directory with the correct extension.
1574
+
1575
+ Parameters
1576
+ ----------
1577
+ key :
1578
+ filename, or list of filenames
1579
+ raiseOnError :
1580
+ if False, do not throw KeyError if file does not exist.
1581
+ ext :
1582
+ File extension to match.
1583
+ Use
1584
+ -- None for the directory default
1585
+ -- "" to match all files regardless of extension.
1586
+ -- A Format to specify the default extension for that format.
1587
+ """
1588
+ if self._path is None:
1589
+ if raiseOnError: raise EOFError("Cannot delete all files: current directory not specified")
1590
+ return
1591
+ if not self.pathExists():
1592
+ return
1593
+ self.delete( self.keys(ext=ext), raiseOnError=raiseOnError, ext=ext )
1594
+
1595
+ def deleteAllContent( self, deleteSelf : bool = False, raiseOnError : bool = False, *, ext : str = None ):
1596
+ """
1597
+ Deletes all valid keys and subdirectories in this sub directory.
1598
+ Does not delete files with other extensions.
1599
+ Use eraseEverything() if the aim is to delete everything.
1600
+
1601
+ Parameters
1602
+ ----------
1603
+ deleteSelf:
1604
+ whether to delete the directory or only its contents
1605
+ raiseOnError:
1606
+ False for silent failure
1607
+ ext:
1608
+ Extension for keys, or None for the directory's default.
1609
+ You can also provide a Format for 'ext'.
1610
+ Use "" to match all files regardless of extension.
1611
+ """
1612
+ # do not do anything if the object was deleted
1613
+ if self._path is None:
1614
+ if raiseOnError: raise EOFError("Cannot delete all contents: current directory not specified")
1615
+ return
1616
+ if not self.pathExists():
1617
+ return
1618
+ # delete sub directories
1619
+ subdirs = self.subDirs()
1620
+ for subdir in subdirs:
1621
+ SubDir(subdir, parent=self).deleteAllContent( deleteSelf=True, raiseOnError=raiseOnError, ext=ext )
1622
+ # delete keys
1623
+ self.deleteAllKeys( raiseOnError=raiseOnError,ext=ext )
1624
+ # delete myself
1625
+ if not deleteSelf:
1626
+ return
1627
+ rest = list( os.scandir(self._path[:-1]) )
1628
+ txt = str(rest)
1629
+ txt = txt if len(txt) < 50 else (txt[:47] + '...')
1630
+ if len(rest) > 0:
1631
+ if raiseOnError: error( "Cannot delete my own directory %s: directory not empty: found %ld object(s): %s", self._path,len(rest), txt)
1632
+ return
1633
+ os.rmdir(self._path[:-1]) ## does not work ????
1634
+ self._path = None
1635
+
1636
+ def eraseEverything( self, keepDirectory : bool = True ):
1637
+ """
1638
+ Deletes the entire sub directory with all contents.
1639
+ WARNING: deletes ALL files, not just those with the present extension.
1640
+ Will keep the subdir itself by default.
1641
+ If not, it will invalidate 'self._path'
1642
+
1643
+ If self is None, do nothing. That means you can call this function several times.
1644
+ """
1645
+ if self._path is None:
1646
+ return
1647
+ if not self.pathExists():
1648
+ return
1649
+ shutil.rmtree(self._path[:-1], ignore_errors=True)
1650
+ if not keepDirectory:
1651
+ if os.path.exists(self._path[:-1]): os.rmdir(self._path[:-1])
1652
+ self._path = None
1653
+ elif not os.path.exists(self._path[:-1]):
1654
+ os.makedirs(self._path[:-1])
1655
+
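+ # Illustrative sketch: deleteAllKeys() and deleteAllContent() only touch files matching the
+ # relevant extension, while eraseEverything() removes everything regardless of extension.
+ #
+ # sd.deleteAllKeys() # delete all keys with the directory's extension
+ # sd.deleteAllContent(deleteSelf=False) # also recurse into sub directories
+ # sd.eraseEverything(keepDirectory=True) # wipe all contents below the directory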
1656
+ # -- file ops --
1657
+
1658
+ def exists(self, key : str, *, ext : str = None ) -> bool:
1659
+ """
1660
+ Checks whether 'key' exists. Works with iterables
1661
+
1662
+ Parameters
1663
+ ----------
1664
+ key :
1665
+ filename, or list of filenames
1666
+ ext :
1667
+ Extension, or list thereof if 'key' is a list.
1668
+ Use
1669
+ -- None for the directory default
1670
+ -- "" for no automatic extension
1671
+ -- A Format to specify the default extension for that format.
1672
+
1673
+ Returns
1674
+ -------
1675
+ If 'key' is a string, returns True or False, else it will return a list of bools.
1676
+ """
1677
+ # vector version
1678
+ if not isinstance(key,str):
1679
+ verify( isinstance(key, Collection), "'key' must be a string or an iterable object. Found type %s", type(key))
1680
+ l = len(key)
1681
+ if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
1682
+ ext = [ ext ] * l
1683
+ else:
1684
+ if len(ext) != l: error("'ext' must have the same length as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
1685
+ return [ self.exists(k,ext=e) for k,e in zip(key,ext) ]
1686
+ # empty directory
1687
+ if self._path is None:
1688
+ return False
1689
+ # handle directories in 'key'
1690
+ if len(key) == 0: raise ValueError("'key' missing (the filename)")
1691
+ sub, key_ = os.path.split(key)
1692
+ if len(key_) == 0: raise IsADirectoryError( key, txtfmt("'key' %s indicates a directory, not a file", key) )
1693
+ if len(sub) > 0:
1694
+ return self(sub).exists(key=key_,ext=ext)
1695
+ # if directory doesn't exist
1696
+ if not self.pathExists():
1697
+ return False
1698
+ # single key
1699
+ fullFileName = self.fullKeyName(key, ext=ext)
1700
+ if not os.path.exists(fullFileName):
1701
+ return False
1702
+ if not os.path.isfile(fullFileName):
1703
+ raise IsADirectoryError("Structural error: key %s: exists, but is not a file (full path %s)",key,fullFileName)
1704
+ return True
1705
+
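+ # Illustrative sketch: existence checks, including the vectorized form.
+ #
+ # sd.exists("file1") # -> True or False
+ # sd.exists(["file1","file2"]) # -> list of bools
+ # "file1" in sd # same check via the 'in' operator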
1706
+ def _getFileProperty( self, *, key : str, ext : str, func ):
1707
+ # vector version
1708
+ if not isinstance(key,str):
1709
+ verify( isinstance(key, Collection), "'key' must be a string or an iterable object. Found type %s", type(key))
1710
+ l = len(key)
1711
+ if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
1712
+ ext = [ ext ] * l
1713
+ else:
1714
+ if len(ext) != l: error("'ext' must have the same length as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
1715
+ return [ self._getFileProperty(key=k,ext=e,func=func) for k,e in zip(key,ext) ]
1716
+ # empty directory
1717
+ if self._path is None:
1718
+ return None
1719
+ # handle directories in 'key'
1720
+ if len(key) == 0: raise ValueError("'key' missing (the filename)")
1721
+ sub, key_ = os.path.split(key)
1722
+ if len(key_) == 0: raise IsADirectoryError( key, txtfmt("'key' %s indicates a directory, not a file", key) )
1723
+ if len(sub) > 0: return self(sub)._getFileProperty(key=key_,ext=ext,func=func)
1724
+ # if directory doesn't exist
1725
+ if not self.pathExists():
1726
+ return None
1727
+ # single key
1728
+ fullFileName = self.fullKeyName(key, ext=ext)
1729
+ if not os.path.exists(fullFileName):
1730
+ return None
1731
+ return func(fullFileName)
1732
+
1733
+ def getCreationTime( self, key : str, *, ext : str = None ) -> datetime.datetime:
1734
+ """
1735
+ Returns the creation time of 'key', or None if file was not found.
1736
+ See comments on os.path.getctime() for compatibility
1737
+
1738
+ Parameters
1739
+ ----------
1740
+ key :
1741
+ filename, or list of filenames
1742
+ ext :
1743
+ Extension, or list thereof if 'key' is a list.
1744
+ Use
1745
+ -- None for the directory default
1746
+ -- "" for no automatic extension
1747
+ -- A Format to specify the default extension for that format.
1748
+
1749
+ Returns
1750
+ -------
1751
+ datetime.datetime if 'key' is a string, otherwise a list of datetime's
1752
+ """
1753
+ return self._getFileProperty( key=key, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getctime(x)) )
1754
+
1755
+ def getLastModificationTime( self, key : str, *, ext : str = None ) -> datetime.datetime:
1756
+ """
1757
+ Returns the last modification time of 'key', or None if file was not found.
1758
+ See comments on os.path.getmtime() for compatibility
1759
+
1760
+ Parameters
1761
+ ----------
1762
+ key :
1763
+ filename, or list of filenames
1764
+ ext :
1765
+ Extension, or list thereof if 'key' is a list.
1766
+ Use
1767
+ -- None for the directory default
1768
+ -- "" for no automatic extension
1769
+ -- A Format to specify the default extension for that format.
1770
+
1771
+ Returns
1772
+ -------
1773
+ datetime.datetime if 'key' is a string, otherwise a list of datetime's
1774
+ """
1775
+ return self._getFileProperty( key=key, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getmtime(x)) )
1776
+
1777
+ def getLastAccessTime( self, key : str, *, ext : str = None ) -> datetime.datetime:
1778
+ """
1779
+ Returns the last access time of 'key', or None if file was not found.
1780
+ See comments on os.path.getatime() for compatibility
1781
+
1782
+ Parameters
1783
+ ----------
1784
+ key :
1785
+ filename, or list of filenames
1786
+ ext :
1787
+ Extension, or list thereof if 'key' is a list.
1788
+ Use
1789
+ -- None for the directory default
1790
+ -- "" for no automatic extension
1791
+ -- A Format to specify the default extension for that format.
1792
+
1793
+ Returns
1794
+ -------
1795
+ datetime.datetime if 'key' is a string, otherwise a list of datetime's
1796
+ """
1797
+ return self._getFileProperty( key=key, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getatime(x)) )
1798
+
1799
+ def getFileSize( self, key : str, *, ext : str = None ) -> int:
1800
+ """
1801
+ Returns the file size of 'key', or None if file was not found.
1802
+ See comments on os.path.getsize() for compatibility
1803
+
1804
+ Parameters
1805
+ ----------
1806
+ key :
1807
+ filename, or list of filenames
1808
+ ext :
1809
+ Extension, or list thereof if 'key' is a list.
1810
+ Use
1811
+ -- None for the directory default
1812
+ -- "" for no automatic extension
1813
+ -- A Format to specify the default extension for that format.
1814
+
1815
+ Returns
1816
+ -------
1817
+ File size if 'key' is a string, otherwise a list thereof.
1818
+ """
1819
+ return self._getFileProperty( key=key, ext=ext, func=lambda x : os.path.getsize(x) )
1820
+
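+ # Illustrative sketch: file meta data. Each call returns None if the key does not exist.
+ #
+ # when = sd.getLastModificationTime("file1") # datetime.datetime or None
+ # size = sd.getFileSize("file1") # size in bytes or None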
1821
+ def rename( self, source : str, target : str, *, ext : str = None ):
1822
+ """
1823
+ Rename "source" key into "target" key.
1824
+ Function will raise an exception if not successful
1825
+
1826
+ Parameters
1827
+ ----------
1828
+ source, target:
1829
+ filenames
1830
+ ext :
1831
+ Extension used for both 'source' and 'target'.
1832
+ Use
1833
+ -- None for the directory default
1834
+ -- "" for no automatic extensions.
1835
+ -- A Format to specify the default extension for that format.
1836
+ """
1837
+ # empty directory
1838
+ if self._path is None:
1839
+ return
1840
+
1841
+ # handle directories in 'source'
1842
+ if len(source) == 0: raise ValueError("'source' missing (the filename)")
1843
+ sub, source_ = os.path.split(source)
1844
+ if len(source_) == 0: raise IsADirectoryError( source, txtfmt("'source' %s indicates a directory, not a file", source ))
1845
+ if len(sub) > 0:
1846
+ src_full = self(sub).fullKeyName(key=source_,ext=ext)
1847
+ else:
1848
+ src_full = self.fullKeyName( source, ext=ext )
1849
+
1850
+ # handle directories in 'target'
1851
+ if len(target) == 0: raise ValueError("'target' missing (the filename)" )
1852
+ sub, target_ = os.path.split(target)
1853
+ if len(target_) == 0: raise IsADirectoryError( target, txtfmt("'target' %s indicates a directory, not a file", target))
1854
+ if len(sub) > 0:
1855
+ tar_dir = self(sub)
1856
+ tar_dir.createDirectory()
1857
+ tar_full = tar_dir.fullKeyName(key=target_,ext=ext)
1858
+ else:
1859
+ tar_full = self.fullKeyName( target, ext=ext )
1860
+ self.createDirectory()
1861
+
1862
+ os.rename(src_full, tar_full)
1863
+
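+ # Illustrative sketch: renaming a key, optionally across sub directories.
+ #
+ # sd.rename("file1", "archive/file1") # moves 'file1' into sub directory 'archive', creating it if needed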
1864
+ # utilities
1865
+
1866
+ @staticmethod
1867
+ def removeBadKeyCharacters( key:str, by:str=' ' ) -> str:
1868
+ """
1869
+ Replaces invalid characters in a filename by 'by'.
1870
+ See util.fmt_filename() for documentation and further options.
1871
+ """
1872
+ return fmt_filename( key, by=by )
1873
+
1874
+ def unqiueLabelToKey( self, unique_label:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
1875
+ """
1876
+ Converts a unique label which might contain invalid characters into a unique file name, such that the full file name does not exceed 'max_length' bytes.
1877
+ The returned key has the format
1878
+ name + separator + ID
1879
+ where ID has length id_length.
1880
+ If unique_label is already guaranteed to be a valid filename, use unqiueLongFileNameToKey() instead.
1881
+ """
1882
+ len_ext = len(self.ext)
1883
+ assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
1884
+ uqf = uniqueLabelExt( max_length=max_length-len_ext, id_length=id_length, separator=separator, filename_by="default" )
1885
+ return uqf( unique_label )
1886
+
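+ # Illustrative sketch: turning a free-form label into a valid, length-limited key.
+ #
+ # key = sd.unqiueLabelToKey("portfolio: EUR/USD 2024?*", max_length=64)
+ # sd.write(key, obj) # 'obj' is a placeholder for whatever object you want to store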
1887
+ def unqiueLongFileNameToKey( self, unique_filename:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
1888
+ """
1889
+ Converts a unique filename which might be too long into a shorter unique filename such that the total length plus 'ext' does not exceed 'max_length' bytes.
1890
+ If the filename is already short enough, no change is made.
1891
+
1892
+ If 'unique_filename' is not guaranteed to be a valid filename, use unqiueLabelToKey() instead.
1893
+ """
1894
+ len_ext = len(self.ext)
1895
+ assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
1896
+ uqf = uniqueLabelExt( max_length=max_length-len_ext, id_length=id_length, separator=separator )
1897
+ return uqf( unique_filename )
1898
+
1899
+ # -- dict-like interface --
1900
+
1901
+ def __call__(self, keyOrSub : str,
1902
+ default = RETURN_SUB_DIRECTORY,
1903
+ raiseOnError : bool = False,
1904
+ *,
1905
+ version : str = None,
1906
+ ext : str = None,
1907
+ fmt : Format = None,
1908
+ delete_wrong_version : bool = True,
1909
+ createDirectory : bool = None ):
1910
+ """
1911
+ Return either the value of a sub-key (file), or return a new sub directory.
1912
+ If only one argument is used, then this function returns a new sub directory.
1913
+ If two arguments are used, then this function returns read( keyOrSub, default ).
1914
+
1915
+ sd = SubDir("!/test")
1916
+
1917
+ Member access:
1918
+ x = sd('x', None) reads 'x' with default value None
1919
+ x = sd('sd/x', default=1) reads 'x' from sub directory 'sd' with default value 1
1920
+ x = sd('x', default=1, ext="tmp") reads 'x.tmp' from sub directory 'sd' with default value 1
1921
+
1922
+ Create sub directory:
1923
+ sd2 = sd("subdir") creates and returns handle to subdirectory 'subdir'
1924
+ sd2 = sd("subdir1/subdir2") creates and returns handle to subdirectory 'subdir1/subdir2'
1925
+ sd2 = sd("subdir1/subdir2", ext=".tmp") creates and returns handle to subdirectory 'subdir1/subdir2' with extension "tmp"
1926
+ sd2 = sd(ext=".tmp") returns handle to current subdirectory with extension "tmp"
1927
+
1928
+ Parameters
1929
+ ----------
1930
+ keyOrSub : str
1931
+ identify the object requested. Should be a string or a list of strings.
1932
+ default:
1933
+ If specified, this function reads 'keyOrSub' with read( keyOrSub, default, *args, **kwargs )
1934
+ If not specified, then this function calls SubDir(keyOrSub,parent=self,ext=ext,fmt=fmt)
1935
+
1936
+ The following keywords are only relevant when reading files.
1937
+ They echo the parameters of read()
1938
+
1939
+ raiseOnError : bool
1940
+ Whether to raise an exception if reading an existing file failed.
1941
+ By default this function fails silently and returns the default.
1942
+ version : str
1943
+ If not None, specifies the version of the current code base.
1944
+ Use '*' to read any version (this is distinct from reading a file without version).
1945
+ If version is not '*', then this version will be compared to the version of the file being read.
1946
+ If they do not match, read fails (either by returning default or throwing an exception).
1947
+ delete_wrong_version : bool
1948
+ If True, and if a wrong version was found, delete the file.
1949
+ ext : str
1950
+ Extension overwrite, or a list thereof if key is a list
1951
+ Set to:
1952
+ -- None to use directory's default
1953
+ -- '*' to use the extension implied by 'fmt'
1954
+ -- for convenience 'ext' can also be a Format (in this case leave fmt as None)
1955
+ fmt : Format
1956
+ File format or None to use the directory's default.
1957
+ Note that 'fmt' cannot be a list even if 'key' is.
1958
+ Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
1959
+
1960
+ The following keywords are only relevant when accessing directories
1961
+ They echo the parameters of __init__
1962
+
1963
+ createDirectory : bool
1964
+ Whether or not to create the directory. The default, None, is to inherit the behaviour from self.
1965
+ ext : str
1966
+ Set to None to inherit the parent's extension.
1967
+ fmt : Format
1968
+ Set to None to inherit the parent's format.
1969
+
1970
+ Returns
1971
+ -------
1972
+ Either the value in the file, a new sub directory, or lists thereof.
1973
+ Returns None if an element was not found.
1974
+ """
1975
+ if default == SubDir.RETURN_SUB_DIRECTORY:
1976
+ if not isinstance(keyOrSub, str):
1977
+ if not isinstance(keyOrSub, Collection):
1978
+ raise ValueError(txtfmt("'keyOrSub' must be a string or an iterable object. Found type '%s;", type(keyOrSub)))
1979
+ return [ SubDir( k,parent=self,ext=ext,fmt=fmt,createDirectory=createDirectory) for k in keyOrSub ]
1980
+ return SubDir(keyOrSub,parent=self,ext=ext,fmt=fmt,createDirectory=createDirectory)
1981
+ return self.read( key=keyOrSub,
1982
+ default=default,
1983
+ raiseOnError=raiseOnError,
1984
+ version=version,
1985
+ delete_wrong_version=delete_wrong_version,
1986
+ ext=ext,
1987
+ fmt=fmt )
1988
+
1989
+ def __getitem__( self, key ):
1990
+ """
1991
+ Reads self[key]
1992
+ If 'key' does not exist, throw a KeyError
1993
+ """
1994
+ return self.read( key=key, default=None, raiseOnError=True )
1995
+
1996
+ def __setitem__( self, key, value):
1997
+ """ Writes 'value' to 'key' """
1998
+ self.write(key,value)
1999
+
2000
+ def __delitem__(self,key):
2001
+ """ Silently delete self[key] """
2002
+ self.delete(key, False )
2003
+
2004
+ def __len__(self) -> int:
2005
+ """ Return the number of files (keys) in this directory """
2006
+ return len(self.keys())
2007
+
2008
+ def __iter__(self):
2009
+ """ Returns an iterator which allows traversing through all keys (files) below this directory """
2010
+ return self.keys().__iter__()
2011
+
2012
+ def __contains__(self, key):
2013
+ """ Implements 'in' operator """
2014
+ return self.exists(key)
2015
+
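+ # Illustrative sketch: the dictionary-style interface.
+ #
+ # sd['x'] = 1 # write (same as sd.write('x',1))
+ # x = sd['x'] # read; raises KeyError if missing
+ # del sd['x'] # silent delete
+ # len(sd), list(sd) # number of keys, and iteration over keys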
2016
+ # -- object like interface --
2017
+
2018
+ def __getattr__(self, key):
2019
+ """
2020
+ Allow using member notation to get data
2021
+ This function throws an AttributeError if 'key' is not found.
2022
+ """
2023
+ if not self.exists(key):
2024
+ raise AttributeError(key)
2025
+ return self.read( key=key, raiseOnError=True )
2026
+
2027
+ def __setattr__(self, key, value):
2028
+ """
2029
+ Allow using member notation to write data
2030
+ Note: keys starting with '_' are /not/ written to disk
2031
+ """
2032
+ if key[0] == '_':
2033
+ self.__dict__[key] = value
2034
+ else:
2035
+ self.write(key,value)
2036
+
2037
+ def __delattr__(self, key):
2038
+ """ Silently delete a key with member notation. """
2039
+ verify( key[:1] != "_", "Deleting protected or private members disabled. Fix __delattr__ to support this")
2040
+ return self.delete( key=key, raiseOnError=False )
2041
+
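+ # Illustrative sketch: the attribute-style interface.
+ #
+ # sd.x = 1 # write 'x' to disk (names starting with '_' stay in memory only)
+ # x = sd.x # read back; raises AttributeError if 'x' does not exist
+ # del sd.x # silent delete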
2042
+ # pickling
2043
+ # --------
2044
+
2045
+ def __getstate__(self):
2046
+ """ Return state to pickle """
2047
+ return dict( path=self._path, ext=self._ext, fmt=self._fmt, crt=self._crt )
2048
+
2049
+ def __setstate__(self, state):
2050
+ """ Restore pickle """
2051
+ self._path = state['path']
2052
+ self._ext = state['ext']
2053
+ self._fmt = state['fmt']
2054
+ self._crt = state['crt']
2055
+
2056
+ # caching
2057
+ # -------
2058
+
2059
+ def cache( self, version : str = None , *,
2060
+ dependencies : list = None,
2061
+ label : Callable = None,
2062
+ uid : Callable = None,
2063
+ name : str = None,
2064
+ exclude_args : list[str] = None,
2065
+ include_args : list[str] = None,
2066
+ exclude_arg_types : list[type] = None,
2067
+ version_auto_class : bool = True):
2068
+ """
2069
+ Wraps a callable or a class into a cachable function.
2070
+ Caching is based on the following two simple principles:
2071
+
2072
+ 1) Unique Call ID:
2073
+ When a function is called with some parameters, the wrapper identifies a unique ID based
2074
+ on the qualified name of the function and on its runtime functional parameters (ie those
2075
+ which alter the outcome of the function).
2076
+ When a function is called the first time with a given unique call ID, it will store
2077
+ the result of the call to disk. If the function is called with the same call ID again,
2078
+ the result is read from disk and returned.
2079
+
2080
+ To compute unique call IDs, cdxbasics.util.namedUniqueHashExt() is used.
2081
+ Please read implementation comments there:
2082
+ Key default features:
2083
+ * It hashes objects via their __dict__ or __slots__ members.
2084
+ This can be overwritten for a class by implementing __unique_hash__; see cdxbasics.util.namedUniqueHashExt().
2085
+ * Function members of objects or any members starting with '_' are not considered
2086
+ unless this behaviour is changed using CacheController().
2087
+ * Numpy and panda frames are hashed using their byte representation.
2088
+ That is slow and not recommended. It is better to identify numpy/panda inputs
2089
+ via their generating characteristic ID.
2090
+
2091
+ 2) Version:
2092
+ Each function has a version, which includes dependencies on other functions or classes.
2093
+ If the version of a result on disk does not match the current version, it is deleted
2094
+ and the function is called again. This way you can use your code to drive updates
2095
+ to data generated with cached functions.
2096
+ Behind the scenes this is implemented using cdxbasics.version.version() which means
2097
+ that the version of a cached function can also depend on versions of non-cached functions
2098
+ or other objects.
2099
+
2100
+ Functions
2101
+ ---------
2102
+ Example of caching functions:
2103
+
2104
+ Cache a simple function 'f':
2105
+
2106
+ from cdxbasics.subdir import SubDir
2107
+ cache = SubDir("!/.cache", cacheController=CacheController(debug_verbose=Context("all")))
2108
+
2109
+ @cache.cache("0.1")
2110
+ def f(x,y):
2111
+ return x*y
2112
+
2113
+ _ = f(1,2) # function gets computed and the result cached
2114
+ _ = f(1,2) # restore result from cache
2115
+ _ = f(2,2) # different parameters: compute and store result
2116
+
2117
+ Another function g which calls f, and whose version therefore depends on f's version:
2118
+
2119
+ from cdxbasics.subdir import SubDir
2120
+ cache = SubDir("!/.cache", cacheController=CacheController(debug_verbose=Context("all")))
2121
+
2122
+ @cache.cache("0.1", dependencies=[f])
2123
+ def g(x,y):
2124
+ return f(x,y)**2
2125
+
2126
+ A function may have non-functional parameters which do not alter the function's outcome.
2127
+ An example are 'debug' flags:
2128
+
2129
+ from cdxbasics.subdir import SubDir
2130
+ cache = SubDir("!/.cache", cacheController=CacheController(debug_verbose=Context("all")))
2131
+
2132
+ @cache.cache("0.1", dependencies=[f], exclude_args='debug')
2133
+ def g(x,y,debug): # <-- debug is a non-functional parameter
2134
+ if debug:
2135
+ print(f"h(x={x},y={y})")
2136
+ return f(x,y)**2
2137
+
2138
+ You can systematically define certain types as non-functional for *all* functions wrapped
2139
+ by this SubDir by specifying the respective parameter for the CacheController() in SubDir.__init__().
2140
+
2141
+ The Unique Call ID of a function is by default generated from its fully qualified name
2142
+ and a unique hash of its functional parameters.
2143
+ This can be made more readable by using id=
2144
+
2145
+ from cdxbasics.subdir import SubDir
2146
+ cache = SubDir("!/.cache", cacheController=CacheController(debug_verbose=Context("all")))
2147
+
2148
+ @cache.cache("0.1", id="f({x},{y}") # <- using a string to be passed to str.format()
2149
+ def f(x,y):
2150
+ return x*y
2151
+
2152
+ You can also use functions:
2153
+
2154
+ from cdxbasics.subdir import SubDir
2155
+ cache = SubDir("!/.cache", cacheController=CacheController(debug_verbose=Context("all")))
2156
+
2157
+ # Using a function 'id'. Note the **_ to catch uninteresting parameters, here 'debug'
2158
+ @cache.cache("0.1", id=lambda x,y,**_: f"h({x},{y})", exclude_args='debug')
2159
+ def h(x,y,debug=False):
2160
+ if debug:
2161
+ print(f"h(x={x},y={y})")
2162
+ return x*y
2163
+
2164
+ Note that by default the id returned by 'id' is not assumed to be unique,
2165
+ and a unique hash of the qualified function name and all pertinent arguments is appended.
2166
+ That is why in the previous example we still need to exclude 'debug' via exclude_args.
2167
+
2168
+ If the id you generate is guaranteed to be unique for all functional parameter values,
2169
+ you can add unique=True. In this case the id is used directly as the filename of the cached result (provided it is a valid, short enough filename).
2170
+
2171
+ from cdxbasics.subdir import SubDir
2172
+ cache = SubDir("!/.cache", cacheController=CacheController(debug_verbose=Context("all")))
2173
+
2174
+ # Using a function 'id' with 'unique' to generate a unique ID.
2175
+ @cache.cache("0.1", id=lambda x,y,**_: f"h({x},{y})", unique=True)
2176
+ def h(x,y,debug=False):
2177
+ if debug:
2178
+ print(f"h(x={x},y={y})")
2179
+ return x*y
2180
+
2181
+ Numpy/Panda
2182
+ -----------
2183
+ Numpy/Panda data should not be hashed for identifying unique call IDs.
2184
+ Instead, use the defining characteristics for generating the data frames.
2185
+
2186
+ For example:
2187
+
2188
+ from cdxbasics.subdir import SubDir
2189
+ cache = SubDir("!/.cache", cacheController=CacheController(debug_verbose=Context("all")))
2190
+
2191
+ from cdxbasics.prettydict import pdct
2192
+
2193
+ @cache.cache("0.1")
2194
+ def load_src( src_def ):
2195
+ result = ... load ...
2196
+ return result
2197
+
2198
+ # ignore 'data'. It is uniquely identified by 'src_def' -->
2199
+ @cache.cache("0.1", dependencies=[load_src], exclude_args=['data'])
2200
+ def statistics( stats_def, src_def, data ):
2201
+ stats = ... using data
2202
+ return stats
2203
+
2204
+ src_def = pdct()
2205
+ src_def.start = "2010-01-01"
2206
+ src_def.end = "2025-01-01"
2207
+ src_def.x = 0.1
2208
+
2209
+ stats_def = pdct()
2210
+ stats_def.lambda_ = 0.1
2211
+ stats_def.window = 100
2212
+
2213
+ data = load_src( src_def )
2214
+ stats = statistics( stats_def, src_def, data )
2215
+
2216
+ While instructive, this case is not optimal: we do not really need to load 'data'
2217
+ if 'stats' can be restored from its cache (unless we need 'data' further on).
2218
+
2219
+ Consider therefore
2220
+
2221
+ @cache.cache("0.1")
2222
+ def load_src( src_def ):
2223
+ result = ... load ...
2224
+ return result
2225
+
2226
+ # no separate 'data' argument: it is loaded inside via load_src() -->
2227
+ @cache.cache("0.1", dependencies=[load_src])
2228
+ def statistics_only( stats_def, src_def ):
2229
+ data = load_src( src_def ) # <-- embed call to load_src() here
2230
+ stats = ... using data
2231
+ return stats
2232
+
2233
+ stats = statistics_only( stats_def, src_def )
2234
+
2235
+ Member functions
2236
+ ----------------
2237
+ You can cache member functions like any other function.
2238
+ Note that version information is by default inherited, i.e. member functions will be dependent on the version of their
2239
+ defining class, and class versions will be dependent on their base classes' versions.
2240
+
2241
+ from cdxbasics.subdir import SubDir, version
2242
+ cache = SubDir("!/.cache", cacheController=CacheController(debug_verbose=Context("all")))
2243
+
2244
+ @version("0.1")
2245
+ class A(object):
2246
+ def __init__(self, x):
2247
+ self.x = x
2248
+
2249
+ @cache.cache("0.1")
2250
+ def f(self, y):
2251
+ return self.x*y
2252
+
2253
+ a = A(x=1)
2254
+ _ = a.f(y=1) # compute f and store result
2255
+ _ = a.f(y=1) # load result back from disk
2256
+ a.x = 2
2257
+ _ = a.f(y=1) # 'a' changed: compute f and store result
2258
+ b = A(x=2)
2259
+ _ = b.f(y=1) # same unique call ID as previous call -> restore result from disk
2260
+
2261
+ **WARNING**
2262
+ The hashing function used -- cdxbasics.util.uniqueHashExt() -- does by default *not* process members of objects or dictionaries
2263
+ which start with a "_". This behaviour can be changed using CacheController().
2264
+ For reasonably complex objects it is recommended to implement:
2265
+ __unique_hash__( self, length : int, parse_functions : bool, parse_underscore : str )
2266
+ (it is also possible to simply set this value to a string constant).
2267
+
2268
+ Bound Member Functions
2269
+ ----------------------
2270
+ Note that the above is functionally different from decorating a bound member function:
2271
+
2272
+ from cdxbasics.subdir import SubDir, version
2273
+ cache = SubDir("!/.cache", cacheController=CacheController(debug_verbose=Context("all")))
2274
+
2275
+ class A(object):
2276
+ def __init__(self,x):
2277
+ self.x = x
2278
+ def f(self,y):
2279
+ return self.x*y
2280
+
2281
+ a = A(x=1)
2282
+ f = cache.cache("0.1", id=lambda self, y : f"a.f({y})")(a.f) # <- decorate bound 'f'.
2283
+ r = f(y=2)
2284
+
2285
+ In this case the function 'f' is bound to 'a'. The object is added as 'self' to the function
2286
+ parameter list even though the bound function parameter list does not include 'self'.
2287
+ This, together with the comments on hashing objects above, ensures that (hashed) changes to 'a' will
2288
+ be reflected in the unique call ID for the member function.
2289
+
2290
+ Classes
2291
+ -------
2292
+ Classes can also be cached.
2293
+ This is done in two steps: first, the class itself is decorated to provide version information at its own level.
2294
+ Secondly, decorate __init__ which also helps to define the unique call id. You do not need to specify a version
2295
+ for __init__ as its version usually coincides with the version of the class.
2296
+
2297
+ Simple example:
2298
+
2299
+ cache = SubDir("!/.cache", cacheController=CacheController(debug_verbose=Context("all")))
2300
+
2301
+ @cache.cache("0.1")
2302
+ class A(object):
2303
+
2304
+ @cache.cache(exclude_args=['debug'])
2305
+ def __init__(self, x, debug):
2306
+ if debug:
2307
+ print("__init__",x)
2308
+ self.x = x
2309
+
2310
+ __init__ does not actually return a value; for this reason the actual function decorated will be __new__.
2311
+ Attempting to cache decorate __new__ will lead to an exception.
2312
+
2313
+ A nuance for __init__ vs ordinary member function is that 'self' is non-functional.
2314
+ It is therefore automatically excluded from computing a unique call ID.
2315
+ Specifically, 'self' is not part of the arguments passed to 'id':
2316
+
2317
+ @cache.cache("0.1")
2318
+ class A(object):
2319
+
2320
+ @cache.cache("0.1", id=lambda x, debug: f"A.__init__(x={x})") # <-- 'self' is not passed to the lambda function; no need to add **_
2321
+ def __init__(self, x, debug):
2322
+ if debug:
2323
+ print("__init__",x)
2324
+ self.x = x
2325
+
2326
+ Decorating classes with __slots__ does not yet work.
2327
+
2328
+ Non-functional parameters
2329
+ -------------------------
2330
+ Often functions have parameters which do not alter the output of the function but control i/o or other aspects of the overall environment.
2331
+ An example is a function parameter 'debug':
2332
+
2333
+ def f(x,y,debug=False):
2334
+ z = x*y
2335
+ if not debug:
2336
+ print(f"x={x}, y={y}, z={z}")
2337
+ return z
2338
+
2339
+ To specify which parameters are pertinent for identifying a unique id, use:
2340
+
2341
+ a) include_args: list of function arguments to include. If None, all arguments are used as input for the next step.
2342
+ b) exclude_args: list of function arguments to exclude, if not None.
2343
+ c) exclude_arg_types: a list of types to exclude. This is helpful if control flow is managed with dedicated data types.
2344
+ An example of such a type is cdxbasics.verbose.Context which is used to print hierarchical output messages.
2345
+ Types can be globally excluded using the CacheController.
2346
+
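+ As an illustrative sketch combining a) to c) ('compute' is a made-up function; Context is cdxbasics.verbose.Context):
+
+ @cache.cache("0.1", exclude_args=['debug'], exclude_arg_types=[Context])
+ def compute(x, y, debug=False, verbose=Context("all")):
+ return x*y
+
+ Here neither 'debug' nor 'verbose' affects the unique call ID.
+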
2347
+ See also
2348
+ --------
2349
+ For project-wide use it is usually inconvenient to control caching at the level of a 'directory'.
2350
+ See VersionedCacheRoot(), which is a thin wrapper around a SubDir with a CacheController.
2351
+
2352
+ Parameters
2353
+ ----------
2354
+ version : str, optional
2355
+ Version of the function.
2356
+ * If None then F must be decorated with cdxbasics.version.version().
2357
+ * If set, the function F is first decorated with cdxbasics.version.version().
2358
+ dependencies : list, optional
2359
+ List of version dependencies
2360
+
2361
+ id : str, Callable
2362
+ Create a call label for the function call and its parameters.
2363
+ See above for a description.
2364
+ * A plain string without {} formatting: this is the fully qualified id
2365
+ * A string with {} formatting: id.format( name=name, **parameters ) will be used to generate the fully qualified id
2366
+ * A Callable, in which case id( name=name, **parameters ) will be used to generate the fully qualified id
2367
+
2368
+ unique : bool
2369
+ Whether the 'id' generated by 'id' is unique for this function call with its parameters.
2370
+ If True, then the function will attempt to use 'id' as filename as long as it has no invalid characters and is short
2371
+ enough (see 'max_filename_length').
2372
+ If False, the function will append to the 'id' a unique hash of the qualified function name and all pertinent parameters
2373
+
2374
+ name : str
2375
+ The name of the function, or None for using the fully qualified function name.
2376
+
2377
+ include_args : list[str]
2378
+ List of arguments to include in generating a unique id, or None for all.
2379
+
2380
+ exclude_args : list[str]:
2381
+ List of arguments to exclude.
2382
+
2383
+ exclude_arg_types : list[type]
2384
+ List of types to exclude.
2385
+
2386
+ version_auto_class : bool
2387
+ Whether to pass auto_class=True to cdxbasics.version.version(), so that member functions automatically pick up a version dependency on their defining class. Defaults to True.
2388
+
2389
+
2390
+ Returns
2391
+ -------
2392
+ A callable to execute F if need be.
2393
+ This callable has a member 'cache_info' which can be used to access information on caching activity.
2394
+
2395
+ Information available at any time after decoration:
2396
+ F.cache_info.name : qualified name of the function
2397
+ F.cache_info.signature : signature of the function
2398
+
2399
+ Additional information available during a call to a decorated function F, and thereafter:
2400
+ F.cache_info.version : unique version string reflecting all dependencies.
2401
+ F.cache_info.uid : unique call ID.
2402
+ F.cache_info.label : last id generated, or None (if id was a string and unique was True)
2403
+ F.cache_info.arguments : arguments parsed to create a unique call ID, or None (if id was a string and unique was True)
2404
+
2405
+ Additional information available after a call to F:
2406
+ F.cache_info.last_cached : whether the last function call returned a cached object
2407
+
2408
+ The function F has additional function parameters
2409
+ override_cache_mode : allows overriding the caching mode temporarily, in particular "off"
2410
+ track_cached_files : pass a CacheTracker object to keep track of all files used (loaded from or saved to).
2411
+ This can be used to delete intermediary files when a large operation was completed.
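+
+ Illustrative sketch of these extras (continuing the examples above):
+
+ _ = f(1,2, override_cache_mode="off") # temporarily disable caching for this call
+ f.cache_info.last_cached # True if the last call returned a cached object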
2412
+ """
2413
+ return CacheCallable(subdir = self,
2414
+ version = version,
2415
+ dependencies = dependencies,
2416
+ label = label,
2417
+ uid = uid,
2418
+ name = name,
2419
+ exclude_args = exclude_args,
2420
+ include_args = include_args,
2421
+ exclude_arg_types = exclude_arg_types,
2422
+ version_auto_class = version_auto_class )
2423
+
2424
+ def cache_class( self,
2425
+ version : str = None , *,
2426
+ name : str = None,
2427
+ dependencies : list = None,
2428
+ version_auto_class : bool = True
2429
+ ):
2430
+ """
2431
+ Short cut for SubDir.cache() for classes
2432
+ See SubDir.cache() for documentation.
2433
+ """
2434
+ return self.cache( name=name,
2435
+ version=version,
2436
+ dependencies=dependencies,
2437
+ version_auto_class=version_auto_class)
2438
+
2439
+
2440
+ def _ensure_has_version( F,
2441
+ version : str = None,
2442
+ dependencies : list = None,
2443
+ auto_class : bool = True,
2444
+ allow_default: bool = False):
2445
+ """
2446
+ Sets a version if requested, or ensures one is present
2447
+ """
2448
+ if version is None and not dependencies is None:
2449
+ raise ValueError(f"'{F.__qualname__}: you cannot specify version 'dependencies' without specifying also a 'version'")
2450
+
2451
+ version_info = getattr(F,"version", None)
2452
+ if not version_info is None and type(version_info).__name__ != Version.__name__:
2453
+ raise RuntimeError(f"'{F.__qualname__}: has a 'version' member, but it is not of class 'Version'. Found '{type(version_info).__name__}'")
2454
+
2455
+ if version is None:
2456
+ if not version_info is None:
2457
+ return F
2458
+ if allow_default:
2459
+ version = "0"
2460
+ else:
2461
+ raise ValueError(f"'{F.__qualname__}': cannot determine version. Specify 'version'")
2462
+ elif not version_info is None:
2463
+ raise ValueError(f"'{F.__qualname__}: function already has version information; cannot set version '{version}' again")
2464
+ return version_decorator( version=version,
2465
+ dependencies=dependencies,
2466
+ auto_class=auto_class)(F)
2467
+
2468
+ def _qualified_name( F, name ):
2469
+ """
2470
+ Return qualified name including module name, robustly
2471
+ """
2472
+ if name is None:
2473
+ try:
2474
+ name = F.__qualname__
2475
+ except:
2476
+ try:
2477
+ name = F.__name__
2478
+ except:
2479
+ pass
2480
+ verify( not name is None, "Cannot determine qualified name for 'F': it has neither __qualname__ nor a type with a name. Please specify 'name'", exception=RuntimeError)
2481
+ try:
2482
+ name = name + "@" + F.__module__
2483
+ except:
2484
+ warn( f"Cannot determine module name for '{name}' of {type(F)}" )
2485
+ return name
2486
+
2487
+ class CacheCallable(object):
2488
+ """
2489
+ Utility class for SubDir.cache().
2490
+ See documentation for that function.
2491
+ """
2492
+
2493
+ def __init__(self,
2494
+ subdir : SubDir, *,
2495
+ version : str = None,
2496
+ dependencies : list,
2497
+ label : Callable = None,
2498
+ uid : Callable = None,
2499
+ name : str = None,
2500
+ exclude_args : set[str] = None,
2501
+ include_args : set[str] = None,
2502
+ exclude_arg_types : set[type] = None,
2503
+ version_auto_class : bool = True,
2504
+ name_of_name_arg : str = "name"):
2505
+ """
2506
+ Utility class for SubDir.cache().
2507
+ See documentation for that function.
2508
+ """
2509
+ if not label is None and not uid is None:
2510
+ error("Cannot specify both 'label' and 'uid'.")
2511
+
2512
+ self._subdir = SubDir(subdir)
2513
+ self._version = str(version) if not version is None else None
2514
+ self._dependencies = list(dependencies) if not dependencies is None else None
2515
+ self._label = label
2516
+ self._uid = uid
2517
+ self._name = str(name) if not name is None else None
2518
+ self._exclude_args = set(exclude_args) if not exclude_args is None and len(exclude_args) > 0 else None
2519
+ self._include_args = set(include_args) if not include_args is None and len(include_args) > 0 else None
2520
+ self._exclude_arg_types = set(exclude_arg_types) if not exclude_arg_types is None and len(exclude_arg_types) > 0 else None
2521
+ self._version_auto_class = bool(version_auto_class)
2522
+ self._name_of_name_arg = str(name_of_name_arg)
2523
+
2524
+ @property
2525
+ def uid_or_label(self) -> Callable:
2526
+ return self._uid if self._label is None else self._label
2527
+ @property
2528
+ def unique(self) -> bool:
2529
+ return not self._uid is None
2530
+
2531
+ @property
2532
+ def cacheController(self) -> CacheController:
2533
+ """ Returns the cache controller """
2534
+ return self._subdir.cacheController
2535
+ @property
2536
+ def cache_mode(self) -> Context:
2537
+ return self.cacheController.cache_mode
2538
+ @property
2539
+ def debug_verbose(self) -> Context:
2540
+ return self.cacheController.debug_verbose
2541
+ @property
2542
+ def uniqueNamedFileName(self) -> Callable:
2543
+ return self.cacheController.uniqueNamedFileName
2544
+ @property
2545
+ def uniqueLabelledFileName(self) -> Callable:
2546
+ return self.cacheController.uniqueLabelledFileName
2547
+ @property
2548
+ def global_exclude_arg_types(self) -> list[type]:
2549
+ return self.cacheController.exclude_arg_types
2550
+
2551
+ def __call__(self, F : Callable):
2552
+ """
2553
+ Decorate 'F' as cachable callable. Can also decorate classes via ClassCallable()
2554
+ See SubDir.cache() for documentation.
2555
+ """
2556
+ if inspect.isclass(F):
2557
+ if not self._label is None: raise ValueError(f"'{F.__qualname__}': when decorating a class specify 'label' for __init__, not the class")
2558
+ if not self._uid is None: raise ValueError(f"'{F.__qualname__}': when decorating a class specify 'uid' for __init__, not the class")
2559
+ if not self._exclude_args is None: raise ValueError(f"'{F.__qualname__}': when decorating a class specify 'exclude_args' for __init__, not the class")
2560
+ if not self._include_args is None: raise ValueError(f"'{F.__qualname__}': when decorating a class specify 'include_args' for __init__, not the class")
2561
+ if not self._exclude_arg_types is None: raise ValueError(f"'{F.__qualname__}': when decorating a class specify 'exclude_arg_types' for __init__, not the class")
2562
+ return self._wrap_class(F)
2563
+
2564
+ return self._wrap( F )
2565
+
2566
+ def _wrap_class(self, C : type):
2567
+ """
2568
+ Wrap class
2569
+ This wrapper:
2570
+ 1) Assigns a cdxbasics.version.version() for the class (if not yet present)
2571
+ 2) Extracts from __init__ the wrapper to decorate __new__
2572
+ """
2573
+ debug_verbose = self.cacheController.debug_verbose
2574
+
2575
+ assert inspect.isclass(C), ("Not a class", C)
2576
+
2577
+ # apply decorator provided for __init__ to __new__
2578
+ C__init__ = getattr(C, "__init__", None)
2579
+ if C__init__ is None:
2580
+ raise RuntimeError("'{F.__qualname__}': define and decorate __init__")
2581
+ init_cache_callable = getattr(C__init__, "init_cache_callable", None)
2582
+ if init_cache_callable is None:
2583
+ raise RuntimeError("'{F.__qualname__}': must also decorate __init__")
2584
+ assert type(init_cache_callable).__name__ == CacheCallable.__name__, (f"*** Internal error: '{C.__qualname__}': __init__ has wrong type for 'init_cache_callable': {type(init_cache_callable)} ?")
2585
+
2586
+ C__init__.init_cache_callable = None # tell the __init__ wrapper we have processed this information
2587
+
2588
+ C__new__ = C.__new__
2589
+ class_parameter = list(inspect.signature(C__new__).parameters)[0]
2590
+ init_cache_callable._exclude_args = {class_parameter} if init_cache_callable._exclude_args is None else ( init_cache_callable._exclude_args | {class_parameter})
2591
+ init_cache_callable._name = _qualified_name( C, self._name ) if init_cache_callable._name is None else init_cache_callable._name
2592
+
2593
+ C.__new__ = init_cache_callable._wrap( C__new__, is_new = True )
2594
+ C.__new__.cache_info.signature = inspect.signature(C__init__) # signature of the function
2595
+
2596
+ # apply version
2597
+ # this also ensures that __init__ picks up a version dependency on the class itself
2598
+ # (as we forced 'auto_class' to be true)
2599
+ C = _ensure_has_version( C, version=self._version,
2600
+ dependencies=self._dependencies,
2601
+ auto_class=self._version_auto_class)
2602
+
2603
+ if not debug_verbose is None:
2604
+ debug_verbose.write(f"cache_class({C.__qualname__}): class wrapped; class parameter '{class_parameter}' to __new__ will be ignored.")
2605
+
2606
+ return C
2607
+
2608
+ def _wrap(self, F : Callable, is_new : bool = False):
2609
+ """
2610
+ Decorate callable 'F'.
2611
+ """
2612
+
2613
+ debug_verbose = self.cacheController.debug_verbose
2614
+ assert not inspect.isclass(F), ("Internal error")
2615
+
2616
+ # check validity
2617
+ # --------------
2618
+ # Cannot currently decorate classes.
2619
+
2620
+
2621
+ is_method = inspect.ismethod(F)
2622
+ if is_method:
2623
+ assert not getattr(F, "__self__", None) is None, ("Method type must have __self__...?", F.__qualname__ )
2624
+ elif not inspect.isfunction(F):
2625
+ # if F is neither a function or class, attempt to decorate (bound) __call__
2626
+ if not callable(F):
2627
+ raise ValueError(f"{F.__qualname__}' is not callable")
2628
+ F_ = getattr(F, "__call__", None)
2629
+ if F_ is None:
2630
+ raise ValueError(f"{F.__qualname__}' is callable, but has no '__call__'. F is of type {type(F)}")
2631
+ if not debug_verbose is None:
2632
+ debug_verbose.write(f"cache({F.__qualname__}): 'F' is an object; will use bound __call__")
2633
+ F = F_
2634
+ del F_
2635
+ else:
2636
+ # __new__ should not be decorated manually
2637
+ if not is_new and F.__name__ == "__new__":
2638
+ raise ValueError(f"You cannot decorate __new__ of '{F.__qualname__}'.")
2639
+
2640
+ # handle __init__
2641
+ # ---------------
2642
+
2643
+ if F.__name__ == "__init__":
2644
+ # the decorated __init__ has two purposes
2645
+ # 1) during initialization, keep hold of 'self' (this CacheCallable) which will in fact become the decorator for __new__
2646
+ # 2) during runtime, deciding based upon '__new__' caching status whether to run the original __init__
2647
+
2648
+ def execute_init( self, *args, **kwargs ):
2649
+ """
2650
+ Overwriting __init__ directly does not work as __init__ does not return anything.
2651
+ """
2652
+ # ensure '__new__' was processed.
2653
+ # this will happen when the class is wrapped
2654
+ if not execute_init.init_cache_callable is None:
2655
+ raise RuntimeError(f"Class '{type(self).__qualname__}': __init__ was decorated for caching but it seems the class '{type(self).__qualname__}' was not decorated, too.")
2656
+
2657
+ __magic_cache_call_init__ = getattr(self, "__magic_cache_call_init__", None)
2658
+ assert not __magic_cache_call_init__ is None, ("*** Internal error: __init__ called illegally")
2659
+
2660
+ if __magic_cache_call_init__:
2661
+ # call __init__
2662
+ F( self, *args, **kwargs )
2663
+ #if not debug_verbose is None:
2664
+ # debug_verbose.write(f"cache({type(self).__qualname__}): __init__ called")
2665
+ else:
2666
+ pass
2667
+ # do not call __init___
2668
+ #if not debug_verbose is None:
2669
+ # debug_verbose.write(f"cache({type(self).__qualname__}): __init__ skipped")
2670
+ self.__magic_cache_call_init__ = None
2671
+
2672
+ update_wrapper( wrapper=execute_init, wrapped=F )
2673
+
2674
+ # for class decorator to pick up.
2675
+ # the class wrapper (_wrap_class) will set this to None before execute_init
2676
+ # is called (ie before the first object is created)
2677
+ execute_init.init_cache_callable = self
2678
+ return execute_init
2679
+
2680
+ # version
2681
+ # -------
2682
+ # Decorate now or pick up existing @version
2683
+
2684
+ F = _ensure_has_version( F, version=self._version,
2685
+ dependencies=self._dependencies,
2686
+ auto_class=self._version_auto_class,
2687
+ allow_default=is_new )
2688
+
2689
+ # name
2690
+ # ----
2691
+
2692
+ name = _qualified_name( F, self._name )
2693
+
2694
+ # any other function
2695
+ # ------------------
2696
+
2697
+ exclude_types = ( self._exclude_arg_types if not self._exclude_arg_types is None else set() )\
2698
+ | ( self.global_exclude_arg_types if not self.global_exclude_arg_types is None else set())
2699
+
2700
+ def execute( *args, override_cache_mode : CacheMode = None,
2701
+ track_cached_files : CacheTracker = None,
2702
+ **kwargs ):
2703
+ """
2704
+ Cached execution of the wrapped function
2705
+ """
2706
+
2707
+ if is_new:
2708
+ # if 'F' is __new__ then we might need to turn off all caching when deserializing cached objects from disk
2709
+ if execute.__new_during_read:
2710
+ return F(*args, **kwargs)
2711
+
2712
+ # determine unique id_ for this function call
2713
+ # -------------------------------------------
2714
+
2715
+ label = None
2716
+ uid = None
2717
+ uid_or_label = self.uid_or_label
2718
+ if isinstance(uid_or_label, str) and self.unique:
2719
+ # if 'id' does not contain formatting codes, and the result is 'unique' then do not bother collecting
2720
+ # function arguments
2721
+ try:
2722
+ uid = uid_or_label.format() # throws a KeyError if 'id' contains formatting information
2723
+ except KeyError:
2724
+ pass
2725
+
2726
+ if not uid is None:
2727
+ # generate name with the unique string provided by the user
2728
+ label = uid
2729
+ uid = self.uniqueLabelledFileName( label )
2730
+ arguments = None
2731
+
2732
+ else:
2733
+ # get dictionary of named arguments
2734
+ arguments = execute.cache_info.signature.bind(*args,**kwargs)
2735
+ arguments.apply_defaults()
2736
+ arguments = arguments.arguments # ordered dict
2737
+
2738
+ if is_new:
2739
+ # delete 'cls' from argument list
2740
+ assert len(arguments) >= 1, ("*** Internal error", F.__qualname__, is_new, arguments)
2741
+ del arguments[list(arguments)[0]]
2742
+ argus = set(arguments)
2743
+
2744
+ # filter dictionary
2745
+ if not self._exclude_args is None or not self._include_args is None:
2746
+ excl = set(self._exclude_args) if not self._exclude_args is None else set()
2747
+ if not self._exclude_args is None:
2748
+ if not self._exclude_args <= argus:
2749
+ raise ValueError(f"{name}: 'exclude_args' contains unknown argument names: exclude_args {sorted(self._exclude_args)} while argument names are {sorted(argus)}.")
2750
+ if not self._include_args is None:
2751
+ if not self._include_args <= argus:
2752
+ raise ValueError(f"{name}: 'include_args' contains unknown argument names: include_args {sorted(self._iinclude_args)} while argument names are {sorted(argus)}.")
2753
+ excl = argus - self._include_args
2754
+ if not self._exclude_args is None:
2755
+ excl |= self._exclude_args
2756
+ for arg in excl:
2757
+ if arg in arguments:
2758
+ del arguments[arg]
2759
+ del excl
2760
+
2761
+ if len(exclude_types) > 0:
2762
+ excl = []
2763
+ for k, v in arguments.items():
2764
+ if type(v) in exclude_types or type(v).__name__ in exclude_types:
2765
+ excl.append( k )
2766
+ for arg in excl:
2767
+ if arg in arguments:
2768
+ del arguments[arg]
2769
+
2770
+ # apply logics
2771
+ if uid_or_label is None:
2772
+ label = name
2773
+
2774
+ else:
2775
+ if self._name_of_name_arg in arguments:
2776
+ error(f"{name}: '{self._name_of_name_arg}' is a reserved keyword and used as parameter name for the function name. Found it also in the function parameter list. Use 'name_of_name_arg' to change the internal parameter name used.")
2777
+
2778
+ # add standard arguments
2779
+ full_arguments = OrderedDict()
2780
+ if is_method:
2781
+ assert not 'self' in set(arguments), ("__self__ found in bound method argument list...?", F.__qualname__, execute.cache_info.signature.bind(*args,**kwargs).arguments )
2782
+ full_arguments['self'] = F.__self__
2783
+ full_arguments[self._name_of_name_arg] = name
2784
+ for k,v in arguments.items():
2785
+ full_arguments[k] = v
2786
+ arguments = full_arguments
2787
+ del full_arguments, k, v
2788
+
2789
+ # call format or function
2790
+ if isinstance( uid_or_label, str ):
2791
+ try:
2792
+ label = str.format( uid_or_label, **arguments )
2793
+ except KeyError as e:
2794
+ raise KeyError(e, f"Error while generating id for '{name}' using format string '{uid_or_label}': {e}. Available arguments: {list(arguments)}")
2795
+
2796
+ else:
2797
+ which = 'uid' if not self._uid is None else 'label'
2798
+ try:
2799
+ label = uid_or_label(**arguments)
2800
+ except TypeError as e:
2801
+ raise TypeError(e, f"Error while generating '{which}' for '{name}' using a function: {e}. Available arguments: {list(arguments)}")
2802
+ except Exception as e:
2803
+ raise type(e)(f"Error while generating '{which}' for '{name}': attempt to call '{which}' of type {type(uid_or_label)} failed: {e}")
2804
+ assert isinstance(label, str), ("Error:", which,"callable must return a string. Found",type(label))
2805
+
2806
+ if self.unique:
2807
+ uid = self.uniqueLabelledFileName( label )
2808
+ else:
2809
+ uid = self.uniqueNamedFileName( label, **arguments )
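+ # Rough sketch of the two naming modes (illustrative only, not a specification):
+ # unique=True : the label alone is turned into a filename-safe id of bounded length;
+ # unique=False: the label and the remaining (filtered) arguments are hashed together,
+ # so different argument values map to different cache files.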
2810
+
2811
+ # determine version, cache mode
2812
+ # ------------------
2813
+
2814
+ version_ = self._version if not self._version is None else F.version.unique_id64
2815
+ cache_mode = CacheMode(override_cache_mode) if not override_cache_mode is None else self.cache_mode
2816
+ del override_cache_mode
2817
+
2818
+ # store process information
2819
+ # -------------------------
2820
+
2821
+ execute.cache_info.label = str(label) if not label is None else None
2822
+ execute.cache_info.uid = uid
2823
+ execute.cache_info.version = version_
2824
+
2825
+ if self.cacheController.keep_last_arguments:
2826
+ info_arguments = OrderedDict()
2827
+ for argname, argvalue in arguments.items():
2828
+ info_arguments[argname] = str(argvalue)[:100]
2829
+ execute.cache_info.arguments = info_arguments
2830
+ del argname, argvalue
2831
+
2832
+ # execute caching
2833
+ # ---------------
2834
+
2835
+ if cache_mode.delete:
2836
+ self._subdir.delete( uid )
2837
+ elif cache_mode.read:
2838
+ class Tag:
2839
+ pass
2840
+ tag = Tag()
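+ # 'tag' serves as a sentinel default for read(): if read() returns this very
+ # object, nothing was cached, which is distinguishable from a cached value
+ # that happens to be None or False.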
2841
+ if not is_new:
2842
+ r = self._subdir.read( uid, tag, version=version_ )
2843
+ else:
2844
+ try:
2845
+ execute.__new_during_read = True
2846
+ r = self._subdir.read( uid, tag, version=version_ )
2847
+ finally:
2848
+ execute.__new_during_read = False
2849
+
2850
+ if not r is tag:
2851
+ if not track_cached_files is None:
2852
+ track_cached_files += self._subdir.fullFileName(uid)
2853
+ execute.cache_info.last_cached = True
2854
+ if not debug_verbose is None:
2855
+ debug_verbose.write(f"cache({name}): read '{label}' version 'version {version_}' from cache '{self._subdir.fullFileName(uid)}'.")
2856
+ if is_new:
2857
+ assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__", F.__qualname__, label)
2858
+ r.__magic_cache_call_init__ = False # since we called __new__, __init__ will be called next
2859
+
2860
+ return r
2861
+
2862
+ r = F(*args, **kwargs)
2863
+
2864
+ if is_new:
2865
+ # __new__ created the object, but __init__ was not called yet to initialize it
2866
+ # we simulate this here
2867
+ cls = args[0]
2868
+ assert not cls is None and inspect.isclass(cls), ("*** Internal error", cls)
2869
+ r.__magic_cache_call_init__ = True
2870
+ cls.__init__( r, *args[1:], **kwargs )
2871
+ assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__")
2872
+
2873
+ if cache_mode.write:
2874
+ self._subdir.write(uid,r,version=version_)
2875
+ if not track_cached_files is None:
2876
+ track_cached_files += self._subdir.fullFileName(uid)
2877
+ execute.cache_info.last_cached = False
2878
+
2879
+ if is_new:
2880
+ assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__")
2881
+ r.__magic_cache_call_init__ = False # since we called __new__, __init__ will be called next
2882
+ #debug_verbose.write(f"cache({name}): called __init__ after __new__ with: {args[1:]} / {kwargs}")
2883
+
2884
+ if not debug_verbose is None:
2885
+ if cache_mode.write:
2886
+ debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' and wrote result into '{self._subdir.fullFileName(uid)}'.")
2887
+ else:
2888
+ debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' but did *not* write into '{self._subdir.fullFileName(uid)}'.")
2889
+ return r
2890
+
2891
+ update_wrapper( wrapper=execute, wrapped=F )
2892
+ execute.cache_info = CacheInfo()
2893
+
2894
+ execute.cache_info.name = name # decoded name of the function
2895
+ execute.cache_info.signature = inspect.signature(F) # signature of the function
2896
+
2897
+ execute.cache_info.uid = None # unique file name used for the last call
2898
+ execute.cache_info.label = None # descriptive label of the last call
2899
+ execute.cache_info.version = None # last version used
2900
+
2901
+ execute.cache_info.last_cached = None # last function call restored from disk?
2902
+
2903
+ if self.cacheController.keep_last_arguments:
2904
+ execute.cache_info.arguments = None # last function call arguments dictionary of strings
2905
+
2906
+ if is_new:
2907
+ execute.__new_during_read = False
2908
+
2909
+ if not debug_verbose is None:
2910
+ debug_verbose.write(f"cache({name}): {'function' if not is_new else 'class constructor function'} registered for caching into '{self._subdir.path}'.")
2911
+ self.cacheController.versioned[name] = execute
2912
+ return execute
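+ # Hedged usage sketch (mirrors the VersionedCacheRoot docstring below; not a
+ # definitive API reference):
+ # @vtest.cache("1.0")
+ # def f1( x=1, y=2 ): ...
+ # f1(3, 4) # recomputes or restores the result
+ # f1.cache_info.last_cached # True if the last call was read from disk
+ # f1.cache_info.uid # file name used in the cache directory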
2913
+
2914
+ def VersionedCacheRoot( directory : str, *,
2915
+ ext : str = None,
2916
+ fmt : Format = None,
2917
+ createDirectory : bool = None,
2918
+ **controller_kwargs
2919
+ ):
2920
+ """
2921
+ Create a root directory for versioned caching on disk
2922
+
2923
+ Usage:
2924
+ In a central file, define a root directory
2925
+ vroot = VersionedCacheRoot("!/cache")
2926
+
2927
+ and a sub-directory
2928
+ vtest = vroot("test")
2929
+
2930
+ @vtest.cache("1.0")
2931
+ def f1( x=1, y=2 ):
2932
+ print(x,y)
2933
+
2934
+ @vtest.cache("1.0", dps=[f1])
2935
+ def f2( x=1, y=2, z=3 ):
2936
+ f1( x,y )
2937
+ print(z)
2938
+
2939
+ Parameters
2940
+ ----------
2941
+ directory : name of the directory. Using SubDir, the following shortcuts are supported:
2942
+ "!/dir" creates 'dir' in the temporary directory
2943
+ "~/dir" creates 'dir' in the home directory
2944
+ "./dir" created 'dir' relative to the current directory
2945
+ ext : extension, which will automatically be appended to file names (see SubDir). Default depends on format. For Format.PICKLE it is 'pck'
2946
+ fmt : format, see SubDir.Format. Default is Format.PICKLE
2947
+ createDirectory : whether to create the directory on disk immediately when this object is constructed. Default is no.
2948
+ controller_kwargs: parameters passed to CacheController, for example:
2949
+ exclude_arg_types : list of types, or names of types, whose arguments are excluded when auto-generating unique IDs from function arguments.
2950
+ A standard example from cdxbasics is "Context" as it is used to print progress messages.
2951
+ max_filename_length : maximum filename length
2952
+ hash_length: length used for hashes, see cdxbasics.util.uniqueHash()
2953
+
2954
+ Returns
2955
+ -------
2956
+ A root cache directory
2957
+ """
2958
+ controller = CacheController(**controller_kwargs) if len(controller_kwargs) > 0 else None
2959
+ return SubDir( directory=directory, ext=ext, fmt=fmt, createDirectory=createDirectory, controller=controller )
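+ # Hedged example (path and option values are illustrative):
+ # vroot = VersionedCacheRoot("!/cache", exclude_arg_types=["Context"])
+ # creates a cache root in the temporary directory whose cached functions ignore
+ # any 'Context' argument when computing cache keys; see controller_kwargs above.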
2960
+
2961
+ version = version_decorator
2962
+
2963
+