cdxcore 0.1.6__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cdxcore might be problematic. Click here for more details.

cdxcore/subdir.py CHANGED
@@ -1,61 +1,381 @@
1
1
  """
2
- subdir
3
- Simple class to keep track of directory sturctures and for automated caching on disk
4
- Hans Buehler 2020
2
+ Overview
3
+ --------
4
+
5
+ This module contains utilities for file i/o, directory management and
6
+ streamlined versioned caching.
7
+
8
+ The key idea is to provide transparent, concise :mod:`pickle` access to the file system
9
+ via the :class:`cdxcore.subdir.SubDir` class.
10
+
11
+ **Key design features:**
12
+
13
+ * Simple path construction via ``()`` operator. By default directories which do not exist yet
14
+ are only created upon writing a first file.
15
+
16
+ * Files managed by :class:`cdxcore.subdir.SubDir` all have the same extension.
17
+
18
+ * Files support fast versioning: the version of a file can be read without having to read the
19
+ entire file.
20
+
21
+ * :dec:`cdxcore.subdir.SubDir.cache` implements a convenient versioned caching framework.
22
+
23
+ Directories
24
+ ^^^^^^^^^^^
25
+
26
+ The core of the framework is the :class:`cdxcore.subdir.SubDir` class which represents a directory
27
+ with files of a given extension.
28
+
29
+ Simply write::
30
+
31
+ from cdxcore.subdir import SubDir
32
+ subdir = SubDir("my_directory") # relative to current working directory
33
+ subdir = SubDir("./my_directory") # relative to current working directory
34
+ subdir = SubDir("~/my_directory") # relative to home directory
35
+ subdir = SubDir("!/my_directory") # relative to default temp directory
36
+
37
+ Note that ``my_directory`` will not be created if it does not exist yet. It will be created the first
38
+ time we write a file.
39
+
40
+ You can specify a parent for relative path names::
41
+
42
+ from cdxcore.subdir import SubDir
43
+ subdir = SubDir("my_directory", "~") # relative to home directory
44
+ subdir = SubDir("my_directory", "!") # relative to default temp directory
45
+ subdir = SubDir("my_directory", ".") # relative to current directory
46
+ subdir2 = SubDir("my_directory", subdir) # subdir2 is relative to `subdir`
47
+
48
+ Change the extension to "bin"::
49
+
50
+ from cdxcore.subdir import SubDir
51
+ subdir = SubDir("~/my_directory;*.bin")
52
+ subdir = SubDir("~/my_directory", ext="bin")
53
+ subdir = SubDir("my_directory", "~", ext="bin")
54
+
55
+ You can turn off extension management by setting the extension to ""::
56
+
57
+ from cdxcore.subdir import SubDir
58
+ subdir = SubDir("~/my_directory", ext="")
59
+
60
+ You can also use :meth:`cdxcore.subdir.SubDir.__call__` to generate sub directories::
61
+
62
+ from cdxcore.subdir import SubDir
63
+ parent = SubDir("~/parent")
64
+ subdir = parent("subdir")
65
+
66
+ Be aware that when the operator :meth:`cdxcore.subdir.SubDir.__call__`
67
+ is called with two keyword arguments, then it reads files.
68
+
69
+ You can obtain a list of all sub directories in a directory by using :meth:`cdxcore.subdir.SubDir.sub_dirs`.
70
+ The list of files with the corresponding extension is accessible via :meth:`cdxcore.subdir.SubDir.files`.
71
+
72
+ File Format
73
+ ^^^^^^^^^^^
74
+
75
+ :class:`cdxcore.subdir.SubDir` supports file i/o with a number of different file formats
76
+ via :class:`cdxcore.subdir.Format`.
77
+
78
+ * "PICKLE": standard pickling; the default extension is "pck".
79
+
80
+ * "JSON_PICKLE": uses the :mod:`jsonpickle` package; default extension "jpck".
81
+ The advantage of this format over "PICKLE" is that it is somewhat human-readable.
82
+ However, ``jsonpickle`` uses compressed formats for complex objects such as :mod:`numpy`
83
+ arrays, hence readability is somewhat limited. Using "JSON_PICKLE"
84
+ comes at the cost of slower i/o speed.
85
+
86
+ * "JSON_PLAIN": uses :func:`cdxcore.util.plain` to generate human readable files
87
+ which cannot be loaded back from disk.
88
+ In this mode ``SubDir`` converts objects into plain Python objects before using :mod:`json`
89
+ to write them to disk.
90
+ That means that deserialized data does not have the correct object structure
91
+ to be able to restore files written in "JSON_PLAIN".
92
+ However, such files are much easier to read.
93
+
94
+ * "BLOSC" uses `blosc <https://github.com/blosc/python-blosc>`__
95
+ to read/write compressed binary data. The blosc compression algorithm is very fast,
96
+ hence using this mode will not usually lead to notably slower performance than using
97
+ "PICKLE" but will generate smaller files, depending on your data structure.
98
+
99
+ The default extension for "BLOSC" is "zbsc".
100
+
101
+ * "GZIP": uses :mod:`gzip`
102
+ to read/write compressed binary data. The default extension is "pgz".
103
+
104
+ Summary of properties:
105
+
106
+
107
+ +--------------+------------------+----------------+-------+-------------+-----------+
108
+ | Format | Restores objects | Human readable | Speed | Compression | Extension |
109
+ +==============+==================+================+=======+=============+===========+
110
+ | PICKLE | yes | no | high | no | .pck |
111
+ +--------------+------------------+----------------+-------+-------------+-----------+
112
+ | JSON_PLAIN | no | yes | low | no | .json |
113
+ +--------------+------------------+----------------+-------+-------------+-----------+
114
+ | JSON_PICKLE | yes | limited | low | no | .jpck |
115
+ +--------------+------------------+----------------+-------+-------------+-----------+
116
+ | BLOSC | yes | no | high | yes | .zbsc |
117
+ +--------------+------------------+----------------+-------+-------------+-----------+
118
+ | GZIP | yes | no | high | yes | .pgz |
119
+ +--------------+------------------+----------------+-------+-------------+-----------+
120
+
121
+
122
+ You may specify the file format when instantiating :class:`cdxcore.subdir.SubDir`::
123
+
124
+ from cdxcore.subdir import SubDir
125
+ subdir = SubDir("~/my_directory", fmt=SubDir.PICKLE)
126
+ subdir = SubDir("~/my_directory", fmt=SubDir.JSON_PICKLE)
127
+ ...
128
+
129
+ If ``ext`` is not specified the extension will default to
130
+ the respective default extension of the format requested.
131
+
132
+ Reading Files
133
+ ^^^^^^^^^^^^^
134
+
135
+ To read the data contained in a ``file`` from our subdirectory
136
+ with its reference extension use :meth:`cdxcore.subdir.SubDir.read`::
137
+
138
+ from cdxcore.subdir import SubDir
139
+ subdir = SubDir("!/test")
140
+
141
+ data = subdir.read("file") # returns the default `None` if file is not found
142
+ data = subdir.read("file", default=[]) # returns the default [] if file is not found
143
+
144
+ This function will return the ``default`` (which in turn defaults to ``None``)
145
+ if ``file.ext`` does not exist.
146
+ You can opt for :meth:`cdxcore.subdir.SubDir.read` to raise an error instead of returning a default
147
+ by using ``raise_on_error=True``::
148
+
149
+ data = subdir.read("file", raise_on_error=True) # raises 'KeyError' if not found
150
+
151
+ When calling ``read()`` you may specify an alternative extension::
152
+
153
+ data = subdir.read("file", ext="bin") # change extension to "bin"
154
+ data = subdir.read("file.bin", ext="") # no automatic extension
155
+
156
+ Specifying a different format for :meth:`cdxcore.subdir.SubDir.read` only changes
157
+ the extension automatically if you have not overwritten it before:
158
+
159
+ .. code-block:: python
160
+
161
+ subdir = SubDir("!/test") # default format PICKLE with extension pck
162
+ data = subdir.read("file", fmt=SubDir.JSON_PICKLE ) # uses "jpck" extension
163
+
164
+ subdir = SubDir("!/test", ext="bin") # user-specified extension
165
+ data = subdir.read("file", fmt=SubDir.JSON_PICKLE ) # keeps using "bin"
166
+
167
+ You can also use the :meth:`cdxcore.subdir.SubDir.__call__` to read files, in which case you must specify a default value
168
+ (if you don't, then the operator will return a sub directory)::
169
+
170
+ data = subdir("file", None) # returns None if file is not found
171
+
172
+ You can also use item notation to access files.
173
+ In this case, though, an error will be thrown if the file does not exist::
174
+
175
+ data = subdir['file'] # raises KeyError if file is not found
176
+
177
+ You can read a range of files in one function call::
178
+
179
+ data = subdir.read( ["file1", "file2"] ) # returns list
180
+
181
+ Finally, you can also iterate through all existing files using iterators::
182
+
183
+ # manual loading
184
+ for file in subdir:
185
+ data = subdir.read(file)
186
+ ...
187
+
188
+ # automatic loading, with "None" as a default
189
+ for file, data in subdir.items():
190
+ ...
191
+
192
+ To obtain a list of all files in our directory which have the correct extension, use :meth:`cdxcore.subdir.SubDir.files`.
193
+
194
+ Writing Files
195
+ ^^^^^^^^^^^^^
196
+
197
+ Writing files mirrors reading them::
198
+
199
+ from cdxcore.subdir import SubDir
200
+ subdir = SubDir("!/test")
201
+
202
+ subdir.write("file", data)
203
+ subdir['file'] = data
204
+
205
+ You may specify a different extension::
206
+
207
+ subdir.write("file", data, ext="bin")
208
+
209
+ You can also specify a file :class:`cdxcore.subdir.Format`.
210
+ The extension will be changed automatically if you have not set it manually::
211
+
212
+ subdir = SubDir("!/test")
213
+ subdir.write("file", data, fmt=SubDir.JSON_PICKLE ) # will write to "file.jpck"
214
+
215
+ To write several files at once, write::
216
+
217
+ subdir.write(["file1", "file2"], [data1, data2])
218
+
219
+ Note that when writing to a file, :meth:`cdxcore.subdir.SubDir.write`
220
+ will first write to a temporary file, and then rename this file into the target file name.
221
+ The temporary file name is generated by applying :func:`cdxcore.uniquehash.unique_hash48`
222
+ to the
223
+ target file name,
224
+ current time, process and thread ID, as well as the machine's UUID.
225
+ This is done to reduce collisions between processes/machines accessing the same files,
226
+ potentially across a network.
227
+ It does not remove collision risk entirely, though.
228
+
229
+ Filenames
230
+ ^^^^^^^^^
231
+
232
+ :class:`cdxcore.subdir.SubDir` transparently handles directory access and extensions.
233
+ That means a user usually only uses ``file`` names which do not contain either.
234
+ To obtain the full qualified filename given a "file" use :meth:`cdxcore.subdir.SubDir.full_file_name`.
235
+
236
+ Reading and Writing Versioned Files
237
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
238
+
239
+ :class:`cdxcore.subdir.SubDir` supports versioned files.
240
+ If versions are used, then they *must* be used for both reading and writing.
241
+ :dec:`cdxcore.version.version` provides a standard decorator framework for defining
242
+ versions for classes and functions including the version dependencies.
243
+
244
+ If a ``version`` is provided to :func:`cdxcore.subdir.SubDir.write`
245
+ then ``SubDir`` will write the version in a block ahead of the main content of the file.
246
+ In case of the PICKLE format, this is a byte string. In case of JSON_PLAIN and JSON_PICKLE this is a line of
247
+ text starting with ``#`` ahead of the file. (Note that this violates
248
+ the JSON file format.)
249
+
250
+ Writing a short version block ahead of the main data allows :func:`cdxcore.subdir.SubDir.read`
251
+ to read this version information back quickly without needing to read the entire file.
252
+ ``read()`` attempts to do so if it is called with a ``version`` parameter.
253
+ In this case it will compare the read version with the provided version,
254
+ and only return the main content of the file if versions match.
255
+
256
+ Use :func:`cdxcore.subdir.SubDir.is_version` to check whether a given file has a specific version.
257
+ Like ``read()`` this function only reads the information required to obtain the information and will
258
+ be much faster than reading the whole file.
259
+
260
+ *Important:* note that if a file was written with a version, it has to be read again with a test version.
261
+ You can specify ``version="*"`` for :func:`cdxcore.subdir.SubDir.read` to match any version.
262
+
263
+ **Examples:**
264
+
265
+ Writing a versioned file::
266
+
267
+ from cdxcore.subdir import SubDir
268
+ sub_dir = SubDir("!/test_version")
269
+ sub_dir.write("test", [1,2,3], version="0.0.1" )
270
+
271
+ To read ``[1,2,3]`` from "test" we need to use the correct version::
272
+
273
+ _ = sub_dir.read("test", version="0.0.1")
274
+
275
+ The following will not read "test" as the versions do not match::
276
+
277
+ _ = sub_dir.read("test", version="0.0.2")
278
+
279
+ By default :func:`cdxcore.subdir.SubDir.read`
280
+ will not fail if a version mismatch is encountered; rather it will
281
+ attempt to delete the file and then return the ``default`` value.
282
+
283
+ This can be turned off
284
+ with the keyword ``delete_wrong_version`` set to ``False``.
285
+
286
+ You can ignore the version used to write a file by using `*` as version::
287
+
288
+ _ = sub_dir.read("test", version="*")
289
+
290
+ Note that reading files which have been written with a ``version`` without
291
+ ``version`` keyword will fail because ``SubDir`` will only append additional version information
292
+ to the file if required.
293
+
294
+ Test existence of Files
295
+ ^^^^^^^^^^^^^^^^^^^^^^^
296
+
297
+ To test existence of 'file' in a directory, use one of::
298
+
299
+ subdir.exist('file')
300
+ 'file' in subdir
301
+
302
+ Deleting files
303
+ ^^^^^^^^^^^^^^
304
+
305
+ To delete a 'file', use any of the following::
306
+
307
+ subdir.delete("file")
308
+ del subdir['file']
309
+
310
+ All of these are *silent*, and will not throw errors if "file" does not exist.
311
+ In order to throw an error use::
312
+
313
+ subdir.delete('file', raise_on_error=True)
314
+
315
+ A few member functions assist in deleting a number of files:
316
+
317
+ * :func:`cdxcore.subdir.SubDir.delete_all_files`: delete all files in the directory with matching extension. Do not delete sub directories, or files with extensions different to our own.
318
+ * :func:`cdxcore.subdir.SubDir.delete_all_content`: delete all files with our extension, including in all sub-directories. If a sub-directory is left empty
319
+ upon ``delete_all_content`` delete it, too.
320
+ * :func:`cdxcore.subdir.SubDir.delete_everything`: deletes *everything*, not just files with matching extensions.
321
+
322
+ Caching
323
+ ^^^^^^^
324
+
325
+ A :class:`cdxcore.subdir.SubDir` object offers an advanced context for caching calls to :class:`collections.abc.Callable`
326
+ objects with :dec:`cdxcore.subdir.SubDir.cache`.
327
+
328
+ This involves keying the cache by the function name and its current parameters using :class:`cdxcore.uniquehash.UniqueHash`,
329
+ and monitoring the function's version using :dec:`cdxcore.version.version`. The caching behaviour itself can be controlled by
330
+ specifying the desired :class:`cdxcore.subdir.CacheMode`.
331
+
332
+ Import
333
+ ------
334
+ .. code-block:: python
335
+
336
+ import cdxcore.uniquehash as uniquehash
337
+
338
+ Documentation
339
+ -------------
5
340
  """
6
341
 
7
342
 
8
- import os
9
- import os.path
10
- import uuid
11
- import threading
12
- import pickle
13
- import tempfile
14
- import shutil
15
- import datetime
16
- import inspect
343
+ import os as os
344
+ import uuid as uuid
345
+ import threading as threading
346
+ import pickle as pickle
347
+ import tempfile as tempfile
348
+ import shutil as shutil
349
+ import datetime as datetime
350
+ import inspect as inspect
351
+ import platform as platform
17
352
  from collections import OrderedDict
18
- from collections.abc import Collection, Mapping, Callable
353
+ from collections.abc import Collection, Mapping, Callable, Iterable
19
354
  from enum import Enum
20
- import json as json
21
- import platform as platform
22
355
  from functools import update_wrapper
23
- import warnings as warnings
24
356
 
25
- import numpy as np
357
+ import json as json
26
358
  import jsonpickle as jsonpickle
27
359
  import jsonpickle.ext.numpy as jsonpickle_numpy
28
- import zlib as zlib
29
360
  import gzip as gzip
30
361
  import blosc as blosc
31
362
 
32
- from .prettydict import pdct
363
+ from .err import verify, error, warn, fmt as txtfmt
364
+ from .pretty import PrettyObject
33
365
  from .verbose import Context
34
- from .version import Version, version as version_decorator
35
- from .util import fmt_list, fmt_filename, DEF_FILE_NAME_MAP, fmt as txtfmt, plain
36
- from .uniquehash import uniqueHash48, uniqueLabelExt, namedUniqueHashExt
37
-
38
- def error( text, *args, exception = RuntimeError, **kwargs ):
39
- raise exception( txtfmt(text, *args, **kwargs) )
40
- def verify( cond, text, *args, exception = RuntimeError, **kwargs ):
41
- if not cond:
42
- error( text, *args, **kwargs, exception=exception )
43
- def warn( text, *args, warning=warnings.RuntimeWarning, stack_level=1, **kwargs ):
44
- warnings.warn( txtfmt(text, *args, **kwargs), warning, stack_level=stack_level )
366
+ from .version import Version, version as version_decorator, VersionError
367
+ from .util import fmt_list, fmt_filename, DEF_FILE_NAME_MAP, plain, is_filename
368
+ from .uniquehash import unique_hash48, UniqueLabel, NamedUniqueHash
369
+
45
370
 
46
371
  """
372
+ :meta private:
47
373
  compression
48
374
  """
49
375
  jsonpickle_numpy.register_handlers()
50
376
  BLOSC_MAX_BLOCK = 2147483631
51
377
  BLOSC_MAX_USE = 1147400000 # ... blosc really cannot handle large files
52
-
53
- """
54
- Hashing
55
- """
56
- uniqueFileName48 = uniqueHash48
57
- uniqueNamedFileName48_16 = namedUniqueHashExt(max_length=48,id_length=16,filename_by=DEF_FILE_NAME_MAP)
58
- uniqueLabelledFileName48_16 = uniqueLabelExt(max_length=48,id_length=16,filename_by=DEF_FILE_NAME_MAP)
378
+ #
59
379
 
60
380
  def _remove_trailing( path ):
61
381
  if len(path) > 0:
@@ -63,13 +383,34 @@ def _remove_trailing( path ):
63
383
  return _remove_trailing(path[:-1])
64
384
  return path
65
385
 
386
+
387
+ # ========================================================================
388
+ # Basics
389
+ # ========================================================================
390
+
66
391
  class Format(Enum):
67
- """ File formats for SubDir """
68
- PICKLE = 0
69
- JSON_PICKLE = 1
70
- JSON_PLAIN = 2
71
- BLOSC = 3
72
- GZIP = 4
392
+ """
393
+ File formats for :class:`cdxcore.subdir.SubDir`.
394
+
395
+ +--------------+------------------+----------------+-------+-------------+-----------+
396
+ | Format | Restores objects | Human readable | Speed | Compression | Extension |
397
+ +==============+==================+================+=======+=============+===========+
398
+ | PICKLE | yes | no | high | no | .pck |
399
+ +--------------+------------------+----------------+-------+-------------+-----------+
400
+ | JSON_PLAIN | no | yes | low | no | .json |
401
+ +--------------+------------------+----------------+-------+-------------+-----------+
402
+ | JSON_PICKLE | yes | limited | low | no | .jpck |
403
+ +--------------+------------------+----------------+-------+-------------+-----------+
404
+ | BLOSC | yes | no | high | yes | .zbsc |
405
+ +--------------+------------------+----------------+-------+-------------+-----------+
406
+ | GZIP | yes | no | high | yes | .pgz |
407
+ +--------------+------------------+----------------+-------+-------------+-----------+
408
+ """
409
+ PICKLE = 0 #: Standard binary :mod:`pickle` format.
410
+ JSON_PICKLE = 1 #: :mod:`jsonpickle` format.
411
+ JSON_PLAIN = 2 #: ``json`` format.
412
+ BLOSC = 3 #: :mod:`blosc` binary compressed format.
413
+ GZIP = 4 #: :mod:`gzip` binary compressed format.
73
414
 
74
415
  PICKLE = Format.PICKLE
75
416
  JSON_PICKLE = Format.JSON_PICKLE
@@ -77,31 +418,79 @@ JSON_PLAIN = Format.JSON_PLAIN
77
418
  BLOSC = Format.BLOSC
78
419
  GZIP = Format.GZIP
79
420
 
80
- """
81
- Use the following for config calls:
82
- format = subdir.mkFormat( config("format", "pickle", subdir.FORMAT_NAMES, "File format") )
83
- """
84
- FORMAT_NAMES = [ s.lower() for s in Format.__members__ ]
85
- def mkFormat( name ):
86
- if not name in FORMAT_NAMES:
87
- raise LookupError(f"Unknown format name '{name}'. Must be one of: {fmt_list(name)}")
88
- return Format[name.upper()]
421
+ class VersionPresentError(RuntimeError):
422
+ """
423
+ Exception raised in case a file was read which had a version, but no test version
424
+ was provided.
425
+ """
426
+ pass
427
+
428
+ # ========================================================================
429
+ # Caching utilities
430
+ # ========================================================================
89
431
 
90
432
  class CacheMode(object):
91
433
  """
92
- CacheMode
93
- A class which encodes standard behaviour of a caching strategy:
434
+ A class which encodes standard behaviour of a caching strategy.
94
435
 
95
- on gen off update clear readonly
96
- load cache from disk if exists x x - - - x
97
- write updates to disk x x - x - -
98
- delete existing object - - - - x -
99
- delete existing object if incompatible x - - x x -
436
+ **Summary mechanics:**
437
+
438
+ +-----------------------------------------+-------+-------+-------+---------+--------+----------+
439
+ | Action | on | gen | off | update | clear | readonly |
440
+ +=========================================+=======+=======+=======+=========+========+==========+
441
+ | load cache from disk if exists | x | x | | | | x |
442
+ +-----------------------------------------+-------+-------+-------+---------+--------+----------+
443
+ | write updates to disk | x | x | | x | | |
444
+ +-----------------------------------------+-------+-------+-------+---------+--------+----------+
445
+ | delete existing object | | | | | x | |
446
+ +-----------------------------------------+-------+-------+-------+---------+--------+----------+
447
+ | delete existing object if incompatible | x | | | x | x | |
448
+ +-----------------------------------------+-------+-------+-------+---------+--------+----------+
100
449
 
101
- See cdxbasics.subdir for functions to manage files.
450
+
451
+ **Standard Caching Semantics**
452
+
453
+ Assuming we wish to cache results from calling a function ``f`` in a file named ``filename``
454
+ in a directory ``directory``, then this is the ``CacheMode`` waterfall:
455
+
456
+ .. code-block:: python
457
+
458
+ def cache_f( filename : str, directory : SubDir, version : str, cache_mode : CacheMode ):
459
+ if cache_mode.delete:
460
+ directory.delete(filename)
461
+ if cache_mode.read:
462
+ r = directory.read(filename,
463
+ default=None,
464
+ version=version,
465
+ raise_on_error=False,
466
+ delete_wrong_version=cache_mode.del_incomp
467
+ )
468
+ if not r is None:
469
+ return r
470
+
471
+ r = f(...) # compute result
472
+
473
+ if cache_mode.write:
474
+ directory.write(filename,
475
+ r,
476
+ version=version,
477
+ raise_on_error=False
478
+ )
479
+
480
+ return r
481
+
482
+ See :func:`cdxcore.subdir.SubDir.cache` for a comprehensive
483
+ implementation.
484
+
485
+ Parameters
486
+ ----------
487
+ mode : str, optional
488
+ Which mode to use: ``"on"``, ``"gen"``, ``"off"``, ``"update"``, ``"clear"`` or ``"readonly"``.
489
+
490
+ The default is ``None`` in which case ``"on"`` is used.
102
491
  """
103
492
 
104
- ON = "on"
493
+ ON = "on"
105
494
  GEN = "gen"
106
495
  OFF = "off"
107
496
  UPDATE = "update"
@@ -109,22 +498,31 @@ class CacheMode(object):
109
498
  READONLY = "readonly"
110
499
 
111
500
  MODES = [ ON, GEN, OFF, UPDATE, CLEAR, READONLY ]
501
+ """
502
+ List of available modes in text form.
503
+ This list can be used as ``cast`` parameter when calling :func:`cdxcore.config.Config.__call__`::
504
+
505
+ from cdxcore.config import Config
506
+ from cdxcore.subdir import CacheMode
507
+
508
+ def get_cache_mode( config : Config ) -> CacheMode:
509
+ return CacheMode( config("cache_mode", "on", CacheMode.MODES, CacheMode.HELP) )
510
+ """
511
+
112
512
  HELP = "'on' for standard caching; 'gen' for caching but keep existing incompatible files; 'off' to turn off; 'update' to overwrite any existing cache; 'clear' to clear existing caches; 'readonly' to read existing caches but not write new ones"
113
-
513
+ """
514
+ Standard ``config`` help text, to be used with :func:`cdxcore.config.Config.__call__` as follows::
515
+
516
+ from cdxcore.config import Config
517
+ from cdxcore.subdir import CacheMode
518
+
519
+ def get_cache_mode( config : Config ) -> CacheMode:
520
+ return CacheMode( config("cache_mode", "on", CacheMode.MODES, CacheMode.HELP) )
521
+ """
522
+
114
523
  def __init__(self, mode : str = None ):
115
524
  """
116
- Encodes standard behaviour of a caching strategy:
117
-
118
- on gen off update clear readonly
119
- load upon start from disk if exists x x - - - x
120
- write updates to disk x x - x - -
121
- delete existing object upon start - - - - x -
122
- delete existing object if incompatible x - - x x -
123
-
124
- Parameters
125
- ----------
126
- mode : str
127
- Which mode to use.
525
+ :meta private:
128
526
  """
129
527
  if isinstance( mode, CacheMode ):
130
528
  return# id copy constuctor
@@ -145,22 +543,22 @@ class CacheMode(object):
145
543
 
146
544
  @property
147
545
  def read(self) -> bool:
148
- """ Whether to load any existing data when starting """
546
+ """ Whether to load any existing cached data. """
149
547
  return self._read
150
548
 
151
549
  @property
152
550
  def write(self) -> bool:
153
- """ Whether to write cache data to disk """
551
+ """ Whether to cache newly computed data to disk. """
154
552
  return self._write
155
553
 
156
554
  @property
157
555
  def delete(self) -> bool:
158
- """ Whether to delete existing data """
556
+ """ Whether to delete existing data. """
159
557
  return self._delete
160
558
 
161
559
  @property
162
560
  def del_incomp(self) -> bool:
163
- """ Whether to delete existing data if it is not compatible """
561
+ """ Whether to delete existing data if it is not compatible or has the wrong version. """
164
562
  return self._del_in
165
563
 
166
564
  def __str__(self) -> str:# NOQA
@@ -175,311 +573,327 @@ class CacheMode(object):
175
573
 
176
574
  @property
177
575
  def is_off(self) -> bool:
178
- """ Whether this cache mode is OFF """
576
+ """ Whether this cache mode is OFF. """
179
577
  return self.mode == self.OFF
180
578
 
181
579
  @property
182
580
  def is_on(self) -> bool:
183
- """ Whether this cache mode is ON """
581
+ """ Whether this cache mode is ON. """
184
582
  return self.mode == self.ON
185
583
 
186
584
  @property
187
585
  def is_gen(self) -> bool:
188
- """ Whether this cache mode is GEN """
586
+ """ Whether this cache mode is GEN. """
189
587
  return self.mode == self.GEN
190
588
 
191
589
  @property
192
590
  def is_update(self) -> bool:
193
- """ Whether this cache mode is UPDATE """
591
+ """ Whether this cache mode is UPDATE. """
194
592
  return self.mode == self.UPDATE
195
593
 
196
594
  @property
197
595
  def is_clear(self) -> bool:
198
- """ Whether this cache mode is CLEAR """
596
+ """ Whether this cache mode is CLEAR. """
199
597
  return self.mode == self.CLEAR
200
598
 
201
599
  @property
202
600
  def is_readonly(self) -> bool:
203
- """ Whether this cache mode is READONLY """
601
+ """ Whether this cache mode is READONLY. """
204
602
  return self.mode == self.READONLY
205
603
 
206
604
  class CacheController( object ):
207
- """
208
- Central control for versioning.
209
- Enabes to to turn on/off caching, debugging and tracks all versions
605
+ r"""
606
+ Central control parameters for caching.
607
+
608
+ When a parameter object of this type
609
+ is assigned to a :class:`cdxcore.subdir.SubDir`,
610
+ then it is passed on when sub-directories are
611
+ created. This way all ``SubDir`` have the same
612
+ caching behaviour.
613
+
614
+ See :class:`cdxcore.subdir.CacheController` for
615
+ a list of control parameters.
616
+
617
+ Parameters
618
+ ----------
619
+ exclude_arg_types : list[type], optional
620
+ List of types to exclude from producing unique ids from function arguments.
621
+
622
+ Defaults to ``[Context]``.
623
+
624
+ cache_mode : CacheMode, optional
625
+ Top level cache control.
626
+ Set to "OFF" to turn off all caching.
627
+ Default is "ON".
628
+
629
+ max_filename_length : int, optional
630
+ Maximum filename length. If unique ids exceed this length, a hash of length
631
+ ``hash_length`` will be integrated into the file name.
632
+ See :class:`cdxcore.uniquehash.NamedUniqueHash`.
633
+ Default is ``48``.
634
+
635
+ hash_length : int, optional
636
+ Length of the hash used to make sure each filename is unique
637
+ See :class:`cdxcore.uniquehash.NamedUniqueHash`.
638
+ Default is ``8``.
639
+
640
+ debug_verbose : :class:`cdxcore.verbose.Context`, optional
641
+ If not ``None`` print caching process messages to this object.
642
+
643
+ Default is ``None``.
644
+
645
+ keep_last_arguments : bool, optional
646
+ Keep a dictionary of all parameters as string representations after each function call.
647
+ If the function ``F`` was decorated using :meth:`cdxcore.subdir.SubDir.cache`,
648
+ you can access this information via ``F.cache_info.last_arguments``.
649
+
650
+ Note that strings are limited to 100 characters per argument to avoid memory
651
+ overload when large objects are passed.
652
+
653
+ Default is ``False``.
210
654
  """
211
655
 
212
656
  def __init__(self, *,
213
657
  exclude_arg_types : list[type] = [Context],
214
658
  cache_mode : CacheMode = CacheMode.ON,
215
659
  max_filename_length: int = 48,
216
- hash_length : int = 16,
660
+ hash_length : int = 8,
217
661
  debug_verbose : Context = None,
218
662
  keep_last_arguments: bool = False
219
663
  ):
220
664
  """
221
- Background parameters to control caching
222
-
223
- Parameters
224
- ----------
225
- exclude_arg_types :
226
- List of types to exclude from producing unique ids from function arguments. Defaults to [SubDir, Context]
227
- cache_mode :
228
- Top level cache control. Set to "OFF" to turn off all caching. Default is "ON"
229
- max_filename_length :
230
- Maximum filename length. If unique id's exceed the file name a hash of length 'hash_length' will be intergated into the file name.
231
- See cdxbasics.util.namedUniqueHashExt and cdxbasics.util.uniqueLabelExt
232
- hash_length :
233
- Length of the hash used to make sure each filename is unique
234
- See cdxbasics.util.namedUniqueHashExt and cdxbasics.util.uniqueLabelExt
235
- debug_verbose :
236
- If non-None print caching process messages to this object.
237
- keep_last_arguments :
238
- keep a dictionary of all parameters as string representations after each function call.
239
- If the function F was decorated using SubDir.cache(), you can access this information via
240
- F.cache_info.last_arguments
241
- Note that strings are limited to 100 characters per argument to avoid memory
242
- overload when large objects are passed.
243
- """
244
- max_filename_length = int(max_filename_length)
245
- hash_length = int(hash_length)
665
+ :meta private:
666
+ """
667
+ max_filename_length = int(max_filename_length)
668
+ hash_length = int(hash_length)
246
669
  assert max_filename_length>0, ("'max_filename_length' must be positive")
247
670
  assert hash_length>0 and hash_length<=max_filename_length, ("'hash_length' must be positive and at most 'max_filename_length'")
248
671
  assert max_filename_length>=hash_length, ("'hash_length' must not exceed 'max_filename_length")
249
672
  self.cache_mode = CacheMode(cache_mode if not cache_mode is None else CacheMode.ON)
250
- self.debug_verbose = debug_verbose
673
+ self.debug_verbose = Context(debug_verbose) if isinstance(debug_verbose, (int,str)) else debug_verbose
251
674
  self.exclude_arg_types = set(exclude_arg_types) if not exclude_arg_types is None else None
252
- self.versioned = pdct() # list
253
- self.uniqueNamedFileName = namedUniqueHashExt(max_length=max_filename_length,id_length=hash_length,filename_by=DEF_FILE_NAME_MAP)
254
- self.uniqueLabelledFileName = uniqueLabelExt(max_length=max_filename_length,id_length=hash_length,filename_by=DEF_FILE_NAME_MAP)
675
+ self.versioned = PrettyObject() # list
676
+ self.labelledFileName = NamedUniqueHash(max_length=max_filename_length,id_length=hash_length,filename_by=DEF_FILE_NAME_MAP)
677
+ self.uniqueFileName = UniqueLabel(max_length=max_filename_length,id_length=hash_length,filename_by=None)
255
678
  self.keep_last_arguments = keep_last_arguments
256
679
 
257
680
  default_cacheController = CacheController()
681
+ #
258
682
 
683
+ # ========================================================================
684
+ # SubDir
685
+ # ========================================================================
259
686
 
260
- class CacheTracker(object):
261
- """
262
- Utility class to track caching and be able to delete all dependent objects
687
+ class SubDir(object):
688
+ r"""
689
+ ``SubDir`` implements a transparent i/o
690
+ interface for storing data in files.
263
691
 
264
- """
265
- def __init__(self):
266
- """ track cache files """
267
- self._files = []
268
- def __iadd__(self, new_file):
269
- """ Add a new file to the tracker """
270
- self._files.append( new_file )
271
- def delete_cache_files(self):
272
- """ Delete all tracked files """
273
- for file in self._files:
274
- if os.path.exists(file):
275
- os.remove(file)
276
- self._files = []
277
- def __str__(self) -> str:#NOQA
278
- return f"Tracked: {self._files}"
279
- def __repr__(self) -> str:#NOQA
280
- return f"Tracked: {self._files}"
692
+ **Directories**
281
693
 
282
- class InitCacheInfo(object):
283
- pass
284
-
285
- class CacheInfo(object):
286
- pass
694
+ Instantiate a ``SubDir`` with a directory name. There are some
695
+ pre-defined relative system paths the name can refer to::
287
696
 
288
- # SubDir
289
- # ======
697
+ from cdxcore.subdir import SubDir
698
+ parent = SubDir("!/subdir") # relative to system temp directory
699
+ parent = SubDir("~/subdir") # relative to user home directory
700
+ parent = SubDir("./subdir") # relative to current working directory (explicit)
701
+ parent = SubDir("subdir") # relative to current working directory (implicit)
702
+ parent = SubDir("/tmp/subdir") # absolute path (linux)
703
+ parent = SubDir("C:/temp/subdir") # absolute path (windows)
704
+ parent = SubDir("") # current working directory
705
+
706
+ Sub-directories can be generated in a number of ways::
290
707
 
291
- class SubDir(object):
292
- """
293
- SubDir implements a transparent interface for storing data in files, with a common extension.
294
- The generic pattern is:
708
+ subDir = parent('subdir') # using __call__
709
+ subDir = SubDir('subdir', parent) # explicit constructor
710
+ subDir = SubDir('subdir', parent="!/") # explicit constructor with parent being a string
711
+
712
+ Files managed by ``SubDir`` will usually have the same extension.
713
+ This extension can be specified with ``ext``, or as part of the directory string::
714
+
715
+ subDir = SubDir("~/subdir", ext="bin") # set extension to 'bin'
716
+ subDir = SubDir("~/subdir;*.bin") # set extension to 'bin'
717
+
718
+ Leaving the extension as default ``None`` allows ``SubDir`` to automatically use
719
+ the extension associated with any specified format.
295
720
 
296
- 1) create a root 'parentDir':
297
- Absolute: parentDir = SubDir("C:/temp/root")
298
- In system temp directory: parentDir = SubDir("!/root")
299
- In user directory: parentDir = SubDir("~/root")
300
- Relative to current directory: parentDir = SubDir("./root")
721
+ **Copy Constructor**
301
722
 
302
- 2) Use SubDirs to transparently create hierachies of stored data:
303
- assume f() will want to store some data:
723
+ The constructor is shallow.
304
724
 
305
- def f(parentDir, ...):
725
+ **File I/O**
306
726
 
307
- subDir = parentDir('subdir') <-- note that the call () operator is overloaded: if a second argument is provided, the directory will try to read the respective file.
308
- or
309
- subDir = SubDir('subdir', parentDir)
310
- :
311
- :
312
- Write data:
727
+ Write data with :meth:`cdxcore.subdir.SubDir.write`::
313
728
 
314
- subDir['item1'] = item1 <-- dictionary style
315
- subDir.item2 = item2 <-- member style
316
- subDir.write('item3',item3) <-- explicit
729
+ subDir.write('item3',item3) # explicit
730
+ subDir['item1'] = item1 # dictionary style
317
731
 
318
- Note that write() can write to multiple files at the same time.
732
+ Note that :meth:`cdxcore.subdir.SubDir.write` can write to multiple files at the same time.
319
733
 
320
- 3) Reading is similar
734
+ Read data with :meth:`cdxcore.subdir.SubDir.read`::
321
735
 
322
- def readF(parentDir,...):
736
+ item = subDir('item', 'i1') # returns 'i1' if not found.
737
+ item = subDir.read('item') # returns None if not found
738
+ item = subDir.read('item','i2') # returns 'i2' if not found
739
+ item = subDir['item'] # raises a KeyError if not found
323
740
 
324
- subDir = parentDir('subdir')
741
+ Treat files in a directory like dictionaries::
325
742
 
326
- item = subDir('item', 'i1') <-- returns 'i1' if not found.
327
- item = subdir.read('item') <-- returns None if not found
328
- item = subdir.read('item','i2') <-- returns 'i2' if not found
329
- item = subDir['item'] <-- throws a KeyError if not found
330
- item = subDir.item <-- throws an AttributeError if not found
743
+ for file in subDir:
744
+ data = subDir[file]
745
+ f(file, data)
331
746
 
332
- 4) Treating data like dictionaries
747
+ for file, data in subDir.items():
748
+ f(file, data)
333
749
 
334
- def scanF(parentDir,...)
750
+ Delete items::
335
751
 
336
- subDir = parentDir('f')
752
+ del subDir['item'] # silently fails if 'item' does not exist
753
+ subDir.delete('item') # silently fails if 'item' does not exist
754
+ subDir.delete('item', True) # raises a KeyError if 'item' does not exist
337
755
 
338
- for item in subDir:
339
- data = subDir[item]
756
+ Cleaning up::
340
757
 
341
- Delete items:
758
+ parent.delete_all_content() # silently deletes all files with matching extensions, and sub directories.
342
759
 
343
- del subDir['item'] <-- silently fails if 'item' does not exist
344
- del subDir.item <-- silently fails if 'item' does not exist
345
- subDir.delete('item') <-- silently fails if 'item' does not exist
346
- subDir.delete('item', True) <-- throw a KeyError if 'item' does not exit
760
+ **File Format**
347
761
 
348
- 5) Cleaning up
762
+ ``SubDir`` supports a number of file formats via :class:`cdxcore.subdir.Format`.
763
+ Those can be controlled with the ``fmt`` keyword in various functions not least
764
+ :class:`cdxcore.subdir.SubDir`::
349
765
 
350
- parentDir.deleteAllContent() <-- silently deletes all files and sub directories.
766
+ subdir = SubDir("!/.test", fmt=SubDir.JSON_PICKLE)
351
767
 
352
- 6) As of version 0.2.59 subdir supports json file formats. Those can be controlled with the 'fmt' keyword in various functions.
353
- The most straightfoward way is to specify the format of the directory itself:
768
+ See :class:`cdxcore.subdir.Format` for supported formats.
354
769
 
355
- subdir = SubDir("!/.test", fmt=SubDir.JSON_PICKLE)
770
+ Parameters
771
+ ----------
772
+ name : str
773
+ Name of the directory.
774
+
775
+ The name may start with any of the following special characters:
776
+
777
+ * ``'.'`` for current directory
778
+ * ``'~'`` for home directory
779
+ * ``'!'`` for system default temp directory
780
+
781
+ The directory name may also contain a formatting string for defining ``ext`` on the fly:
782
+ for example use ``"!/test;*.bin"`` to specify a directory ``"test"`` in the user's
783
+ temp directory with extension ``"bin"``.
784
+
785
+ The directory name can be set to ``None`` in which case it is always empty
786
+ and attempts to write to it fail with :class:`EOFError`.
787
+
788
+ parent : str | SubDir, optional
789
+ Parent directory.
790
+
791
+ If ``parent`` is a :class:`cdxcore.subdir.SubDir` then its parameters are used
792
+ as default values here.
356
793
 
357
- The following formats are supported:
794
+ Default is ``None``.
795
+
796
+ ext : str, optional
797
+ Extension for files managed by this ``SubDir``. All files will share the same extension.
358
798
 
359
- SubDir.PICKLE:
360
- Use pickle
361
- SubDir.JSON_PLAIN:
362
- Uses cdxbasics.util.plain() to convert data into plain Python objects and writes
363
- this to disk as text. Loading back such files will result in plain Python objects,
364
- but *not* the original objects
365
- SubDir.JSON_PICKLE:
366
- Uses the jsonpickle package to load/write data in somewhat readable text formats.
367
- Data can be loaded back from such a file, but files may not be readable (e.g. numpy arrays
368
- are written in compressed form).
369
- SubDir.BLOSC:
370
- Uses https://www.blosc.org/python-blosc/ to compress data on-the-fly.
371
- BLOSC is much faster than GZIP or ZLIB but is limited to 2GB data, sadly.
372
- SubDir.ZLIB:
373
- Uses https://docs.python.org/3/library/zlib.html to compress data on-the-fly
374
- using, essentially, GZIP.
799
+ If set to ``""`` no extension is assigned to this directory. That means, for example, that
800
+ :meth:`cdxcore.subdir.SubDir.files` returns all files contained in the directory, not
801
+ just files with a specific extension.
802
+
803
+ If ``None``, use an extension depending on ``fmt``:
804
+
805
+ * 'pck' for the default PICKLE format.
806
+ * 'json' for JSON_PLAIN.
807
+ * 'jpck' for JSON_PICKLE.
808
+ * 'zbsc' for BLOSC.
809
+ * 'pgz' for GZIP.
810
+
811
+ Default is ``None``.
812
+
813
+ fmt : :class:`cdxcore.subdir.Format`, optional
375
814
 
376
- Summary of properties:
815
+ One of the :class:`cdxcore.subdir.Format` codes.
816
+ If ``ext`` is left to ``None`` then setting a format will also set the corresponding ``ext``.
817
+
818
+ Default is ``Format.PICKLE``.
377
819
 
378
- | Restores objects | Human readable | Speed | Compression
379
- PICKLE | yes | no | high | no
380
- JSON_PLAIN | no | yes | low | no
381
- JSON_PICKLE | yes | limited | low | no
382
- BLOSC | yes | no | high | yes
383
- GZIP | yes | no | high | yes
820
+ create_directory : bool | None, optional
821
+
822
+ Whether to create the directory upon creation of the ``SubDir`` object; otherwise it will be created upon first
823
+ :meth:`cdxcore.subdir.SubDir.write`.
824
+
825
+ Set to ``None`` to use the setting of the parent directory, or ``False`` if no parent
826
+ is specified.
827
+
828
+ Default is ``None``.
384
829
 
385
- Several other operations are supported; see help()
830
+ delete_everything : bool, optional
831
+
832
+ Delete all contents in the newly defined sub directory upon creation.
386
833
 
387
- Hans Buehler May 2020
834
+ Default is ``False``.
835
+
836
+ cache_controller : :class:`cdxcore.subdir.CacheController`, optional
837
+
838
+ An object which fine-tunes the behaviour of :meth:`cdxcore.subdir.SubDir.cache`.
839
+ See that function's documentation for further details. Default is ``None``.
388
840
  """
389
841
 
390
842
  class __RETURN_SUB_DIRECTORY(object):
391
843
  pass
844
+ """ :meta private: """
392
845
 
393
- Format = Format
394
- PICKLE = Format.PICKLE
846
+ Format = Format # :meta private
847
+ """ :meta private: """
848
+
849
+ PICKLE = Format.PICKLE
850
+ """ :meta private: """
851
+
395
852
  JSON_PICKLE = Format.JSON_PICKLE
853
+ """ :meta private: """
854
+
396
855
  JSON_PLAIN = Format.JSON_PLAIN
856
+ """ :meta private: """
857
+
397
858
  BLOSC = Format.BLOSC
859
+ """ :meta private: """
860
+
398
861
  GZIP = Format.GZIP
399
-
400
- DEFAULT_RAISE_ON_ERROR = False
862
+ """ :meta private: """
863
+
401
864
  RETURN_SUB_DIRECTORY = __RETURN_SUB_DIRECTORY
865
+ """ :meta private: """
866
+
402
867
  DEFAULT_FORMAT = Format.PICKLE
403
- DEFAULT_CREATE_DIRECTORY = False # legacy behaviour so that self.path is a valid path
868
+ """ Default :class:`cdxcore.subdir.Format`: ``Format.PICKLE`` """
869
+
404
870
  EXT_FMT_AUTO = "*"
871
+ """ :meta private: """
405
872
 
406
873
  MAX_VERSION_BINARY_LEN = 128
407
-
874
+ """ :meta private: """
875
+
408
876
  VER_NORMAL = 0
877
+ """ :meta private: """
409
878
  VER_CHECK = 1
879
+ """ :meta private: """
410
880
  VER_RETURN = 2
881
+ """ :meta private: """
882
+
411
883
 
412
884
  def __init__(self, name : str,
413
- parent = None, *,
885
+ parent : str|type = None, *,
414
886
  ext : str = None,
415
887
  fmt : Format = None,
416
- eraseEverything : bool = False,
417
- createDirectory : bool = None,
418
- cacheController : CacheController = None
888
+ create_directory : bool = None,
889
+ delete_everything : bool = False,
890
+ cache_controller : CacheController = None
419
891
  ):
420
892
  """
421
- Instantiates a sub directory which contains pickle files with a common extension.
422
- By default the directory is created.
423
-
424
- Absolute directories
425
- sd = SubDir("!/subdir") - relative to system temp directory
426
- sd = SubDir("~/subdir") - relative to user home directory
427
- sd = SubDir("./subdir") - relative to current working directory (explicit)
428
- sd = SubDir("subdir") - relative to current working directory (implicit)
429
- sd = SubDir("/tmp/subdir") - absolute path (linux)
430
- sd = SubDir("C:/temp/subdir") - absolute path (windows)
431
- Short-cut
432
- sd = SubDir("") - current working directory
433
-
434
- It is often desired that the user specifies a sub-directory name under some common parent directory.
435
- You can create sub directories if you provide a 'parent' directory:
436
- sd2 = SubDir("subdir2", parent=sd) - relative to other sub directory
437
- sd2 = sd("subdir2") - using call operator
438
- Works with strings, too:
439
- sd2 = SubDir("subdir2", parent="~/my_config") - relative to ~/my_config
440
-
441
- All files managed by SubDir will have the same extension.
442
- The extension can be specified with 'ext', or as part of the directory string:
443
- sd = SubDir("~/subdir;*.bin") - set extension to 'bin'
444
-
445
- COPY CONSTRUCTION
446
- This function also allows copy construction and constrution from a repr() string.
447
-
448
- HANDLING KEYS
449
- SubDirs allows reading data using the item and attribute notation, i.e. we may use
450
- sd = SubDir("~/subdir")
451
- x = sd.x
452
- y = sd['y']
453
- If the respective keys are not found, exceptions are thrown.
454
-
455
- NONE OBJECTS
456
- It is possible to set the directory name to 'None'. In this case the directory will behave as if:
457
- No files exist
458
- Writing fails with a EOFError.
893
+ Instantiates a sub directory which contains files with a common extension.
459
894
 
460
- Parameters
461
- ----------
462
- name - Name of the directory.
463
- '.' for current directory
464
- '~' for home directory
465
- '!' for system default temp directory
466
- May contain a formatting string for defining 'ext' on the fly:
467
- Use "!/test;*.bin" to specify 'test' in the system temp directory as root directory with extension 'bin'
468
- Can be set to None, see above.
469
- parent - Parent directory. If provided, will also set defaults for 'ext' and 'raiseOnError'
470
- ext - standard file extenson for data files. All files will share the same extension.
471
- If None, use the parent extension, or if that is not specified use an extension depending on 'fmt':
472
- 'pck' for the default PICKLE format
473
- 'json' for JSON_PLAIN
474
- 'jpck' for JSON_PICKLE
475
- Set to "" to turn off managing extensions.
476
- fmt - format, current pickle or json
477
- eraseEverything - delete all contents in the newly defined subdir
478
- createDirectory - whether to create the directory.
479
- Otherwise it will be created upon first write().
480
- Set to None to use the setting of the parent directory
481
- """
482
- createDirectory = bool(createDirectory) if not createDirectory is None else None
895
+ """
896
+ create_directory = bool(create_directory) if not create_directory is None else None
483
897
 
484
898
  # copy constructor support
485
899
  if isinstance(name, SubDir):
@@ -487,9 +901,9 @@ class SubDir(object):
487
901
  self._path = name._path
488
902
  self._ext = name._ext if ext is None else ext
489
903
  self._fmt = name._fmt if fmt is None else fmt
490
- self._crt = name._crt if createDirectory is None else createDirectory
491
- self._cctrl = name._cctrl if cacheController is None else cacheController
492
- if eraseEverything: raise ValueError( "Cannot use 'eraseEverything' when cloning a directory")
904
+ self._crt = name._crt if create_directory is None else create_directory
905
+ self._cctrl = name._cctrl if cache_controller is None else cache_controller
906
+ if delete_everything: raise ValueError( "Cannot use 'delete_everything' when cloning a directory")
493
907
  return
494
908
 
495
909
  # reconstruction from a dictionary
@@ -498,14 +912,14 @@ class SubDir(object):
498
912
  self._path = name['_path']
499
913
  self._ext = name['_ext'] if ext is None else ext
500
914
  self._fmt = name['_fmt'] if fmt is None else fmt
501
- self._crt = name['_crt'] if createDirectory is None else createDirectory
502
- self._cctrl = name['_cctrl'] if cacheController is None else cacheController
503
- if eraseEverything: raise ValueError( "Cannot use 'eraseEverything' when cloning a directory")
915
+ self._crt = name['_crt'] if create_directory is None else create_directory
916
+ self._cctrl = name['_cctrl'] if cache_controller is None else cache_controller
917
+ if delete_everything: raise ValueError( "Cannot use 'delete_everything' when cloning a directory")
504
918
  return
505
919
 
506
920
  # parent
507
921
  if isinstance(parent, str):
508
- parent = SubDir( parent, ext=ext, fmt=fmt, createDirectory=createDirectory, cacheController=cacheController )
922
+ parent = SubDir( parent, ext=ext, fmt=fmt, create_directory=create_directory, cache_controller=cache_controller )
509
923
  if not parent is None and not isinstance(parent, SubDir):
510
924
  raise ValueError( "'parent' must be SubDir, str, or None. Found object of type '{type(parent)}'")
511
925
 
@@ -543,15 +957,15 @@ class SubDir(object):
543
957
  else:
544
958
  self._ext = SubDir._extract_ext(ext)
545
959
 
546
- # createDirectory
547
- if createDirectory is None:
548
- self._crt = self.DEFAULT_CREATE_DIRECTORY if parent is None else parent._crt
960
+ # create_directory
961
+ if create_directory is None:
962
+ self._crt = False if parent is None else parent._crt
549
963
  else:
550
- self._crt = bool(createDirectory)
964
+ self._crt = bool(create_directory)
551
965
 
552
966
  # cache controller
553
- assert type(cacheController).__name__ == CacheController.__name__, ("'cacheController' should be of type 'CacheController'", type(cacheController))
554
- self._cctrl = cacheController
967
+ assert cache_controller is None or type(cache_controller).__name__ == CacheController.__name__, ("'cache_controller' should be of type 'CacheController'", type(cache_controller))
968
+ self._cctrl = cache_controller
555
969
 
556
970
  # name
557
971
  if name is None:
@@ -566,12 +980,12 @@ class SubDir(object):
566
980
  if len(name) > 1 and name[1] != '/':
567
981
  raise ValueError( txtfmt("If 'name' starts with '%s', then the second character must be '/' (or '\\' on windows). Found 'name' set to '%s'", name[:1], _name ))
568
982
  if name[0] == '!':
569
- name = SubDir.tempDir()[:-1] + name[1:]
983
+ name = SubDir.temp_dir()[:-1] + name[1:]
570
984
  elif name[0] == ".":
571
- name = SubDir.workingDir()[:-1] + name[1:]
985
+ name = SubDir.working_dir()[:-1] + name[1:]
572
986
  else:
573
987
  assert name[0] == "~", ("Internal error", name[0] )
574
- name = SubDir.userDir()[:-1] + name[1:]
988
+ name = SubDir.user_dir()[:-1] + name[1:]
575
989
  elif name == "..":
576
990
  error("Cannot use name '..'")
577
991
  elif not parent is None:
@@ -587,33 +1001,37 @@ class SubDir(object):
587
1001
  self._path = os.path.abspath(name) + '/'
588
1002
  self._path = self._path.replace('\\','/')
589
1003
 
590
- if eraseEverything:
591
- self.eraseEverything(keepDirectory=self._crt)
1004
+ if delete_everything:
1005
+ self.delete_everything(keep_directory=self._crt)
592
1006
  if self._crt:
593
- self.createDirectory()
1007
+ self.create_directory()
594
1008
 
595
1009
  @staticmethod
596
- def expandStandardRoot( name ):
1010
+ def expand_std_root( name ):
597
1011
  """
598
- Expands 'name' by a standardized root directory if provided:
599
- If 'name' starts with -> return
600
- ! -> tempDir()
601
- . -> workingDir()
602
- ~ -> userDir()
1012
+ Expands ``name`` by a standardized root directory if provided:
1013
+
1014
+ The first character of ``name`` can be either of:
1015
+
1016
+ * ``"!"`` returns :meth:`cdxcore.subdir.SubDir.temp_dir()`.
1017
+ * ``"."`` returns :meth:`cdxcore.subdir.SubDir.working_dir()`.
1018
+ * ``"~"`` returns :meth:`cdxcore.subdir.SubDir.user_dir()`.
1019
+
1020
+ If neither of these matches the first character, ``name``
1021
+ is returned as is.
603
1022
  """
604
1023
  if len(name) < 2 or name[0] not in ['.','!','~'] or name[1] not in ["\\","/"]:
605
1024
  return name
606
1025
  if name[0] == '!':
607
- return SubDir.tempDir() + name[2:]
1026
+ return SubDir.temp_dir() + name[2:]
608
1027
  elif name[0] == ".":
609
- return SubDir.workingDir() + name[2:]
1028
+ return SubDir.working_dir() + name[2:]
610
1029
  else:
611
- return SubDir.userDir() + name[2:]
1030
+ return SubDir.user_dir() + name[2:]
612
1031
 
613
- def createDirectory( self ):
1032
+ def create_directory( self ):
614
1033
  """
615
- Creates the directory if it doesn't exist yet.
616
- Does not do anything if is_none.
1034
+ Creates the current directory if it doesn't exist yet.
617
1035
  """
618
1036
  # create directory/clean up
619
1037
  if self._path is None:
@@ -628,8 +1046,8 @@ class SubDir(object):
628
1046
  if not os.path.isdir(self._path[:-1]):
629
1047
  raise NotADirectoryError(txtfmt( "Cannot use sub directory %s: object exists but is not a directory", self._path[:-1] ))
630
1048
 
631
- def pathExists(self) -> bool:
632
- """ Returns True if the current directory exists """
1049
+ def path_exists(self) -> bool:
1050
+ """ Whether the current directory exists """
633
1051
  return os.path.exists( self._path[:-1] ) if not self._path is None else False
634
1052
 
635
1053
  # -- a few basic properties --
@@ -659,60 +1077,79 @@ class SubDir(object):
659
1077
 
660
1078
  @property
661
1079
  def is_none(self) -> bool:
662
- """ Whether this object is 'None' or not """
1080
+ """ Whether this object is ``None`` or not. For such ``SubDir`` object no files exists, and writing any file will fail. """
663
1081
  return self._path is None
664
1082
 
665
1083
  @property
666
1084
  def path(self) -> str:
667
1085
  """
668
- Return current path, including trailing '/'
669
- Note that the path may not exist yet. If this is required, consider using existing_path
1086
+ Return current path, including trailing ``'/'``.
1087
+
1088
+ Note that the path may not exist yet. If existence is required, consider using
1089
+ :meth:`cdxcore.subdir.SubDir.existing_path`.
670
1090
  """
671
1091
  return self._path
672
1092
 
673
1093
  @property
674
1094
  def existing_path(self) -> str:
675
1095
  """
676
- Return current path, including training '/'.
677
- In addition to self.path this property ensures that the directory structure exists (or raises an exception)
1096
+ Return current path, including trailing ``'/'``.
1097
+
1098
+ ``existing_path`` ensures that the directory structure exists (or raises an exception).
1099
+ Use :meth:`cdxcore.subdir.SubDir.path` if creation on the fly is not desired.
678
1100
  """
679
- self.createDirectory()
1101
+ self.create_directory()
680
1102
  return self.path
681
1103
 
682
1104
  @property
683
1105
  def fmt(self) -> Format:
684
- """ Returns current format """
1106
+ """ Returns current :class:`cdxcore.subdir.Format`. """
685
1107
  return self._fmt
686
1108
 
687
1109
  @property
688
1110
  def ext(self) -> str:
689
1111
  """
690
- Returns the common extension of the files in this directory, including leading '.'
691
- Resolves '*' into the extension associated with the current format.
1112
+ Returns the common extension of the files in this directory, including leading ``'.'``.
1113
+ Resolves ``"*"`` into the extension associated with the current :class:`cdxcore.subdir.Format`.
692
1114
  """
693
1115
  return self._ext if self._ext != self.EXT_FMT_AUTO else self._auto_ext(self._fmt)
694
1116
 
695
- def autoExt( self, ext : str = None ) -> str:
1117
+ def auto_ext( self, ext_or_fmt : str|Format = None ) -> str:
696
1118
  """
697
- Computes the effective extension based on inputs 'ext', defaulting to the SubDir's extension.
698
- Resolves '*' into the extension associated with the specified format.
699
- This function allows setting 'ext' also as a Format.
1119
+ Computes the effective extension based on the input ``ext_or_fmt``,
1120
+ and the current settings for ``self``.
1121
+
1122
+ If ``ext_or_fmt`` is set to ``"*"`` then the extension associated to
1123
+ the format of ``self`` is returned.
700
1124
 
701
- Returns the extension with leading '.'
1125
+ Parameters
1126
+ ----------
1127
+ ext_or_fmt : str or :class:`cdxcore.subdir.Format`
1128
+ An extension or a format.
1129
+
1130
+ Returns
1131
+ -------
1132
+ ext : str
1133
+ The extension with leading ``'.'``.
702
1134
  """
703
- if isinstance(ext, Format):
704
- return self._auto_ext(ext)
1135
+ if isinstance(ext_or_fmt, Format):
1136
+ return self._auto_ext(ext_or_fmt)
705
1137
  else:
706
- ext = self._ext if ext is None else SubDir._extract_ext(ext)
1138
+ ext = self._ext if ext_or_fmt is None else SubDir._extract_ext(ext_or_fmt)
707
1139
  return ext if ext != self.EXT_FMT_AUTO else self._auto_ext(self._fmt)
708
1140
 
709
- def autoExtFmt( self, *, ext : str = None, fmt : Format = None ) -> str:
1141
+ def auto_ext_fmt( self, *, ext : str = None, fmt : Format = None ) -> tuple[str]:
710
1142
  """
711
- Computes the effective extension and format based on inputs 'ext' and 'fmt', each of which defaults to the SubDir's current settings.
712
- Resolves '*' into the extension associated with the specified format.
713
- This function allows setting 'ext' also as a Format.
1143
+ Computes the effective extension and format based on inputs ``ext`` and ``fmt``,
1144
+ each of which defaults to the respective values of ``self``.
1145
+
1146
+ Resolves an ``ext`` of ``"*"`` into the extension associated with ``fmt``.
714
1147
 
715
- Returns (ext, fmt) where 'ext' contains the leading '.'
1148
+ Returns
1149
+ -------
1150
+ (ext, fmt) : tuple
1151
+ Here ``ext`` contains the leading ``'.'`` and ``fmt`` is
1152
+ of type :class:`cdxcore.subdir.Format`.
716
1153
  """
717
1154
  if isinstance(ext, Format):
718
1155
  verify( fmt is None or fmt == ext, "If 'ext' is a Format, then 'fmt' must match 'ext' or be None. Found '%s' and '%s', respectively.", ext, fmt, exception=ValueError )
@@ -724,8 +1161,8 @@ class SubDir(object):
724
1161
  return ext, fmt
725
1162
 
726
1163
  @property
727
- def cacheController(self):
728
- """ Returns an assigned CacheController, or None """
1164
+ def cache_controller(self):
1165
+ """ Returns an assigned :class:`cdxcore.subdir.CacheController`, or ``None`` """
729
1166
  return self._cctrl if not self._cctrl is None else default_cacheController
730
1167
 
731
1168
  # -- static helpers --
@@ -747,7 +1184,10 @@ class SubDir(object):
747
1184
 
748
1185
  @staticmethod
749
1186
  def _version_to_bytes( version : str ) -> bytearray:
750
- """ Convert string version to byte string of at most size MAX_VERSION_BINARY_LEN + 1 """
1187
+ """
1188
+ Convert string version to byte string of at most size
1189
+ :data:`cdxcore.subdir.SubDir.MAX_VERSION_BINARY_LEN` + 1
1190
+ """
751
1191
  if version is None:
752
1192
  return None
753
1193
  version_ = bytearray(version,'utf-8')
@@ -790,69 +1230,67 @@ class SubDir(object):
790
1230
 
791
1231
  # -- public utilities --
792
1232
 
793
- def fullFileName(self, key : str, *, ext : str = None) -> str:
1233
+ def full_file_name(self, file : str, *, ext : str = None) -> str:
794
1234
  """
795
1235
  Returns fully qualified file name.
796
- The function tests that 'key' does not contain directory information.
797
-
798
- If 'self' is None, then this function returns None
799
- If key is None then this function returns None
1236
+
1237
+ The function tests that ``file`` does not contain directory information.
800
1238
 
801
1239
  Parameters
802
1240
  ----------
803
- key : str
804
- Core file name, e.g. the 'key' in a data base sense
1241
+ file : str
1242
+ Core file name without path or extension.
805
1243
  ext : str
806
- If not None, use this extension rather than self.ext
1244
+ If not ``None``, use this extension rather than :attr:`cdxcore.subdir.SubDir.ext`.
807
1245
 
808
1246
  Returns
809
1247
  -------
810
- Fully qualified system file name
811
-
812
- [This function has an alias 'fullKeyName' for backward compatibility]
1248
+ Filename : str
1249
+ Fully qualified system file name.
1250
+ If the path of ``self`` is ``None`` or if ``file`` is ``None``, then this function returns ``None``.
813
1251
  """
814
- if self._path is None or key is None:
1252
+ if self._path is None or file is None:
815
1253
  return None
816
- key = str(key)
817
- verify( len(key) > 0, "'key' cannot be empty")
1254
+ file = str(file)
1255
+ verify( len(file) > 0, "'file' cannot be empty")
818
1256
 
819
- sub, _ = os.path.split(key)
820
- verify( len(sub) == 0, "Key '%s' contains directory information", key)
1257
+ sub, _ = os.path.split(file)
1258
+ verify( len(sub) == 0, "Key '%s' contains directory information", file)
821
1259
 
822
- verify( key[0] != "!", "Key '%s' cannot start with '!' (this symbol indicates the temp directory)", key, exception=ValueError )
823
- verify( key[0] != "~", "Key '%s' cannot start with '~' (this symbol indicates the user's directory)", key, exception=ValueError )
1260
+ verify( file[0] != "!", "Key '%s' cannot start with '!' (this symbol indicates the temp directory)", file, exception=ValueError )
1261
+ verify( file[0] != "~", "Key '%s' cannot start with '~' (this symbol indicates the user's directory)", file, exception=ValueError )
824
1262
 
825
- ext = self.autoExt( ext )
826
- if len(ext) > 0 and key[-len(ext):] != ext:
827
- return self._path + key + ext
828
- return self._path + key
829
- fullKeyName = fullFileName # backwards compatibility
1263
+ ext = self.auto_ext( ext )
1264
+ if len(ext) > 0 and file[-len(ext):] != ext:
1265
+ return self._path + file + ext
1266
+ return self._path + file
1267
+ full_file_name = full_file_name # backwards compatibility
830
1268
 
831
1269
  @staticmethod
832
- def tempDir() -> str:
1270
+ def temp_dir() -> str:
833
1271
  """
834
- Return system temp directory. Short cut to tempfile.gettempdir()
835
- Result contains trailing '/'
1272
+ Return system temp directory. Short-cut to :func:`tempfile.gettempdir`.
1273
+ Result contains trailing ``'/'``.
836
1274
  """
837
1275
  d = tempfile.gettempdir()
838
1276
  assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-1", d)
839
1277
  return d + "/"
840
1278
 
841
1279
  @staticmethod
842
- def workingDir() -> str:
1280
+ def working_dir() -> str:
843
1281
  """
844
- Return current working directory. Short cut for os.getcwd()
845
- Result contains trailing '/'
1282
+ Return current working directory. Short-cut for :func:`os.getcwd`.
1283
+ Result contains trailing ``'/'``.
846
1284
  """
847
1285
  d = os.getcwd()
848
1286
  assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-2", d)
849
1287
  return d + "/"
850
1288
 
851
1289
  @staticmethod
852
- def userDir() -> str:
1290
+ def user_dir() -> str:
853
1291
  """
854
- Return current working directory. Short cut for os.path.expanduser('~')
855
- Result contains trailing '/'
1292
+ Return the current user's home directory. Short-cut for :func:`os.path.expanduser` with parameter ``'~'``.
1293
+ Result contains trailing ``'/'``.
856
1294
  """
857
1295
  d = os.path.expanduser('~')
858
1296
  assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-3", d)
@@ -860,95 +1298,101 @@ class SubDir(object):
860
1298
 
861
1299
  # -- read --
862
1300
 
863
- def _read_reader( self, reader, key : str, default, raiseOnError : bool, *, ext : str = None ):
1301
+ def _read_reader( self, reader, file : str, default, raise_on_error : bool, *, ext : str = None ):
864
1302
  """
865
1303
  Utility function for read() and readLine()
866
1304
 
867
1305
  Parameters
868
1306
  ----------
869
- reader( key, fullFileName, default )
1307
+ reader( file, full_file_name, default )
870
1308
  A function which is called to read the file once the correct directory is identified
871
- key : key (for error messages, might include '/')
872
- fullFileName : full file name
1309
+ file : file (for error messages, might include '/')
1310
+ full_file_name : full file name
873
1311
  default value
874
- key : str or list
875
- str: fully qualified key
1312
+ file : str or list
1313
+ str: fully qualified file
876
1314
  list: list of fully qualified names
877
1315
  default :
878
1316
  default value. None is a valid default value
879
1317
  list : list of defaults for a list of keys
880
- raiseOnError : bool
1318
+ raise_on_error : bool
881
1319
  If True, and the file does not exist, throw exception
882
1320
  ext :
883
1321
  Extension or None for current extension.
884
1322
  list : list of extensions for a list of keys
885
1323
  """
886
1324
  # vector version
887
- if not isinstance(key,str):
888
- if not isinstance(key, Collection): raise ValueError(txtfmt( "'key' must be a string, or an interable object. Found type %s", type(key)))
889
- l = len(key)
1325
+ if not isinstance(file,str):
1326
+ if not isinstance(file, Collection): raise ValueError(txtfmt( "'file' must be a string, or an interable object. Found type %s", type(file)))
1327
+ l = len(file)
890
1328
  if default is None or isinstance(default,str) or not isinstance(default, Collection):
891
1329
  default = [ default ] * l
892
1330
  else:
893
- if len(default) != l: raise ValueError(txtfmt("'default' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(default), l ))
1331
+ if len(default) != l: raise ValueError(txtfmt("'default' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(default), l ))
894
1332
  if ext is None or isinstance(ext, str) or not isinstance(ext, Collection):
895
1333
  ext = [ ext ] * l
896
1334
  else:
897
- if len(ext) != l: raise ValueError(txtfmt("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l ))
898
- return [ self._read_reader(reader=reader,key=k,default=d,raiseOnError=raiseOnError,ext=e) for k, d, e in zip(key,default,ext) ]
1335
+ if len(ext) != l: raise ValueError(txtfmt("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l ))
1336
+ return [ self._read_reader(reader=reader,file=k,default=d,raise_on_error=raise_on_error,ext=e) for k, d, e in zip(file,default,ext) ]
899
1337
 
900
1338
  # deleted directory?
901
1339
  if self._path is None:
902
- verify( not raiseOnError, "Trying to read '%s' from an empty directory object", key, exception=NotADirectoryError)
1340
+ verify( not raise_on_error, "Trying to read '%s' from an empty directory object", file, exception=NotADirectoryError)
903
1341
  return default
904
1342
 
905
- # single key
906
- if len(key) == 0: raise ValueError(txtfmt("'key' missing (the filename)" ))
907
- sub, key_ = os.path.split(key)
1343
+ # single file
1344
+ if len(file) == 0: raise ValueError(txtfmt("'file' missing (the filename)" ))
1345
+ sub, key_ = os.path.split(file)
908
1346
  if len(sub) > 0:
909
- return self(sub)._read_reader(reader=reader,key=key_,default=default,raiseOnError=raiseOnError,ext=ext)
910
- if len(key_) == 0: ValueError(txtfmt("'key' %s indicates a directory, not a file", key))
1347
+ return self(sub)._read_reader(reader=reader,file=key_,default=default,raise_on_error=raise_on_error,ext=ext)
1348
+ if len(key_) == 0: ValueError(txtfmt("'file' %s indicates a directory, not a file", file))
911
1349
 
912
1350
  # don't try if directory doesn't exist
913
- fullFileName = self.fullFileName(key,ext=ext)
914
- if not self.pathExists():
915
- if raiseOnError:
916
- raise KeyError(key, fullFileName)
1351
+ full_file_name = self.full_file_name(file,ext=ext)
1352
+ if not self.path_exists():
1353
+ if raise_on_error:
1354
+ raise KeyError(file, full_file_name)
917
1355
  return default
918
1356
 
919
1357
  # does file exist?
920
- if not os.path.exists(fullFileName):
921
- if raiseOnError:
922
- raise KeyError(key,fullFileName)
1358
+ if not os.path.exists(full_file_name):
1359
+ if raise_on_error:
1360
+ raise KeyError(file,full_file_name)
923
1361
  return default
924
- if not os.path.isfile(fullFileName):
925
- raise IOError(txtfmt( "Cannot read %s: object exists, but is not a file (full path %s)", key, fullFileName ))
1362
+ if not os.path.isfile(full_file_name):
1363
+ raise IOError(txtfmt( "Cannot read '%s': object exists, but is not a file (full path %s)", file, full_file_name ))
926
1364
 
927
1365
  # read content
928
1366
  # delete existing files upon read error
929
1367
  try:
930
- return reader( key, fullFileName, default )
1368
+ return reader( file, full_file_name, default )
931
1369
  except EOFError as e:
932
1370
  try:
933
- os.remove(fullFileName)
934
- warn("Cannot read %s; file deleted (full path %s).\nError: %s",key,fullFileName, str(e))
1371
+ os.remove(full_file_name)
1372
+ warn("Cannot read '%s'; file deleted (full path '%s').\nError: %s",file,full_file_name, str(e))
935
1373
  except Exception as e:
936
- warn("Cannot read %s; attempt to delete file failed (full path %s): %s",key,fullFileName,str(e))
1374
+ warn("Cannot read '%s'; subsequent attempt to delete file failed (full path '%s''): %s",file,full_file_name,str(e))
937
1375
  except FileNotFoundError as e:
938
- if raiseOnError:
939
- raise KeyError(key, fullFileName, str(e)) from e
1376
+ if raise_on_error:
1377
+ raise KeyError(file, full_file_name, str(e)) from e
1378
+ except VersionError as e:
1379
+ if raise_on_error:
1380
+ raise e
1381
+ except VersionPresentError as e:
1382
+ if raise_on_error:
1383
+ raise e
940
1384
  except Exception as e:
941
- if raiseOnError:
942
- raise KeyError(key, fullFileName, str(e)) from e
1385
+ if raise_on_error:
1386
+ raise KeyError(file, full_file_name, str(e)) from e
943
1387
  except (ImportError, BaseException) as e:
944
- e.add_note( key )
945
- e.add_note( fullFileName )
1388
+ e.add_note( file )
1389
+ e.add_note( full_file_name )
946
1390
  raise e
947
1391
  return default
948
1392
 
949
- def _read( self, key : str,
1393
+ def _read( self, file : str,
950
1394
  default = None,
951
- raiseOnError : bool = False,
1395
+ raise_on_error : bool = False,
952
1396
  *,
953
1397
  version : str = None,
954
1398
  ext : str = None,
@@ -957,18 +1401,34 @@ class SubDir(object):
957
1401
  handle_version : int = 0
958
1402
  ):
959
1403
  """ See read() """
960
- ext, fmt = self.autoExtFmt(ext=ext, fmt=fmt)
1404
+ ext, fmt = self.auto_ext_fmt(ext=ext, fmt=fmt)
961
1405
  version = str(version) if not version is None else None
962
1406
  version = version if handle_version != SubDir.VER_RETURN else ""
963
1407
  assert not fmt == self.EXT_FMT_AUTO, ("'fmt' is '*' ...?")
964
1408
 
965
1409
  if version is None and fmt in [Format.BLOSC, Format.GZIP]:
966
- version = ""
1410
+ # blosc and gzip have unexpected side effects
1411
+ # when a version is attempted to be read but is not present
1412
+ # (e.g. blosc causes a MemoryError)
1413
+ version = ""
967
1414
 
968
- def reader( key, fullFileName, default ):
1415
+ def reader( file, full_file_name, default ):
969
1416
  test_version = "(unknown)"
970
- if fmt == Format.PICKLE or fmt == Format.BLOSC:
971
- with open(fullFileName,"rb") as f:
1417
+
1418
+ def handle_pickle_error(e):
1419
+ err = "invalid load key, '\\x03'."
1420
+ if not version is None or e.args[0] != err:
1421
+ print("####", e.args)
1422
+ raise e
1423
+ raise VersionPresentError(
1424
+ f"Error reading '{full_file_name}': encountered an unpickling error '{err}' "+\
1425
+ f"while attempting to read file using {str(fmt)}. "+\
1426
+ "This is likely caused by attempting to read a file which was written with "+\
1427
+ "version information without providing a test version during read(). If the version is of the file "+\
1428
+ "is not important, use `version=\"*\"'", e) from e
1429
+ if fmt == Format.PICKLE:
1430
+ # we do not read any version information if not requested
1431
+ with open(full_file_name,"rb") as f:
972
1432
  # handle version as byte string
973
1433
  ok = True
974
1434
  if not version is None:
@@ -981,37 +1441,55 @@ class SubDir(object):
981
1441
  if ok:
982
1442
  if handle_version == SubDir.VER_CHECK:
983
1443
  return True
984
- if fmt == Format.PICKLE:
1444
+ try:
985
1445
  data = pickle.load(f)
986
- elif fmt == Format.BLOSC:
987
- if blosc is None:
988
- raise ModuleNotFoundError("blosc", "'blosc' not found.")
989
- nnbb = f.read(2)
990
- num_blocks = int.from_bytes( nnbb, 'big', signed=False )
991
- data = bytearray()
992
- for i in range(num_blocks):
993
- blockl = int.from_bytes( f.read(6), 'big', signed=False )
994
- if blockl>0:
995
- bdata = blosc.decompress( f.read(blockl) )
996
- data += bdata
997
- del bdata
1446
+ except pickle.UnpicklingError as e:
1447
+ handle_pickle_error(e)
1448
+ return data
1449
+
1450
+ elif fmt == Format.BLOSC:
1451
+ # we do not write
1452
+ # any version information if not requested
1453
+ with open(full_file_name,"rb") as f:
1454
+ # handle version as byte string
1455
+ ok = True
1456
+ if not version is None: # it's never None
1457
+ test_len = int( f.read( 1 )[0] )
1458
+ test_version = f.read(test_len)
1459
+ test_version = test_version.decode("utf-8")
1460
+ if handle_version == SubDir.VER_RETURN:
1461
+ return test_version
1462
+ ok = (version == "*" or test_version == version)
1463
+ if ok:
1464
+ if handle_version == SubDir.VER_CHECK:
1465
+ return True
1466
+ nnbb = f.read(2)
1467
+ num_blocks = int.from_bytes( nnbb, 'big', signed=False )
1468
+ data = bytearray()
1469
+ for i in range(num_blocks):
1470
+ blockl = int.from_bytes( f.read(6), 'big', signed=False )
1471
+ if blockl>0:
1472
+ bdata = blosc.decompress( f.read(blockl) )
1473
+ data += bdata
1474
+ del bdata
1475
+ try:
998
1476
  data = pickle.loads(data)
999
- else:
1000
- raise NotImplementedError(fmt, txtfmt("Unkown format '%s'", fmt))
1477
+ except pickle.UnpicklingError as e:
1478
+ handle_pickle_error(e)
1001
1479
  return data
1002
1480
 
1003
1481
  elif fmt == Format.GZIP:
1004
- if gzip is None:
1005
- raise ModuleNotFoundError("gzip", "'gzip' not found'")
1006
- with gzip.open(fullFileName,"rb") as f:
1482
+ # always read version information
1483
+ with gzip.open(full_file_name,"rb") as f:
1007
1484
  # handle version as byte string
1008
- ok = True
1009
- test_len = int( f.read( 1 )[0] )
1010
- test_version = f.read(test_len)
1011
- test_version = test_version.decode("utf-8")
1012
- if handle_version == SubDir.VER_RETURN:
1013
- return test_version
1014
- ok = (version == "*" or test_version == version)
1485
+ ok = True
1486
+ if not version is None: # it's never None
1487
+ test_len = int( f.read( 1 )[0] )
1488
+ test_version = f.read(test_len)
1489
+ test_version = test_version.decode("utf-8")
1490
+ if handle_version == SubDir.VER_RETURN:
1491
+ return test_version
1492
+ ok = (version == "*" or test_version == version)
1015
1493
  if ok:
1016
1494
  if handle_version == SubDir.VER_CHECK:
1017
1495
  return True
@@ -1019,13 +1497,16 @@ class SubDir(object):
1019
1497
  return data
1020
1498
 
1021
1499
  elif fmt in [Format.JSON_PLAIN, Format.JSON_PICKLE]:
1022
- with open(fullFileName,"rt",encoding="utf-8") as f:
1500
+ # only read version information if requested
1501
+ with open(full_file_name,"rt",encoding="utf-8") as f:
1023
1502
  # handle versioning
1024
1503
  ok = True
1025
1504
  if not version is None:
1026
1505
  test_version = f.readline()
1027
1506
  if test_version[:2] != "# ":
1028
- raise EnvironmentError("Error reading '%s': file does not appear to contain a version (it should start with '# ')" % fullFileName)
1507
+ raise VersionError("Error reading '{full_file_name}' using {fmt}: file does not appear to contain a version (it should start with '# ')",
1508
+ version_found="",
1509
+ version_expected=version)
1029
1510
  test_version = test_version[2:]
1030
1511
  if test_version[-1:] == "\n":
1031
1512
  test_version = test_version[:-1]
@@ -1037,8 +1518,6 @@ class SubDir(object):
1037
1518
  return ok
1038
1519
  # read
1039
1520
  if fmt == Format.JSON_PICKLE:
1040
- if jsonpickle is None:
1041
- raise ModuleNotFoundError("jsonpickle", "'jsonpickle' not found'")
1042
1521
  return jsonpickle.decode( f.read() )
1043
1522
  else:
1044
1523
  assert fmt == Format.JSON_PLAIN, ("Internal error: unknown Format", fmt)
@@ -1048,25 +1527,33 @@ class SubDir(object):
1048
1527
 
1049
1528
  # arrive here if version is wrong
1050
1529
  # delete a wrong version
1530
+
1531
+ if version == "":
1532
+ raise VersionPresentError(f"Error reading '{full_file_name}' using {fmt}: the file has version '{test_version}', but was attempted to be read without "+\
1533
+ "a test version. If you intended to accept any version, use 'version=\"*\"' instead.")
1534
+
1051
1535
  deleted = ""
1052
1536
  if delete_wrong_version:
1053
1537
  try:
1054
- os.remove(fullFileName)
1538
+ os.remove(full_file_name)
1055
1539
  e = None
1056
1540
  except Exception as e_:
1057
1541
  e = str(e_)
1058
1542
  if handle_version == SubDir.VER_CHECK:
1059
1543
  return False
1060
- if not raiseOnError:
1544
+ if not raise_on_error:
1061
1545
  return default
1062
1546
  deleted = " (file was deleted)" if e is None else " (attempt to delete file failed: %s)" % e
1063
- raise EnvironmentError("Error reading '%s': found version '%s' not '%s'%s" % (fullFileName,str(test_version),str(version),deleted))
1547
+ raise VersionError( f"Error reading '{full_file_name}' using {fmt}: found version '{test_version}' not '{version}'{deleted}",
1548
+ version_found=test_version,
1549
+ version_expected=version
1550
+ )
1064
1551
 
1065
- return self._read_reader( reader=reader, key=key, default=default, raiseOnError=raiseOnError, ext=ext )
1552
+ return self._read_reader( reader=reader, file=file, default=default, raise_on_error=raise_on_error, ext=ext )
1066
1553
 
1067
- def read( self, key : str,
1554
+ def read( self, file : str,
1068
1555
  default = None,
1069
- raiseOnError : bool = False,
1556
+ raise_on_error : bool = False,
1070
1557
  *,
1071
1558
  version : str = None,
1072
1559
  delete_wrong_version : bool = True,
@@ -1074,296 +1561,323 @@ class SubDir(object):
1074
1561
  fmt : Format = None
1075
1562
  ):
1076
1563
  """
1077
- Read pickled data from 'key' if the file exists, or return 'default'
1078
- -- Supports 'key' containing directories
1079
- -- Supports 'key' (and default, ext) being iterable.
1080
- In this case any any iterable 'default' except strings are considered accordingly.
1081
- In order to have a unit default which is an iterable, you will have to wrap it in another iterable, e.g.
1082
- E.g.:
1083
- keys = ['file1', 'file2']
1084
-
1085
- sd.read( keys )
1086
- --> works, both are using default None
1087
-
1088
- sd.read( keys, 1 )
1089
- --> works, both are using default '1'
1564
+ Read data from a file if the file exists, or return ``default``.
1090
1565
 
1091
- sd.read( keys, [1,2] )
1092
- --> works, defaults 1 and 2, respectively
1566
+ * Supports ``file`` containing directory information.
1567
+ * Supports ``file`` (and ``default`` as well as ``ext``) being iterable.
1568
+ Examples::
1569
+
1570
+ from cdxcore.subdir import SubDir
1571
+ files = ['file1', 'file2']
1572
+ sd = SubDir("!/test")
1093
1573
 
1094
- sd.read( keys, [1] )
1095
- --> produces error as len(keys) != len(default)
1574
+ sd.read( files ) # both files are using default None
1575
+ sd.read( files, 1 ) # both files are using default '1'
1576
+ sd.read( files, [1,2] ) # files use defaults 1 and 2, respectively
1096
1577
 
1097
- Strings are iterable but are treated as single value.
1098
- Therefore
1099
- sd.read( keys, '12' )
1100
- means the default value '12' is used for both files.
1101
- Use
1102
- sd.read( keys, ['1','2'] )
1103
- in case the intention was using '1' and '2', respectively.
1578
+ sd.read( files, [1] ) # produces error as len(files) != len([1])
1104
1579
 
1105
- Returns the read object, or a list of objects if 'key' was iterable.
1106
- If the current directory is 'None', then behaviour is as if the file did not exist.
1580
+ Strings are iterable but are treated as single value.
1581
+ Therefore::
1582
+
1583
+ sd.read( files, '12' ) # the default value '12' is used for both files
1584
+ sd.read( files, ['1','2'] ) # use defaults '1' and '2', respectively
1107
1585
 
1108
1586
  Parameters
1109
1587
  ----------
1110
- key : str
1111
- A core filename ("key") or a list thereof. The 'key' may contain subdirectory information '/'.
1588
+ file : str
1589
+ A file name or a list thereof. ``file`` may contain subdirectories.
1590
+
1112
1591
  default :
1113
- Default value, or default values if key is a list
1114
- raiseOnError : bool
1592
+ Default value, or default values if ``file`` is a list.
1593
+
1594
+ raise_on_error : bool
1115
1595
  Whether to raise an exception if reading an existing file failed.
1116
1596
  By default this function fails silently and returns the default.
1597
+
1117
1598
  version : str
1118
- If not None, specifies the version of the current code base.
1599
+ If not ``None``, specifies the version of the current code base.
1600
+
1119
1601
  In this case, this version will be compared to the version of the file being read.
1120
- If they do not match, read fails (either by returning default or throwing an exception).
1121
- You can specify version "*" to read any version. This is distrinct from reading a file without version.
1602
+ If they do not match, read fails (either by returning default or throwing a :class:`cdxcore.version.VersionError` exception).
1603
+
1604
+ You can specify version ``"*"`` to accept any version.
1605
+ Note that this is distinct
1606
+ from using ``None``, which stipulates that the file should not
1607
+ have version information.
1608
+
1122
1609
  delete_wrong_version : bool
1123
- If True, and if a wrong version was found, delete the file.
1610
+ If ``True``, and if a wrong version was found, delete the file.
1611
+
1124
1612
  ext : str
1125
- Extension overwrite, or a list thereof if key is a list
1126
- Set to:
1127
- -- None to use directory's default
1128
- -- '*' to use the extension implied by 'fmt'
1129
- -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1130
- fmt : Format
1131
- File format or None to use the directory's default.
1132
- Note that 'fmt' cannot be a list even if 'key' is.
1133
- Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
1613
+ Extension overwrite, or a list thereof if ``file`` is a list.
1614
+
1615
+ Use:
1616
+
1617
+ * ``None`` to use directory's default.
1618
+ * ``'*'`` to use the extension implied by ``fmt``.
1619
+ * ``""`` to turn of extension management.
1620
+
1621
+ fmt : :class:`cdxcore.subdir.Format`
1622
+ File :class:`cdxcore.subdir.Format` or ``None`` to use the directory's default.
1623
+
1624
+ Note:
1625
+
1626
+ * ``fmt`` cannot be a list even if ``file`` is.
1627
+ * Unless ``ext`` or the SubDir's extension is ``'*'``, changing the format does not automatically change the extension.
1134
1628
 
1135
1629
  Returns
1136
1630
  -------
1137
- For a single 'key': Content of the file if successfully read, or 'default' otherwise.
1138
- If 'key' is a list: list of contents.
1631
+ Content
1632
+ For a single ``file`` returns the content of the file if successfully read, or ``default`` otherwise.
1633
+ If ``file`` is a list: list of contents.
1634
+
1635
+ Raises
1636
+ ------
1637
+ :class:`cdxcore.version.VersionError`:
1638
+ If the file's version did not match the ``version`` provided.
1639
+
1139
1640
  """
1140
- return self._read( key=key,
1641
+ return self._read( file=file,
1141
1642
  default=default,
1142
- raiseOnError=raiseOnError,
1643
+ raise_on_error=raise_on_error,
1143
1644
  version=version,
1144
1645
  ext=ext,
1145
1646
  fmt=fmt,
1146
1647
  delete_wrong_version=delete_wrong_version,
1147
1648
  handle_version=SubDir.VER_NORMAL )
1148
1649
 
1149
- get = read # backwards compatibility
1150
-
1151
- def is_version( self, key : str, version : str = None, raiseOnError : bool = False, *, ext : str = None, fmt : Format = None, delete_wrong_version : bool = True ):
1650
+ def is_version( self, file : str, version : str = None, raise_on_error : bool = False, *, ext : str = None, fmt : Format = None, delete_wrong_version : bool = True ):
1152
1651
  """
1153
- Compares the version of 'key' with 'version'.
1652
+ Tests the version of a file.
1154
1653
 
1155
1654
  Parameters
1156
1655
  ----------
1157
- key : str
1158
- A core filename ("key") or a list thereof. The 'key' may contain subdirectory information '/'.
1656
+ file : str
1657
+ A filename, or a list thereof.
1658
+
1159
1659
  version : str
1160
- Specifies the version of the current code base to compare with.
1161
- You can use '*' to match any version
1660
+ Specifies the version to compare the file's version with.
1661
+
1662
+ You can use ``"*"`` to match any version.
1162
1663
 
1163
- raiseOnError : bool
1664
+ raise_on_error : bool
1164
1665
  Whether to raise an exception if accessing an existing file failed (e.g. if it is a directory).
1165
1666
  By default this function fails silently and returns the default.
1667
+
1166
1668
  delete_wrong_version : bool
1167
- If True, and if a wrong version was found, delete the file.
1669
+ If True, and if a wrong version was found, delete ``file``.
1670
+
1168
1671
  ext : str
1169
- Extension overwrite, or a list thereof if key is a list.
1672
+ Extension overwrite, or a list thereof if file is a list.
1673
+
1170
1674
  Set to:
1171
- -- None to use directory's default
1172
- -- '*' to use the extension implied by 'fmt'
1173
- -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1174
- fmt : Format
1175
- File format or None to use the directory's default.
1176
- Note that 'fmt' cannot be a list even if 'key' is.
1177
- Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
1675
+
1676
+ * ``None`` to use directory's default.
1677
+ * ``"*"`` to use the extension implied by ``fmt``.
1678
+ * ``""`` for no extension.
1679
+
1680
+ fmt : :class:`cdxcore.subdir.Format`
1681
+ File format or ``None`` to use the directory's default.
1682
+ Note that ``fmt`` cannot be a list even if ``file`` is.
1178
1683
 
1179
1684
  Returns
1180
1685
  -------
1181
- Returns True only if the file exists and has the correct version.
1686
+ Status : bool
1687
+ Returns ``True`` only if the file exists, has version information, and its version is equal to ``version``.
1182
1688
  """
1183
- return self._read( key=key,default=False,raiseOnError=raiseOnError,version=version,ext=ext,fmt=fmt,delete_wrong_version=delete_wrong_version,handle_version=SubDir.VER_CHECK )
1689
+ return self._read( file=file,default=False,raise_on_error=raise_on_error,version=version,ext=ext,fmt=fmt,delete_wrong_version=delete_wrong_version,handle_version=SubDir.VER_CHECK )
1184
1690
 
1185
- def get_version( self, key : str, raiseOnError : bool = False, *, ext : str = None, fmt : Format = None ):
1691
+ def get_version( self, file : str, raise_on_error : bool = False, *, ext : str = None, fmt : Format = None ):
1186
1692
  """
1187
- Returns the version ID stored in 'key'.
1693
+ Returns a version stored in a file.
1694
+
1188
1695
  This requires that the file has previously been saved with a version.
1189
- Otherwise this function will return unpredictable results.
1696
+ Otherwise this function will have unpredictable results.
1190
1697
 
1191
1698
  Parameters
1192
1699
  ----------
1193
- key : str
1194
- A core filename ("key") or a list thereof. The 'key' may contain subdirectory information '/'.
1195
- raiseOnError : bool
1700
+ file : str
1701
+ A filename, or a list thereof.
1702
+
1703
+ raise_on_error : bool
1196
1704
  Whether to raise an exception if accessing an existing file failed (e.g. if it is a directory).
1197
1705
  By default this function fails silently and returns the default.
1706
+
1707
+ delete_wrong_version : bool
1708
+ If ``True``, and if a wrong version was found, delete ``file``.
1709
+
1198
1710
  ext : str
1199
- Extension overwrite, or a list thereof if key is a list.
1711
+ Extension overwrite, or a list thereof if ``file`` is a list.
1712
+
1200
1713
  Set to:
1201
- -- None to use directory's default
1202
- -- '*' to use the extension implied by 'fmt'
1203
- -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1204
- fmt : Format
1205
- File format or None to use the directory's default.
1206
- Note that 'fmt' cannot be a list even if 'key' is.
1207
- Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
1714
+
1715
+ * ``None`` to use directory's default.
1716
+ * ``"*"`` to use the extension implied by ``fmt``.
1717
+ * ``""`` for no extension.
1718
+
1719
+ fmt : :class:`cdxcore.subdir.Format`
1720
+ File format or ``None`` to use the directory's default.
1721
+ Note that ``fmt`` cannot be a list even if ``file`` is.
1208
1722
 
1209
1723
  Returns
1210
1724
  -------
1211
- Version ID.
1725
+ version : str
1212
1726
  """
1213
- return self._read( key=key,default=None,raiseOnError=raiseOnError,version="",ext=ext,fmt=fmt,delete_wrong_version=False,handle_version=SubDir.VER_RETURN )
1727
+ return self._read( file=file,default=None,raise_on_error=raise_on_error,version="",ext=ext,fmt=fmt,delete_wrong_version=False,handle_version=SubDir.VER_RETURN )
1214
1728
 
1215
- def readString( self, key : str, default = None, raiseOnError : bool = False, *, ext : str = None ) -> str:
1729
+ def read_string( self, file : str, default = None, raise_on_error : bool = False, *, ext : str = None ) -> str:
1216
1730
  """
1217
- Reads text from 'key' or returns 'default'. Removes trailing EOLs
1218
- -- Supports 'key' containing directories#
1219
- -- Supports 'key' being iterable. In this case any 'default' can be a list, too.
1220
-
1221
- Returns the read string, or a list of strings if 'key' was iterable.
1222
- If the current directory is 'None', then behaviour is as if the file did not exist.
1223
-
1224
- Use 'ext' to specify the extension.
1225
- You cannot use 'ext' to specify a format as the format is plain text.
1226
- If 'ext' is '*' or if self._ext is '*' then the default extension is 'txt'.
1731
+ Reads text from a file. Removes trailing EOLs.
1732
+
1733
+ Returns the read string, or a list of strings if ``file`` was iterable.
1227
1734
  """
1228
1735
  verify( not isinstance(ext, Format), "Cannot change format when writing strings. Found extension '%s'", ext)
1229
1736
  ext = ext if not ext is None else self._ext
1230
1737
  ext = ext if ext != self.EXT_FMT_AUTO else ".txt"
1231
1738
 
1232
- def reader( key, fullFileName, default ):
1233
- with open(fullFileName,"rt",encoding="utf-8") as f:
1739
+ def reader( file, full_file_name, default ):
1740
+ with open(full_file_name,"rt",encoding="utf-8") as f:
1234
1741
  line = f.readline()
1235
1742
  if len(line) > 0 and line[-1] == '\n':
1236
1743
  line = line[:-1]
1237
1744
  return line
1238
- return self._read_reader( reader=reader, key=key, default=default, raiseOnError=raiseOnError, ext=ext )
1745
+ return self._read_reader( reader=reader, file=file, default=default, raise_on_error=raise_on_error, ext=ext )
1239
1746
 
1240
1747
  # -- write --
1241
1748
 
1242
- def _write( self, writer, key : str, obj, raiseOnError : bool, *, ext : str = None ) -> bool:
1749
+ def _write( self, writer, file : str, obj, raise_on_error : bool, *, ext : str = None ) -> bool:
1243
1750
  """ Utility function for write() and writeLine() """
1244
1751
  if self._path is None:
1245
- raise EOFError("Cannot write to '%s': current directory is not specified" % key)
1246
- self.createDirectory()
1752
+ raise EOFError("Cannot write to '%s': current directory is not specified" % file)
1753
+ self.create_directory()
1247
1754
 
1248
1755
  # vector version
1249
- if not isinstance(key,str):
1250
- if not isinstance(key, Collection): error( "'key' must be a string or an interable object. Found type %s", type(key))
1251
- l = len(key)
1756
+ if not isinstance(file,str):
1757
+ if not isinstance(file, Collection): error( "'file' must be a string or an interable object. Found type %s", type(file), exception=ValueError)
1758
+ l = len(file)
1252
1759
  if obj is None or isinstance(obj,str) or not isinstance(obj, Collection):
1253
1760
  obj = [ obj ] * l
1254
1761
  else:
1255
- if len(obj) != l: error("'obj' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(obj), l )
1762
+ if len(obj) != l: error("'obj' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(obj), l, exception=ValueError )
1256
1763
  if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
1257
1764
  ext = [ ext ] * l
1258
1765
  else:
1259
- if len(ext) != l: error("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
1766
+ if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l, exception=ValueError )
1260
1767
  ok = True
1261
- for k,o,e in zip(key,obj,ext):
1262
- ok |= self._write( writer, k, o, raiseOnError=raiseOnError, ext=e )
1768
+ for k,o,e in zip(file,obj,ext):
1769
+ ok |= self._write( writer, k, o, raise_on_error=raise_on_error, ext=e )
1263
1770
  return ok
1264
1771
 
1265
- # single key
1266
- if not len(key) > 0: error("'key is empty (the filename)" )
1267
- sub, key = os.path.split(key)
1268
- if len(key) == 0: error("'key '%s' refers to a directory, not a file", key)
1772
+ # single file
1773
+ if not len(file) > 0: error("'file is empty (the filename)" )
1774
+ sub, file = os.path.split(file)
1775
+ if len(file) == 0: error("'file '%s' refers to a directory, not a file", file)
1269
1776
  if len(sub) > 0:
1270
- return SubDir(sub,parent=self)._write(writer,key,obj, raiseOnError=raiseOnError,ext=ext )
1777
+ return SubDir(sub,parent=self)._write(writer,file,obj, raise_on_error=raise_on_error,ext=ext )
1271
1778
 
1272
1779
  # write to temp file, then rename into target file
1273
1780
  # this reduces collision when i/o operations are slow
1274
- fullFileName = self.fullKeyName(key,ext=ext)
1275
- tmp_file = uniqueHash48( [ key, uuid.getnode(), os.getpid(), threading.get_ident(), datetime.datetime.now() ] )
1781
+ full_file_name = self.full_file_name(file,ext=ext)
1782
+ tmp_file = unique_hash48( [ file, uuid.getnode(), os.getpid(), threading.get_ident(), datetime.datetime.now() ] )
1276
1783
  tmp_i = 0
1277
- fullTmpFile = self.fullKeyName(tmp_file,ext="tmp" if not ext=="tmp" else "_tmp")
1784
+ fullTmpFile = self.full_file_name(tmp_file,ext="tmp" if not ext=="tmp" else "_tmp")
1278
1785
  while os.path.exists(fullTmpFile):
1279
- fullTmpFile = self.fullKeyName(tmp_file) + "." + str(tmp_i) + ".tmp"
1786
+ fullTmpFile = self.full_file_name(tmp_file) + "." + str(tmp_i) + ".tmp"
1280
1787
  tmp_i += 1
1281
1788
  if tmp_i >= 10:
1282
- raise RuntimeError("Failed to generate temporary file for writing '%s': too many temporary files found. For example, this file already exists: '%s'" % ( fullFileName, fullTmpFile ) )
1789
+ raise RuntimeError("Failed to generate temporary file for writing '%s': too many temporary files found. For example, this file already exists: '%s'" % ( full_file_name, fullTmpFile ) )
1283
1790
 
1284
1791
  # write
1285
- if not writer( key, fullTmpFile, obj ):
1792
+ if not writer( file, fullTmpFile, obj ):
1286
1793
  return False
1287
- assert os.path.exists(fullTmpFile), ("Internal error: file does not exist ...?", fullTmpFile, fullFileName)
1794
+ assert os.path.exists(fullTmpFile), ("Internal error: file does not exist ...?", fullTmpFile, full_file_name)
1288
1795
  try:
1289
- if os.path.exists(fullFileName):
1290
- os.remove(fullFileName)
1291
- os.rename(fullTmpFile, fullFileName)
1796
+ if os.path.exists(full_file_name):
1797
+ os.remove(full_file_name)
1798
+ os.rename(fullTmpFile, full_file_name)
1292
1799
  except Exception as e:
1293
1800
  os.remove(fullTmpFile)
1294
- if raiseOnError:
1801
+ if raise_on_error:
1295
1802
  raise e
1296
1803
  return False
1297
1804
  return True
1298
1805
 
1299
- def write( self, key : str,
1806
+ def write( self, file : str,
1300
1807
  obj,
1301
- raiseOnError : bool = True,
1808
+ raise_on_error : bool = True,
1302
1809
  *,
1303
1810
  version : str = None,
1304
1811
  ext : str = None,
1305
1812
  fmt : Format = None ) -> bool:
1306
1813
  """
1307
- Pickles 'obj' into key.
1308
- -- Supports 'key' containing directories
1309
- -- Supports 'key' being a list.
1310
- In this case, if obj is an iterable it is considered the list of values for the elements of 'keys'
1311
- If 'obj' is not iterable, it will be written into all 'key's
1312
-
1313
- keys = ['file1', 'file2']
1314
-
1315
- sd.write( keys, 1 )
1316
- --> works, writes '1' in both files.
1317
-
1318
- sd.read( keys, [1,2] )
1319
- --> works, writes 1 and 2, respectively
1814
+ Writes an object to file.
1815
+
1816
+ * Supports ``file`` containing directories.
1817
+ * Supports ``file`` being a list.
1818
+ In this case, if ``obj`` is an iterable it is considered the list of values for the elements of ``file``.
1819
+ If ``obj`` is not iterable, it will be written into all files from ``file``::
1320
1820
 
1321
- sd.read( keys, "12" )
1322
- --> works, writes '12' in both files
1821
+ from cdxcore.subdir import SubDir
1323
1822
 
1324
- sd.write( keys, [1] )
1325
- --> produces error as len(keys) != len(obj)
1823
+ keys = ['file1', 'file2']
1824
+ sd = SubDir("!/test")
1825
+ sd.write( keys, 1 ) # works, writes '1' in both files.
1826
+ sd.write( keys, [1,2] ) # works, writes 1 and 2, respectively
1827
+ sd.write( keys, "12" ) # works, writes '12' in both files
1828
+ sd.write( keys, [1] ) # produces error as len(keys) != len(obj)
1326
1829
 
1327
- If the current directory is 'None', then the function throws an EOFError exception
1830
+ If the current directory is ``None``, then the function raises an :class:`EOFError` exception.
1328
1831
 
1329
1832
  Parameters
1330
1833
  ----------
1331
- key : str
1332
- Core filename ("key"), or list thereof
1834
+ file : str
1835
+ Core filename, or list thereof.
1836
+
1333
1837
  obj :
1334
- Object to write, or list thereof if 'key' is a list
1335
- raiseOnError : bool
1336
- If False, this function will return False upon failure
1838
+ Object to write, or list thereof if ``file`` is a list.
1839
+
1840
+ raise_on_error : bool
1841
+ If ``False``, this function will return ``False`` upon failure.
1842
+
1337
1843
  version : str
1338
- If not None, specifies the version of the code which generated 'obj'.
1844
+ If not ``None``, specifies the version of the code which generated ``obj``.
1339
1845
  This version will be written to the beginning of the file.
1846
+
1340
1847
  ext : str
1341
- Extension, or list thereof if 'key' is a list.
1342
- Set to:
1343
- -- None to use directory's default
1344
- -- '*' to use the extension implied by 'fmt'
1345
- -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1346
- fmt : Format
1347
- File format or None to use the directory's default.
1348
- Note that 'fmt' cannot be a list even if 'key' is.
1349
- Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
1848
+ Extension, or list thereof if ``file`` is a list.
1849
+
1850
+ * Use ``None`` to use directory's default extension.
1851
+ * Use ``"*"`` to use the extension implied by ``fmt``.
1852
+
1853
+ fmt : :class:`cdxcore.subdir.Format`
1854
+ File format or ``None`` to use the directory's default.
1855
+ Note that ``fmt`` cannot be a list even if ``file`` is.
1856
+ Note that unless ``ext`` or the SubDir's extension is '*',
1857
+ changing the format does not automatically change the extension used.
1350
1858
 
1351
1859
  Returns
1352
1860
  -------
1353
- Boolean to indicate success if raiseOnError is False.
1861
+ Success : bool
1862
+ Boolean to indicate success if ``raise_on_error`` is ``False``.
1354
1863
  """
1355
- ext, fmt = self.autoExtFmt(ext=ext, fmt=fmt)
1864
+ ext, fmt = self.auto_ext_fmt(ext=ext, fmt=fmt)
1356
1865
  version = str(version) if not version is None else None
1357
1866
  assert ext != self.EXT_FMT_AUTO, ("'ext' is '*'...?")
1358
1867
 
1359
1868
  if version=='*': error("You cannot write version '*'. Use None to write a file without version.")
1869
+
1360
1870
  if version is None and fmt in [Format.BLOSC, Format.GZIP]:
1361
- version = ""
1871
+ # blosc and gzip have unexpected side effects
1872
+ # a version is attempted to be read but is not present
1873
+ # (e.g. blosc causes a MemoryError)
1874
+ version = ""
1362
1875
 
1363
- def writer( key, fullFileName, obj ):
1876
+ def writer( file, full_file_name, obj ):
1364
1877
  try:
1365
- if fmt == Format.PICKLE or fmt == Format.BLOSC:
1366
- with open(fullFileName,"wb") as f:
1878
+ if fmt == Format.PICKLE:
1879
+ # only if a version is provided write it into the file
1880
+ with open(full_file_name,"wb") as f:
1367
1881
  # handle version as byte string
1368
1882
  if not version is None:
1369
1883
  version_ = bytearray(version, "utf-8")
@@ -1372,35 +1886,41 @@ class SubDir(object):
1372
1886
  len8[0] = len(version_)
1373
1887
  f.write(len8)
1374
1888
  f.write(version_)
1375
- if fmt == Format.PICKLE:
1376
- pickle.dump(obj,f,-1)
1377
- else:
1378
- assert fmt == fmt.BLOSC, ("Internal error: unknown format", fmt)
1379
- if blosc is None:
1380
- raise ModuleNotFoundError("blosc", "'blosc' not found")
1381
- pdata = pickle.dumps(obj) # returns data as a bytes object
1382
- del obj
1383
- len_data = len(pdata)
1384
- num_blocks = max(0,len_data-1) // BLOSC_MAX_USE + 1
1385
- f.write(num_blocks.to_bytes(2, 'big', signed=False))
1386
- for i in range(num_blocks):
1387
- start = i*BLOSC_MAX_USE
1388
- end = min(len_data,start+BLOSC_MAX_USE)
1389
- assert end>start, ("Internal error; nothing to write")
1390
- block = blosc.compress( pdata[start:end] )
1391
- blockl = len(block)
1392
- f.write( blockl.to_bytes(6, 'big', signed=False) )
1393
- if blockl > 0:
1394
- f.write( block )
1395
- del block
1396
- del pdata
1889
+ pickle.dump(obj,f,-1)
1890
+
1891
+ elif fmt == Format.BLOSC:
1892
+ # only if a version is provided write it into the file
1893
+ with open(full_file_name,"wb") as f:
1894
+ # handle version as byte string
1895
+ if not version is None: # it's never None
1896
+ version_ = bytearray(version, "utf-8")
1897
+ if len(version_) > 255: error("Version '%s' is way too long: its byte encoding has length %ld which does not fit into a byte", version, len(version_))
1898
+ len8 = bytearray(1)
1899
+ len8[0] = len(version_)
1900
+ f.write(len8)
1901
+ f.write(version_)
1902
+ pdata = pickle.dumps(obj) # returns data as a bytes object
1903
+ del obj
1904
+ len_data = len(pdata)
1905
+ num_blocks = max(0,len_data-1) // BLOSC_MAX_USE + 1
1906
+ f.write(num_blocks.to_bytes(2, 'big', signed=False))
1907
+ for i in range(num_blocks):
1908
+ start = i*BLOSC_MAX_USE
1909
+ end = min(len_data,start+BLOSC_MAX_USE)
1910
+ assert end>start, ("Internal error; nothing to write")
1911
+ block = blosc.compress( pdata[start:end] )
1912
+ blockl = len(block)
1913
+ f.write( blockl.to_bytes(6, 'big', signed=False) )
1914
+ if blockl > 0:
1915
+ f.write( block )
1916
+ del block
1917
+ del pdata
1397
1918
 
1398
1919
  elif fmt == Format.GZIP:
1399
- if gzip is None:
1400
- raise ModuleNotFoundError("gzip", "'gzip' not found")
1401
- with gzip.open(fullFileName,"wb") as f:
1920
+ # only if a version is provided write it into the file
1921
+ with gzip.open(full_file_name,"wb") as f:
1402
1922
  # handle version as byte string
1403
- if not version is None:
1923
+ if not version is None: # it's never None
1404
1924
  version_ = bytearray(version, "utf-8")
1405
1925
  if len(version_) > 255: error("Version '%s' is way too long: its byte encoding has length %ld which does not fit into a byte", version, len(version_))
1406
1926
  len8 = bytearray(1)
@@ -1410,12 +1930,11 @@ class SubDir(object):
1410
1930
  pickle.dump(obj,f,-1)
1411
1931
 
1412
1932
  elif fmt in [Format.JSON_PLAIN, Format.JSON_PICKLE]:
1413
- with open(fullFileName,"wt",encoding="utf-8") as f:
1933
+ # only if a version is provided write it into the file
1934
+ with open(full_file_name,"wt",encoding="utf-8") as f:
1414
1935
  if not version is None:
1415
1936
  f.write("# " + version + "\n")
1416
1937
  if fmt == Format.JSON_PICKLE:
1417
- if jsonpickle is None:
1418
- raise ModuleNotFoundError("jsonpickle", "'jsonpickle' not found")
1419
1938
  f.write( jsonpickle.encode(obj) )
1420
1939
  else:
1421
1940
  assert fmt == Format.JSON_PLAIN, ("Internal error: invalid Format", fmt)
@@ -1424,27 +1943,21 @@ class SubDir(object):
1424
1943
  else:
1425
1944
  raise NotImplementedError(fmt, txtfmt("Internal error: invalid format '%s'", fmt))
1426
1945
  except Exception as e:
1427
- if raiseOnError:
1946
+ if raise_on_error:
1428
1947
  raise e
1429
1948
  return False
1430
1949
  return True
1431
- return self._write( writer=writer, key=key, obj=obj, raiseOnError=raiseOnError, ext=ext )
1950
+ return self._write( writer=writer, file=file, obj=obj, raise_on_error=raise_on_error, ext=ext )
1432
1951
 
1433
- set = write
1434
-
1435
- def writeString( self, key : str, line : str, raiseOnError : bool = True, *, ext : str = None ) -> bool:
1952
+ def write_string( self, file : str, line : str, raise_on_error : bool = True, *, ext : str = None ) -> bool:
1436
1953
  """
1437
- Writes 'line' into key. A trailing EOL will not be read back
1438
- -- Supports 'key' containing directories
1439
- -- Supports 'key' being a list.
1440
- In this case, line can either be the same value for all key's or a list, too.
1441
-
1442
- If the current directory is 'None', then the function throws an EOFError exception
1443
- See additional comments for write()
1954
+ Writes a line of text into a file.
1444
1955
 
1445
- Use 'ext' to specify the extension.
1446
- You cannot use 'ext' to specify a format as the format is plain text.
1447
- If 'ext' is '*' or if self._ext is '*' then the default extension is 'txt'.
1956
+ * Supports ``file`` containing directories.
1957
+ * Supports ``file`` being a list.
1958
+ In this case, ``line`` can either be the same value for all files or a list, too.
1959
+
1960
+ If the current directory is ``None``, then the function throws an EOFError exception
1448
1961
  """
1449
1962
  verify( not isinstance(ext, Format), "Cannot change format when writing strings. Found extension '%s'", ext, exception=ValueError )
1450
1963
  ext = ext if not ext is None else self._ext
@@ -1452,38 +1965,37 @@ class SubDir(object):
1452
1965
 
1453
1966
  if len(line) == 0 or line[-1] != '\n':
1454
1967
  line += '\n'
1455
- def writer( key, fullFileName, obj ):
1968
+ def writer( file, full_file_name, obj ):
1456
1969
  try:
1457
- with open(fullFileName,"wt",encoding="utf-8") as f:
1970
+ with open(full_file_name,"wt",encoding="utf-8") as f:
1458
1971
  f.write(obj)
1459
1972
  except Exception as e:
1460
- if raiseOnError:
1973
+ if raise_on_error:
1461
1974
  raise e
1462
1975
  return False
1463
1976
  return True
1464
- return self._write( writer=writer, key=key, obj=line, raiseOnError=raiseOnError, ext=ext )
1977
+ return self._write( writer=writer, file=file, obj=line, raise_on_error=raise_on_error, ext=ext )
1465
1978
 
1466
1979
  # -- iterate --
1467
1980
 
1468
1981
  def files(self, *, ext : str = None) -> list:
1469
1982
  """
1470
- Returns a list of keys in this subdirectory with the current extension, or the specified extension.
1983
+ Returns a list of files in this subdirectory with the current extension, or the specified extension.
1471
1984
 
1472
1985
  In other words, if the extension is ".pck", and the files are "file1.pck", "file2.pck", "file3.bin"
1473
1986
  then this function will return [ "file1", "file2" ]
1474
1987
 
1475
- If 'ext' is
1476
- -- None, the directory's default extension will be used
1477
- -- "" then this function will return all files in this directory.
1478
- -- a Format, then the default extension of the format will be used.
1479
-
1480
- This function ignores directories. Use subDirs() to retrieve those.
1988
+ If ``ext`` is:
1989
+
1990
+ * ``None``, then the directory's default extension will be used.
1991
+ * ``""`` then this function will return all files in this directory.
1992
+ * ``"*"`` then the extension corresponding to the current format will be used.
1481
1993
 
1482
- [This function has an alias 'keys']
1994
+ This function ignores directories. Use :meth:`cdxcore.subdir.SubDir.sub_dirs` to retrieve those.
1483
1995
  """
1484
- if not self.pathExists():
1996
+ if not self.path_exists():
1485
1997
  return []
1486
- ext = self.autoExt( ext=ext )
1998
+ ext = self.auto_ext( ext )
1487
1999
  ext_l = len(ext)
1488
2000
  keys = []
1489
2001
  with os.scandir(self._path) as it:
@@ -1497,15 +2009,15 @@ class SubDir(object):
1497
2009
  else:
1498
2010
  keys.append( entry.name )
1499
2011
  return keys
1500
- keys = files
1501
2012
 
1502
- def subDirs(self) -> list:
2013
+ def sub_dirs(self) -> list:
1503
2014
  """
1504
- Returns a list of all sub directories
1505
- If self does not refer to an existing directory, then this function returns an empty list.
2015
+ Retrieve a list of all sub directories.
2016
+
2017
+ If ``self`` does not refer to an existing directory, then this function returns an empty list.
1506
2018
  """
1507
2019
  # do not do anything if the object was deleted
1508
- if not self.pathExists():
2020
+ if not self.path_exists():
1509
2021
  return []
1510
2022
  subdirs = []
1511
2023
  with os.scandir(self._path[:-1]) as it:
@@ -1517,322 +2029,345 @@ class SubDir(object):
1517
2029
 
1518
2030
  # -- delete --
1519
2031
 
1520
- def delete( self, key : str, raiseOnError: bool = False, *, ext : str = None ):
2032
+ def delete( self, file : str, raise_on_error: bool = False, *, ext : str = None ):
1521
2033
  """
1522
- Deletes 'key'; 'key' might be a list.
2034
+ Deletes ``file``.
2035
+
2036
+ This function will quietly fail if ``file`` does not exist unless ``raise_on_error``
2037
+ is set to ``True``.
1523
2038
 
1524
2039
  Parameters
1525
2040
  ----------
1526
- key :
2041
+ file :
1527
2042
  filename, or list of filenames
1528
- raiseOnError :
1529
- if False, do not throw KeyError if file does not exist.
1530
- ext :
1531
- Extension, or list thereof if 'key' is an extension.
2043
+
2044
+ raise_on_error : bool
2045
+ If ``False``, do not throw :class:`KeyError` if file does not exist
2046
+ or another error occurs.
2047
+
2048
+ ext : str
2049
+ Extension, or list thereof if ``file`` is a list.
2050
+
1532
2051
  Use
1533
- -- None for the directory default
1534
- -- "" to not use an automatic extension.
1535
- -- A Format to specify the default extension for that format.
2052
+
2053
+ * ``None`` for the directory default.
2054
+ * ``""`` to not use an automatic extension.
2055
+ * ``"*"`` to use the extension associated with the format of the directory.
1536
2056
  """
1537
2057
  # do not do anything if the object was deleted
1538
2058
  if self._path is None:
1539
- if raiseOnError: raise EOFError("Cannot delete '%s': current directory not specified" % key)
2059
+ if raise_on_error: raise EOFError("Cannot delete '%s': current directory not specified" % file)
1540
2060
  return
1541
2061
 
1542
2062
  # vector version
1543
- if not isinstance(key,str):
1544
- if not isinstance(key, Collection): error( "'key' must be a string or an interable object. Found type %s", type(key))
1545
- l = len(key)
2063
+ if not isinstance(file,str):
2064
+ if not isinstance(file, Collection): error( "'file' must be a string or an interable object. Found type %s", type(file))
2065
+ l = len(file)
1546
2066
  if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
1547
2067
  ext = [ ext ] * l
1548
2068
  else:
1549
- if len(ext) != l: error("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
1550
- for k, e in zip(key,ext):
1551
- self.delete(k, raiseOnError=raiseOnError, ext=e)
2069
+ if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l )
2070
+ for k, e in zip(file,ext):
2071
+ self.delete(k, raise_on_error=raise_on_error, ext=e)
1552
2072
  return
1553
2073
 
1554
- # handle directories in 'key'
1555
- if len(key) == 0: error( "'key' is empty" )
1556
- sub, key_ = os.path.split(key)
1557
- if len(key_) == 0: error("'key' %s indicates a directory, not a file", key)
1558
- if len(sub) > 0: return SubDir(sub,parent=self).delete(key_,raiseOnError=raiseOnError,ext=ext)
2074
+ # handle directories in 'file'
2075
+ if len(file) == 0: error( "'file' is empty" )
2076
+ sub, key_ = os.path.split(file)
2077
+ if len(key_) == 0: error("'file' %s indicates a directory, not a file", file)
2078
+ if len(sub) > 0: return SubDir(sub,parent=self).delete(key_,raise_on_error=raise_on_error,ext=ext)
1559
2079
  # don't try if directory doesn't existy
1560
- if not self.pathExists():
1561
- if raiseOnError:
1562
- raise KeyError(key)
2080
+ if not self.path_exists():
2081
+ if raise_on_error:
2082
+ raise KeyError(file)
1563
2083
  return
1564
- fullFileName = self.fullKeyName(key, ext=ext)
1565
- if not os.path.exists(fullFileName):
1566
- if raiseOnError:
1567
- raise KeyError(key)
2084
+ full_file_name = self.full_file_name(file, ext=ext)
2085
+ if not os.path.exists(full_file_name):
2086
+ if raise_on_error:
2087
+ raise KeyError(file)
1568
2088
  else:
1569
- os.remove(fullFileName)
2089
+ os.remove(full_file_name)
1570
2090
 
1571
- def deleteAllKeys( self, raiseOnError : bool = False, *, ext : str = None ):
2091
+ def delete_all_files( self, raise_on_error : bool = False, *, ext : str = None ):
1572
2092
  """
1573
2093
  Deletes all valid keys in this sub directory with the correct extension.
1574
2094
 
1575
2095
  Parameters
1576
2096
  ----------
1577
- key :
1578
- filename, or list of filenames
1579
- raiseOnError :
1580
- if False, do not throw KeyError if file does not exist.
1581
- ext :
1582
- File extension to match.
1583
- Use
1584
- -- None for the directory default
1585
- -- "" to match all files regardless of extension.
1586
- -- A Format to specify the default extension for that format.
2097
+ raise_on_error : bool
2098
+ Set to ``False`` to quietly ignore errors.
2099
+
2100
+ ext : str
2101
+ Extension to be used:
2102
+
2103
+ * ``None`` for the directory default.
2104
+ * ``""`` to not use an automatic extension.
2105
+ * ``"*"`` to use the extension associated with the format of the directory.
1587
2106
  """
1588
2107
  if self._path is None:
1589
- if raiseOnError: raise EOFError("Cannot delete all files: current directory not specified")
2108
+ if raise_on_error: raise EOFError("Cannot delete all files: current directory not specified")
1590
2109
  return
1591
- if not self.pathExists():
2110
+ if not self.path_exists():
1592
2111
  return
1593
- self.delete( self.keys(ext=ext), raiseOnError=raiseOnError, ext=ext )
2112
+ self.delete( self.files(ext=ext), raise_on_error=raise_on_error, ext=ext )
1594
2113
 
1595
- def deleteAllContent( self, deleteSelf : bool = False, raiseOnError : bool = False, *, ext : str = None ):
2114
+ def delete_all_content( self, delete_self : bool = False, raise_on_error : bool = False, *, ext : str = None ):
1596
2115
  """
1597
2116
  Deletes all valid keys and subdirectories in this sub directory.
2117
+
1598
2118
  Does not delete files with other extensions.
1599
- Use eraseEverything() if the aim is to delete everything.
2119
+ Use :meth:`cdxcore.subdir.SubDir.delete_everything` if the aim is to delete, well, everything.
1600
2120
 
1601
2121
  Parameters
1602
2122
  ----------
1603
- deleteSelf:
1604
- whether to delete the directory or only its contents
1605
- raiseOnError:
1606
- False for silent failure
1607
- ext:
1608
- Extension for keys, or None for the directory's default.
1609
- You can also provide a Format for 'ext'.
1610
- Use "" to match all files regardless of extension.
2123
+ delete_self: bool
2124
+ Whether to delete the directory itself as well, or only its contents.
2125
+ raise_on_error: bool
2126
+ ``False`` for silent failure
2127
+ ext: str
2128
+ Extension for keys, or ``None`` for the directory's default.
2129
+ Use ``""`` to match all files regardless of extension.
1611
2130
  """
1612
2131
  # do not do anything if the object was deleted
1613
2132
  if self._path is None:
1614
- if raiseOnError: raise EOFError("Cannot delete all contents: current directory not specified")
2133
+ if raise_on_error: raise EOFError("Cannot delete all contents: current directory not specified")
1615
2134
  return
1616
- if not self.pathExists():
2135
+ if not self.path_exists():
1617
2136
  return
1618
2137
  # delete sub directories
1619
- subdirs = self.subDirs();
2138
+ subdirs = self.sub_dirs();
1620
2139
  for subdir in subdirs:
1621
- SubDir(subdir, parent=self).deleteAllContent( deleteSelf=True, raiseOnError=raiseOnError, ext=ext )
2140
+ SubDir(subdir, parent=self).delete_all_content( delete_self=True, raise_on_error=raise_on_error, ext=ext )
1622
2141
  # delete keys
1623
- self.deleteAllKeys( raiseOnError=raiseOnError,ext=ext )
2142
+ self.delete_all_files( raise_on_error=raise_on_error,ext=ext )
1624
2143
  # delete myself
1625
- if not deleteSelf:
2144
+ if not delete_self:
1626
2145
  return
1627
2146
  rest = list( os.scandir(self._path[:-1]) )
1628
2147
  txt = str(rest)
1629
2148
  txt = txt if len(txt) < 50 else (txt[:47] + '...')
1630
2149
  if len(rest) > 0:
1631
- if raiseOnError: error( "Cannot delete my own directory %s: directory not empty: found %ld object(s): %s", self._path,len(rest), txt)
2150
+ if raise_on_error: error( "Cannot delete my own directory %s: directory not empty: found %ld object(s): %s", self._path,len(rest), txt)
1632
2151
  return
1633
2152
  os.rmdir(self._path[:-1]) ## does not work ????
1634
2153
  self._path = None
1635
2154
 
1636
- def eraseEverything( self, keepDirectory : bool = True ):
2155
+ def delete_everything( self, keep_directory : bool = True ):
1637
2156
  """
1638
- Deletes the entire sub directory will all contents
1639
- WARNING: deletes ALL files, not just those with the present extension.
1640
- Will keep the subdir itself by default.
1641
- If not, it will invalidate 'self._path'
1642
-
1643
- If self is None, do nothing. That means you can call this function several times.
2157
+ Deletes the entire sub directory with all contents.
2158
+
2159
+ *WARNING:* deletes *all* files and sub-directories, not just those with the present extension.
2160
+ If ``keep_directory`` is ``False``, the directory referred to by this object will also be deleted.
2161
+ In this case, the path held by ``self`` will be set to ``None``.
1644
2162
  """
1645
2163
  if self._path is None:
1646
2164
  return
1647
- if not self.pathExists():
2165
+ if not self.path_exists():
1648
2166
  return
1649
2167
  shutil.rmtree(self._path[:-1], ignore_errors=True)
1650
- if not keepDirectory and os.path.exists(self._path[:-1]):
2168
+ if not keep_directory and os.path.exists(self._path[:-1]):
1651
2169
  os.rmdir(self._path[:-1])
1652
2170
  self._path = None
1653
- elif keepDirectory and not os.path.exists(self._path[:-1]):
2171
+ elif keep_directory and not os.path.exists(self._path[:-1]):
1654
2172
  os.makedirs(self._path[:-1])
1655
2173
 
1656
2174
  # -- file ops --
1657
2175
 
1658
- def exists(self, key : str, *, ext : str = None ) -> bool:
2176
+ def exists(self, file : str, *, ext : str = None ) -> bool:
1659
2177
  """
1660
- Checks whether 'key' exists. Works with iterables
2178
+ Checks whether a file exists.
1661
2179
 
1662
2180
  Parameters
1663
2181
  ----------
1664
- key :
1665
- filename, or list of filenames
1666
- ext :
1667
- Extension, or list thereof if 'key' is an extension.
2182
+ file :
2183
+ Filename, or list of filenames.
2184
+
2185
+ ext : str
2186
+ Extension, or list thereof if ``file`` is a list.
2187
+
1668
2188
  Use
1669
- -- None for the directory default
1670
- -- "" for no automatic extension
1671
- -- A Format to specify the default extension for that format.
2189
+
2190
+ * ``None`` for the directory default.
2191
+ * ``""`` to not use an automatic extension.
2192
+ * ``"*"`` to use the extension associated with the format of the directory.
1672
2193
 
1673
2194
  Returns
1674
2195
  -------
1675
- If 'key' is a string, returns True or False, else it will return a list of bools.
2196
+ Status : bool
2197
+ If ``file`` is a string, returns ``True`` or ``False``, else it will return a list of ``bool`` values.
1676
2198
  """
1677
2199
  # vector version
1678
- if not isinstance(key,str):
1679
- verify( isinstance(key, Collection), "'key' must be a string or an interable object. Found type %s", type(key))
1680
- l = len(key)
2200
+ if not isinstance(file,str):
2201
+ verify( isinstance(file, Collection), "'file' must be a string or an interable object. Found type %s", type(file))
2202
+ l = len(file)
1681
2203
  if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
1682
2204
  ext = [ ext ] * l
1683
2205
  else:
1684
- if len(ext) != l: error("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
1685
- return [ self.exists(k,ext=e) for k,e in zip(key,ext) ]
2206
+ if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l )
2207
+ return [ self.exists(k,ext=e) for k,e in zip(file,ext) ]
1686
2208
  # empty directory
1687
2209
  if self._path is None:
1688
2210
  return False
1689
- # handle directories in 'key'
1690
- if len(key) == 0: raise ValueError("'key' missing (the filename)")
1691
- sub, key_ = os.path.split(key)
1692
- if len(key_) == 0: raise IsADirectoryError( key, txtfmt("'key' %s indicates a directory, not a file", key) )
2211
+ # handle directories in 'file'
2212
+ if len(file) == 0: raise ValueError("'file' missing (the filename)")
2213
+ sub, key_ = os.path.split(file)
2214
+ if len(key_) == 0: raise IsADirectoryError( file, txtfmt("'file' %s indicates a directory, not a file", file) )
1693
2215
  if len(sub) > 0:
1694
- return self(sub).exists(key=key_,ext=ext)
2216
+ return self(sub).exists(file=key_,ext=ext)
1695
2217
  # if directory doesn't exit
1696
- if not self.pathExists():
2218
+ if not self.path_exists():
1697
2219
  return False
1698
- # single key
1699
- fullFileName = self.fullKeyName(key, ext=ext)
1700
- if not os.path.exists(fullFileName):
2220
+ # single file
2221
+ full_file_name = self.full_file_name(file, ext=ext)
2222
+ if not os.path.exists(full_file_name):
1701
2223
  return False
1702
- if not os.path.isfile(fullFileName):
1703
- raise IsADirectoryError("Structural error: key %s: exists, but is not a file (full path %s)",key,fullFileName)
2224
+ if not os.path.isfile(full_file_name):
2225
+ raise IsADirectoryError("Structural error: file %s: exists, but is not a file (full path %s)",file,full_file_name)
1704
2226
  return True
1705
2227
 
1706
- def _getFileProperty( self, *, key : str, ext : str, func ):
2228
+ def _getFileProperty( self, *, file : str, ext : str, func ):
1707
2229
  # vector version
1708
- if not isinstance(key,str):
1709
- verify( isinstance(key, Collection), "'key' must be a string or an interable object. Found type %s", type(key))
1710
- l = len(key)
2230
+ if not isinstance(file,str):
2231
+ verify( isinstance(file, Collection), "'file' must be a string or an interable object. Found type %s", type(file))
2232
+ l = len(file)
1711
2233
  if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
1712
2234
  ext = [ ext ] * l
1713
2235
  else:
1714
- if len(ext) != l: error("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
1715
- return [ self._getFileProperty(key=k,ext=e,func=func) for k,e in zip(key,ext) ]
2236
+ if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l )
2237
+ return [ self._getFileProperty(file=k,ext=e,func=func) for k,e in zip(file,ext) ]
1716
2238
  # empty directory
1717
2239
  if self._path is None:
1718
2240
  return None
1719
- # handle directories in 'key'
1720
- if len(key) == 0: raise ValueError("'key' missing (the filename)")
1721
- sub, key_ = os.path.split(key)
1722
- if len(key_) == 0: raise IsADirectoryError( key, txtfmt("'key' %s indicates a directory, not a file", key) )
1723
- if len(sub) > 0: return self(sub)._getFileProperty(key=key_,ext=ext,func=func)
2241
+ # handle directories in 'file'
2242
+ if len(file) == 0: raise ValueError("'file' missing (the filename)")
2243
+ sub, key_ = os.path.split(file)
2244
+ if len(key_) == 0: raise IsADirectoryError( file, txtfmt("'file' %s indicates a directory, not a file", file) )
2245
+ if len(sub) > 0: return self(sub)._getFileProperty(file=key_,ext=ext,func=func)
1724
2246
  # if directory doesn't exit
1725
- if not self.pathExists():
2247
+ if not self.path_exists():
1726
2248
  return None
1727
- # single key
1728
- fullFileName = self.fullKeyName(key, ext=ext)
1729
- if not os.path.exists(fullFileName):
2249
+ # single file
2250
+ full_file_name = self.full_file_name(file, ext=ext)
2251
+ if not os.path.exists(full_file_name):
1730
2252
  return None
1731
- return func(fullFileName)
2253
+ return func(full_file_name)
1732
2254
 
1733
- def getCreationTime( self, key : str, *, ext : str = None ) -> datetime.datetime:
2255
+ def get_creation_time( self, file : str, *, ext : str = None ) -> datetime.datetime:
1734
2256
  """
1735
- Returns the creation time of 'key', or None if file was not found.
1736
- See comments on os.path.getctime() for compatibility
2257
+ Returns the creation time of a file.
2258
+
2259
+ See comments on :func:`os.path.getctime` for system compatibility information.
1737
2260
 
1738
2261
  Parameters
1739
2262
  ----------
1740
- key :
1741
- filename, or list of filenames
2263
+ file :
2264
+ filename, or list of filenames.
1742
2265
  ext :
1743
- Extension, or list thereof if 'key' is an extension.
1744
- Use
1745
- -- None for the directory default
1746
- -- "" for no automatic extension
1747
- -- A Format to specify the default extension for that format.
2266
+ Extension, or list thereof if ``file`` is a list.
2267
+ Use:
2268
+
2269
+ * ``None`` for the directory default.
2270
+ * ``""`` for no automatic extension.
2271
+ * A :class:`cdxcore.subdir.Format` to use the default extension for that format.
1748
2272
 
1749
2273
  Returns
1750
2274
  -------
1751
- datetime.datetime if 'key' is a string, otherwise a list of datetime's
2275
+ Datetime : :class:`datetime.datetime`
2276
+ A single ``datetime`` if ``file`` is a string, otherwise a list of ``datetime``'s.
2277
+ Returns ``None`` if the file does not exist.
1752
2278
  """
1753
- return self._getFileProperty( key=key, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getctime(x)) )
2279
+ return self._getFileProperty( file=file, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getctime(x)) )
1754
2280
 
1755
- def getLastModificationTime( self, key : str, *, ext : str = None ) -> datetime.datetime:
2281
+ def get_last_modification_time( self, file : str, *, ext : str = None ) -> datetime.datetime:
1756
2282
  """
1757
- Returns the last modification time of 'key', or None if file was not found.
1758
- See comments on os.path.getmtime() for compatibility
2283
+ Returns the last modification time of a file.
2284
+
2285
+ See comments on :func:`os.path.getmtime` for system compatibility information.
1759
2286
 
1760
2287
  Parameters
1761
2288
  ----------
1762
- key :
1763
- filename, or list of filenames
2289
+ file :
2290
+ filename, or list of filenames.
1764
2291
  ext :
1765
- Extension, or list thereof if 'key' is an extension.
1766
- Use
1767
- -- None for the directory default
1768
- -- "" for no automatic extension
1769
- -- A Format to specify the default extension for that format.
2292
+ Extension, or list thereof if ``file`` is a list.
2293
+ Use:
2294
+
2295
+ * ``None`` for the directory default.
2296
+ * ``""`` for no automatic extension.
2297
+ * A :class:`cdxcore.subdir.Format` to use the default extension for that format.
1770
2298
 
1771
2299
  Returns
1772
2300
  -------
1773
- datetime.datetime if 'key' is a string, otherwise a list of datetime's
2301
+ Datetime : :class:`datetime.datetime`
2302
+ A single ``datetime`` if ``file`` is a string, otherwise a list of ``datetime``'s.
2303
+ Returns ``None`` if the file does not exist.
1774
2304
  """
1775
- return self._getFileProperty( key=key, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getmtime(x)) )
2305
+ return self._getFileProperty( file=file, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getmtime(x)) )
1776
2306
 
1777
- def getLastAccessTime( self, key : str, *, ext : str = None ) -> datetime.datetime:
2307
+ def get_last_access_time( self, file : str, *, ext : str = None ) -> datetime.datetime:
1778
2308
  """
1779
- Returns the last access time of 'key', or None if file was not found.
1780
- See comments on os.path.getatime() for compatibility
2309
+ Returns the last access time of a file.
2310
+
2311
+ See comments on :func:`os.path.getatime` for system compatibility information.
1781
2312
 
1782
2313
  Parameters
1783
2314
  ----------
1784
- key :
1785
- filename, or list of filenames
1786
- ext :
1787
- Extension, or list thereof if 'key' is an extension.
1788
- Use
1789
- -- None for the directory default
1790
- -- "" for no automatic extension
1791
- -- A Format to specify the default extension for that format.
2315
+ file : str
2316
+ Filename, or list of filenames.
2317
+
2318
+ ext : str
2319
+ Extension, or list thereof if ``file`` is a list.
2320
+
2321
+ * Use ``None`` for the directory default.
2322
+ * Use ``""`` for no automatic extension.
1792
2323
 
1793
2324
  Returns
1794
2325
  -------
1795
- datetime.datetime if 'key' is a string, otherwise a list of datetime's
2326
+ Datetime : :class:`datetime.datetime`
2327
+ A single ``datetime`` if ``file`` is a string, otherwise a list of ``datetime``'s.
2328
+ Returns ``None`` if the file does not exist.
1796
2329
  """
1797
- return self._getFileProperty( key=key, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getatime(x)) )
2330
+ return self._getFileProperty( file=file, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getatime(x)) )
1798
2331
 
1799
- def getFileSize( self, key : str, *, ext : str = None ) -> int:
2332
+ def file_size( self, file : str, *, ext : str = None ) -> int:
1800
2333
  """
1801
- Returns the file size of 'key', or None if file was not found.
1802
- See comments on os.path.getatime() for compatibility
2334
+ Returns the file size of a file.
2335
+
2336
+ See comments on :func:`os.path.getsize` for system compatibility information.
1803
2337
 
1804
2338
  Parameters
1805
2339
  ----------
1806
- key :
1807
- filename, or list of filenames
1808
- ext :
1809
- Extension, or list thereof if 'key' is an extension.
1810
- Use
1811
- -- None for the directory default
1812
- -- "" for no automatic extension
1813
- -- A Format to specify the default extension for that format.
2340
+ file : str
2341
+ Filename, or list of filenames.
2342
+
2343
+ ext : str
2344
+ Extension, or list thereof if ``file`` is a list.
2345
+
2346
+ * Use ``None`` for the directory default.
2347
+ * Use ``""`` for no automatic extension.
1814
2348
 
1815
2349
  Returns
1816
2350
  -------
1817
- File size if 'key' is a string, otherwise a list thereof.
2351
+ File size if ``file`` is a string, otherwise a list thereof. Returns ``None`` if the file does not exist.
1818
2352
  """
1819
- return self._getFileProperty( key=key, ext=ext, func=lambda x : os.path.getsize(x) )
2353
+ return self._getFileProperty( file=file, ext=ext, func=lambda x : os.path.getsize(x) )
1820
2354
 
1821
2355
  def rename( self, source : str, target : str, *, ext : str = None ):
1822
2356
  """
1823
- Rename "source" key into "target" key.
1824
- Function will raise an exception if not successful
2357
+ Rename a file.
2358
+
2359
+ This function will raise an exception if not successful.
1825
2360
 
1826
2361
  Parameters
1827
2362
  ----------
1828
- source, target:
1829
- filenames
1830
- ext :
1831
- Extension, or list thereof if 'key' is an extension.
1832
- Use
1833
- -- None for the directory default
1834
- -- "" for no automatic extensions.
1835
- -- A Format to specify the default extension for that format.
2363
+ source, target : str
2364
+ Filenames.
2365
+
2366
+ ext : str
2367
+ Extension.
2368
+
2369
+ * Use ``None`` for the directory default.
2370
+ * Use ``""`` for no automatic extension.
1836
2371
  """
1837
2372
  # empty directory
1838
2373
  if self._path is None:
@@ -1843,9 +2378,9 @@ class SubDir(object):
1843
2378
  sub, source_ = os.path.split(source)
1844
2379
  if len(source_) == 0: raise IsADirectoryError( source, txtfmt("'source' %s indicates a directory, not a file", source ))
1845
2380
  if len(sub) > 0:
1846
- src_full = self(sub).fullKeyName(key=source_,ext=ext)
2381
+ src_full = self(sub).full_file_name(file=source_,ext=ext)
1847
2382
  else:
1848
- src_full = self.fullKeyName( source, ext=ext )
2383
+ src_full = self.full_file_name( source, ext=ext )
1849
2384
 
1850
2385
  # handle directories in 'target'
1851
2386
  if len(target) == 0: raise ValueError("'target' missing (the filename)" )
@@ -1853,191 +2388,257 @@ class SubDir(object):
1853
2388
  if len(target_) == 0: raise IsADirectoryError( target, txtfmt("'target' %s indicates a directory, not a file", target))
1854
2389
  if len(sub) > 0:
1855
2390
  tar_dir = self(sub)
1856
- tar_dir.createDirectory()
1857
- tar_full = tar_dir.fullKeyName(key=target_,ext=ext)
2391
+ tar_dir.create_directory()
2392
+ tar_full = tar_dir.full_file_name(file=target_,ext=ext)
1858
2393
  else:
1859
- tar_full = self.fullKeyName( target, ext=ext )
1860
- self.createDirectory()
2394
+ tar_full = self.full_file_name( target, ext=ext )
2395
+ self.create_directory()
1861
2396
 
1862
2397
  os.rename(src_full, tar_full)
1863
2398
 
1864
2399
  # utilities
1865
2400
 
1866
2401
  @staticmethod
1867
- def removeBadKeyCharacters( key:str, by:str=' ' ) -> str:
2402
+ def remove_bad_file_characters( file : str, by : str="default" ) -> str:
1868
2403
  """
1869
- Replaces invalid characters in a filename by 'by'.
1870
- See util.fmt_filename() for documentation and further options.
2404
+ Replaces invalid characters in a filename using the map ``by``.
2405
+
2406
+ See :func:`cdxcore.util.fmt_filename` for documentation and further options.
1871
2407
  """
1872
- return fmt_filename( key, by=by )
2408
+ return fmt_filename( file, by=by )
1873
2409
 
1874
- def unqiueLabelToKey( self, unique_label:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
1875
- """
1876
- Converts a unique label which might contain invalid characters into a unique file name, such that the full file name does not exceed 'max_length' bytes.
1877
- The returned key has the format
1878
- name + separator + ID
1879
- where ID has length id_length.
1880
- If unique_label is already guaranteed to be a valid filename, use unqiueLongFileNameToKey() instead.
1881
- """
1882
- len_ext = len(self.ext)
1883
- assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
1884
- uqf = uniqueLabelExt( max_length=max_length-len_ext, id_length=id_length, separator=separator, filename_by="default" )
1885
- return uqf( unique_label )
1886
-
1887
- def unqiueLongFileNameToKey( self, unique_filename:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
1888
- """
1889
- Converts a unique filename which might be too long to a unique filename such that the total length plus 'ext' does not exceed 'max_length' bytes.
1890
- If the filename is already short enough, no change is made.
1891
-
1892
- If 'unique_filename' is not guaranteed to be a valid filename, use unqiueLabelToKey() instead.
1893
- """
1894
- len_ext = len(self.ext)
1895
- assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
1896
- uqf = uniqueLabelExt( max_length=max_length-len_ext, id_length=id_length, separator=separator )
1897
- return uqf( unique_filename )
2410
+ if False:
2411
+ def unqiueLabelToKey( self, unique_label:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
2412
+ """
2413
+ Converts a unique label which might contain invalid characters into a unique file name, such that the full file name does not exceed 'max_length' bytes.
2414
+ The returned file has the format
2415
+ name + separator + ID
2416
+ where ID has length id_length.
2417
+ If unique_label is already guaranteed to be a valid filename, use unqiueLongFileNameToKey() instead.
2418
+ """
2419
+ len_ext = len(self.ext)
2420
+ assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
2421
+ uqf = UniqueLabel( max_length=max_length-len_ext, id_length=id_length, separator=separator, filename_by="default" )
2422
+ return uqf( unique_label )
2423
+
2424
+ def unqiueLongFileNameToKey( self, unique_filename:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
2425
+ """
2426
+ Converts a unique filename which might be too long to a unique filename such that the total length plus 'ext' does not exceed 'max_length' bytes.
2427
+ If the filename is already short enough, no change is made.
2428
+
2429
+ If 'unique_filename' is not guaranteed to be a valid filename, use unqiueLabelToKey() instead.
2430
+ """
2431
+ len_ext = len(self.ext)
2432
+ assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
2433
+ uqf = UniqueLabel( max_length=max_length-len_ext, id_length=id_length, separator=separator )
2434
+ return uqf( unique_filename )
1898
2435
 
1899
2436
  # -- dict-like interface --
1900
2437
 
1901
- def __call__(self, keyOrSub : str,
2438
+ def __call__(self, element : str,
1902
2439
  default = RETURN_SUB_DIRECTORY,
1903
- raiseOnError : bool = False,
2440
+ raise_on_error : bool = False,
1904
2441
  *,
1905
2442
  version : str = None,
1906
2443
  ext : str = None,
1907
2444
  fmt : Format = None,
1908
2445
  delete_wrong_version : bool = True,
1909
- createDirectory : bool = None ):
2446
+ create_directory : bool = None ):
1910
2447
  """
1911
- Return either the value of a sub-key (file), or return a new sub directory.
1912
- If only one argument is used, then this function returns a new sub directory.
1913
- If two arguments are used, then this function returns read( keyOrSub, default ).
2448
+ Read either data from a file, or return a new sub directory.
2449
+
2450
+ If only the ``element`` argument is used, then this function returns a new sub directory
2451
+ named ``element``.
2452
+
2453
+ If both ``element`` and ``default`` arguments are used, then this function attempts to read the file ``element``
2454
+ from disk, returning ``default`` if it does not exist.
1914
2455
 
1915
- sd = SubDir("!/test")
2456
+ Assume we have a subdirectory ``sd``::
2457
+
2458
+ from cdxcore.subdir import SubDir
2459
+ sd = SubDir("!/test")
1916
2460
 
1917
- Member access:
1918
- x = sd('x', None) reads 'x' with default value None
1919
- x = sd('sd/x', default=1) reads 'x' from sub directory 'sd' with default value 1
1920
- x = sd('x', default=1, ext="tmp") reads 'x.tmp' from sub directory 'sd' with default value 1
2461
+ Reading files::
2462
+
2463
+ x = sd('file', None) # reads 'file' with default value None
2464
+ x = sd('sd/file', default=1) # reads 'file' from sub directory 'sd' with default value 1
2465
+ x = sd('file', default=1, ext="tmp") # reads 'file.tmp' with default value 1
1921
2466
 
1922
- Create sub directory:
1923
- sd2 = sd("subdir") creates and returns handle to subdirectory 'subdir'
1924
- sd2 = sd("subdir1/subdir2") creates and returns handle to subdirectory 'subdir1/subdir2'
1925
- sd2 = sd("subdir1/subdir2", ext=".tmp") creates and returns handle to subdirectory 'subdir1/subdir2' with extension "tmp"
1926
- sd2 = sd(ext=".tmp") returns handle to current subdirectory with extension "tmp"
2467
+ Create sub directory::
2468
+
2469
+ sd2 = sd("subdir") # creates and returns handle to subdirectory 'subdir'
2470
+ sd2 = sd("subdir1/subdir2") # creates and returns handle to subdirectory 'subdir1/subdir2'
2471
+ sd2 = sd("subdir1/subdir2", ext=".tmp") # creates and returns handle to subdirectory 'subdir1/subdir2' with extension "tmp"
2472
+ sd2 = sd(ext=".tmp") # returns handle to current subdirectory with extension "tmp"
1927
2473
 
1928
2474
  Parameters
1929
2475
  ----------
1930
- keyOrSub : str
1931
- identify the object requested. Should be a string or a list of strings.
1932
- default:
1933
- If specified, this function reads 'keyOrSub' with read( keyOrSub, default, *args, **kwargs )
1934
- If not specified, then this function calls SubDir(keyOrSub,parent=self,ext=ext,fmt=fmt)
2476
+ element : str
2477
+ File or directory name, or a list thereof.
2478
+
2479
+ default : optional
2480
+ If specified, this function reads ``element`` with
2481
+ ``read( element, default, *args, **kwargs )``.
1935
2482
 
1936
- The following keywords are only relevant when reading files.
1937
- They echo the parameters of read()
2483
+ If ``default`` is not specified, then this function returns a new sub-directory by calling
2484
+ ``SubDir(element,parent=self,ext=ext,fmt=fmt)``.
1938
2485
 
1939
- raiseOnError : bool
2486
+ create_directory : bool, optional
2487
+ *When creating sub-directories:*
2488
+
2489
+ Whether or not to instantly create the sub-directory. The default, ``None``, is to inherit the behaviour from ``self``.
2490
+
2491
+ raise_on_error : bool, optional
2492
+ *When reading files:*
2493
+
1940
2494
  Whether to raise an exception if reading an existing file failed.
1941
- By default this function fails silently and returns the default.
1942
- version : str
1943
- If not None, specifies the version of the current code base.
1944
- Use '*' to read any version (this is distrinct from reading a file without version).
1945
- If version is not' '*', then this version will be compared to the version of the file being read.
1946
- If they do not match, read fails (either by returning default or throwing an exception).
1947
- delete_wrong_version : bool
1948
- If True, and if a wrong version was found, delete the file.
1949
- ext : str
1950
- Extension overwrite, or a list thereof if key is a list
1951
- Set to:
1952
- -- None to use directory's default
1953
- -- '*' to use the extension implied by 'fmt'
1954
- -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1955
- fmt : Format
1956
- File format or None to use the directory's default.
1957
- Note that 'fmt' cannot be a list even if 'key' is.
1958
- Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
2495
+ By default this function fails silently and returns ``default``.
1959
2496
 
1960
- The following keywords are only relevant when accessing directories
1961
- They echo the parameters of __init__
1962
-
1963
- createDirectory : bool
1964
- Whether or not to create the directory. The default, None, is to inherit the behaviour from self.
1965
- ext : str
1966
- Set to None to inherit the parent's extension.
1967
- fmt : Format
1968
- Set to None to inherit the parent's format.
2497
+ Default is ``False``.
2498
+
2499
+ version : str, optional
2500
+ *When reading files:*
2501
+
2502
+ If not ``None``, specifies the version of the current code base.
2503
+
2504
+ In this case, this version will be compared to the version of the file being read.
2505
+ If they do not match, read fails (either by returning default or throwing a :class:`cdxcore.version.VersionError` exception).
2506
+
2507
+ You can specify version ``"*"`` to accept any version.
2508
+ Note that this is distinct
2509
+ from using ``None`` which stipulates that the file should not
2510
+ have version information.
2511
+
2512
+ Default is ``None``.
2513
+
2514
+ delete_wrong_version : bool, optional
2515
+ *When reading files:*
2516
+
2517
+ If ``True``, and if a wrong version was found, delete the file.
2518
+
2519
+ Default is ``True``.
2520
+
2521
+ ext : str, optional
2522
+ *When reading files:*
2523
+
2524
+ Extension to be used, or a list thereof if ``element`` is a list. Defaults
2525
+ to the extension of ``self``.
2526
+
2527
+ Semantics:
2528
+
2529
+ * ``None`` to use the default extension of ``self``.
2530
+ * ``"*"`` to use the extension implied by ``fmt``.
2531
+ * ``""`` to turn off extension management.
2532
+
2533
+ *When creating sub-directories:*
2534
+
2535
+ Extension for the new subdirectory; set to ``None`` to inherit the parent's extension.
2536
+
2537
+ Default is ``None``.
2538
+
2539
+
2540
+ fmt : :class:`cdxcore.subdir.Format`, optional
2541
+ *When reading files:*
2542
+
2543
+ File format or ``None`` to use the directory's default.
2544
+ Note that ``fmt`` cannot be a list even if ``element`` is.
2545
+ Unless
2546
+ ``ext`` or the SubDir's extension is ``"*"``, changing the
2547
+ format does not automatically change the extension.
2548
+
2549
+ *When creating sub-directories:*
1969
2550
 
2551
+ Format for the new sub-directory; set to ``None`` to inherit the parent's format.
2552
+
2553
+ Default is ``None``.
2554
+
1970
2555
  Returns
1971
2556
  -------
2557
+ Object : type|SubDir
1972
2558
  Either the value in the file, a new sub directory, or lists thereof.
1973
- Returns None if an element was not found.
1974
2559
  """
1975
2560
  if default == SubDir.RETURN_SUB_DIRECTORY:
1976
- if not isinstance(keyOrSub, str):
1977
- if not isinstance(keyOrSub, Collection):
1978
- raise ValueError(txtfmt("'keyOrSub' must be a string or an iterable object. Found type '%s;", type(keyOrSub)))
1979
- return [ SubDir( k,parent=self,ext=ext,fmt=fmt,createDirectory=createDirectory) for k in keyOrSub ]
1980
- return SubDir(keyOrSub,parent=self,ext=ext,fmt=fmt,createDirectory=createDirectory)
1981
- return self.read( key=keyOrSub,
2561
+ if not isinstance(element, str):
2562
+ if not isinstance(element, Collection):
2563
+ raise ValueError(txtfmt("'element' must be a string or an iterable object. Found type '%s;", type(element)))
2564
+ return [ SubDir( k,parent=self,ext=ext,fmt=fmt,create_directory=create_directory) for k in element ]
2565
+ return SubDir(element,parent=self,ext=ext,fmt=fmt,create_directory=create_directory)
2566
+ return self.read( file=element,
1982
2567
  default=default,
1983
- raiseOnError=raiseOnError,
2568
+ raise_on_error=raise_on_error,
1984
2569
  version=version,
1985
2570
  delete_wrong_version=delete_wrong_version,
1986
2571
  ext=ext,
1987
2572
  fmt=fmt )
1988
2573
 
1989
- def __getitem__( self, key ):
2574
+ def __getitem__( self, file ):
1990
2575
  """
1991
- Reads self[key]
1992
- If 'key' does not exist, throw a KeyError
2576
+ Reads ``file`` using :meth:`cdxcore.subdir.SubDir.read`.
2577
+ If ``file`` does not exist, throw a :class:`KeyError`.
1993
2578
  """
1994
- return self.read( key=key, default=None, raiseOnError=True )
2579
+ return self.read( file=file, default=None, raise_on_error=True )
1995
2580
 
1996
- def __setitem__( self, key, value):
1997
- """ Writes 'value' to 'key' """
1998
- self.write(key,value)
2581
+ def __setitem__( self, file, value):
2582
+ """ Writes ``value`` to ``file`` using :meth:`cdxcore.subdir.SubDir.write`. """
2583
+ self.write(file,value)
1999
2584
 
2000
- def __delitem__(self,key):
2001
- """ Silently delete self[key] """
2002
- self.delete(key, False )
2585
+ def __delitem__(self,file):
2586
+ """ Silently delete ``file`` using :meth:`cdxcore.subdir.SubDir.delete`. """
2587
+ self.delete(file, False )
2003
2588
 
2004
2589
  def __len__(self) -> int:
2005
- """ Return the number of files (keys) in this directory """
2006
- return len(self.keys())
2590
+ """ Return the number of files in this directory with matching extension. """
2591
+ return len(self.files())
2007
2592
 
2008
2593
  def __iter__(self):
2009
- """ Returns an iterator which allows traversing through all keys (files) below this directory """
2010
- return self.keys().__iter__()
2011
-
2012
- def __contains__(self, key):
2013
- """ Implements 'in' operator """
2014
- return self.exists(key)
2015
-
2016
- # -- object like interface --
2594
+ """ Returns an iterator which allows traversing through all files in this directory with matching extension. """
2595
+ return self.files().__iter__()
2017
2596
 
2018
- def __getattr__(self, key):
2019
- """
2020
- Allow using member notation to get data
2021
- This function throws an AttributeError if 'key' is not found.
2597
+ def __contains__(self, file):
2598
+ """ Tests whether ``file`` :meth:`cdxcore.subdir.SubDir.exists`. """
2599
+ return self.exists(file)
2600
+
2601
+ def items(self, *, ext : str = None, raise_on_error : bool = False) -> Iterable:
2022
2602
  """
2023
- if not self.exists(key):
2024
- raise AttributeError(key)
2025
- return self.read( key=key, raiseOnError=True )
2603
+ Dictionary-style iterable of filenames and their content.
2604
+
2605
+ Usage::
2606
+
2607
+ subdir = SubDir("!")
2608
+ for file, data in subdir.items():
2609
+ print( file, str(data)[:100] )
2026
2610
 
2027
- def __setattr__(self, key, value):
2611
+ Parameters
2612
+ ----------
2613
+ ext : str
2614
+ Extension or ``None`` for the directory's current extension. Use ``""``
2615
+ for all file extensions.
2616
+
2617
+ Returns
2618
+ -------
2619
+ Iterable
2620
+ An iterable generator
2621
+ """
2622
+ class ItemIterable(Iterable):
2623
+ def __init__(_):
2624
+ _._files = self.files(ext=ext)
2625
+ _._subdir = self
2626
+ def __len__(_):
2627
+ return len(_._files)
2628
+ def __iter__(_):
2629
+ for file in _._files:
2630
+ data = _._subdir.read(file, ext=ext, raise_on_error=raise_on_error)
2631
+ yield file, data
2632
+ return ItemIterable()
2633
+
2634
+ # convenient path ops
2635
+ # -------------------
2636
+
2637
+ def __add__(self, directory : str) -> str:
2028
2638
  """
2029
- Allow using member notation to write data
2030
- Note: keys starting with '_' are /not/ written to disk
2639
+ Returns the subdirectory ``directory`` of ``self``.
2031
2640
  """
2032
- if key[0] == '_':
2033
- self.__dict__[key] = value
2034
- else:
2035
- self.write(key,value)
2036
-
2037
- def __delattr__(self, key):
2038
- """ Silently delete a key with member notation. """
2039
- verify( key[:1] != "_", "Deleting protected or private members disabled. Fix __delattr__ to support this")
2040
- return self.delete( key=key, raiseOnError=False )
2641
+ return SubDir(directory,parent=self)
2041
2642
 
2042
2643
  # pickling
2043
2644
  # --------
@@ -2052,7 +2653,22 @@ class SubDir(object):
2052
2653
  self._ext = state['ext']
2053
2654
  self._fmt = state['fmt']
2054
2655
  self._crt = state['crt']
2656
+
2657
+ @staticmethod
2658
+ def as_format( format_name : str ) -> int:
2659
+ """
2660
+ Converts a named format into the respective format code.
2661
+
2662
+ Example::
2055
2663
 
2664
+ format = SubDir.as_format( config("format", "pickle", SubDir.FORMAT_NAMES, "File format") )
2665
+ """
2666
+ format_name = format_name.upper()
2667
+ if not format_name in SubDir.FORMAT_NAMES:
2668
+ raise LookupError(f"Unknown format name '{format_name}'. Must be one of: {fmt_list(SubDir.FORMAT_NAMES)}")
2669
+ return Format[format_name]
2670
+
2671
+
2056
2672
  # caching
2057
2673
  # -------
2058
2674
 
@@ -2066,129 +2682,249 @@ class SubDir(object):
2066
2682
  exclude_arg_types : list[type] = None,
2067
2683
  version_auto_class : bool = True):
2068
2684
  """
2069
- Wraps a callable or a class into a cachable function.
2070
- Caching is based on the following two simple principles:
2071
-
2072
- 1) Unique Call ID:
2073
- When a function is called with some parameters, the wrapper identifies a unique ID based
2074
- on the qualified name of the function and on its runtime functional parameters (ie those
2075
- which alter the outcome of the function).
2076
- When a function is called the first time with a given unique call ID, it will store
2077
- the result of the call to disk. If the function is called with the same call ID again,
2078
- the result is read from disk and returned.
2079
-
2080
- To compute unique call IDs' cdxbasics.util.namedUniqueHashExt() is used.
2081
- Please read implementation comments there:
2082
- Key default features:
2083
- * It hashes objects via their __dict__ or __slot__ members.
2084
- This can be overwritten for a class by implementing __unique_hash__; see cdxbasics.util.namedUniqueHashExt().
2085
- * Function members of objects or any members starting with '_' are not considered
2086
- unless this behaviour is changed using CacheController().
2087
- * Numpy and panda frames are hashed using their byte representation.
2088
- That is slow and not recommended. It is better to identify numpy/panda inputs
2089
- via their generating characteristic ID.
2090
-
2091
- 2) Version:
2092
- Each function has a version, which includes dependencies on other functions or classes.
2093
- If the version of a result on disk does not match the current version, it is deleted
2094
- and the function is called again. This way you can use your code to drive updates
2095
- to data generated with cached functions.
2096
- Behind the scenes this is implemented using cdxbasics.version.version() which means
2097
- that the version of a cached function can also depend on versions of non-cached functions
2098
- or other objects.
2099
-
2100
- Functions
2101
- ---------
2102
- Example of caching functions:
2103
-
2104
- Cache a simple function 'f':
2105
-
2106
- from cdxbasics.subdir import SubDir
2107
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2108
-
2109
- @cache.cache("0.1")
2110
- def f(x,y):
2111
- return x*y
2112
-
2113
- _ = f(1,2) # function gets computed and the result cached
2114
- _ = f(1,2) # restore result from cache
2115
- _ = f(2,2) # different parameters: compute and store result
2685
+ Advanced versioned caching for callables.
2686
+
2687
+ Versioned caching is based on the following two simple principles:
2688
+
2689
+ 1) **Unique Call IDs:**
2690
+
2691
+ When a function is called with some parameters, the wrapper identifies a unique ID based
2692
+ on the qualified name of the function and on its runtime functional parameters (ie those
2693
+ which alter the outcome of the function).
2694
+ When a function is called the first time with a given unique call ID, it will store
2695
+ the result of the call to disk. If the function is called with the same call ID again,
2696
+ the result is read from disk and returned.
2697
+
2698
+ To compute unique call IDs :class:`cdxcore.uniquehash.NamedUniqueHash` is used
2699
+ by default.
2700
+
2701
+ 2) **Code Version:**
2702
+
2703
+ Each function has a version, which includes dependencies on other functions or classes.
2704
+ If the version of data on disk does not match the current version, it is deleted
2705
+ and the generating function is called again. This way you can use your code to drive updates
2706
+ to data generated with cached functions.
2707
+
2708
+ Behind the scenes this is implemented using :dec:`cdxcore.version.version` which means
2709
+ that the version of a cached function can also depend on versions of non-cached functions
2710
+ or other objects.
2711
+
2712
+ Caching Functions
2713
+ ^^^^^^^^^^^^^^^^^
2714
+
2715
+ Caching a simple function ``f`` is straightforward:
2716
+
2717
+ .. code-block:: python
2718
+
2719
+ from cdxcore.subdir import SubDir
2720
+ cache = SubDir("!/.cache")
2721
+ cache.delete_all_content() # for illustration
2722
+
2723
+ @cache.cache("0.1")
2724
+ def f(x,y):
2725
+ return x*y
2726
+
2727
+ _ = f(1,2) # function gets computed and the result cached
2728
+ _ = f(1,2) # restore result from cache
2729
+ _ = f(2,2) # different parameters: compute and store result
2730
+
2731
+ Cache another function ``g`` which calls ``f``, and whose version therefore depends on ``f``'s version:
2732
+
2733
+ .. code-block:: python
2734
+
2735
+ @cache.cache("0.1", dependencies=[f])
2736
+ def g(x,y):
2737
+ return f(x,y)**2
2738
+
2739
+ **Debugging**
2740
+
2741
+ When using automated caching it
2742
+ is important to understand how changes in parameters and the version of a function
2743
+ affect caching. To this end, :dec:`cdxcore.subdir.SubDir.cache` supports
2744
+ a tracing mechanism via the use of a :class:`cdxcore.subdir.CacheController`:
2745
+
2746
+ .. code-block:: python
2747
+
2748
+ from cdxcore.subdir import SubDir, CacheController, Context
2749
+
2750
+ ctrl = CacheController( debug_verbose=Context("all") )
2751
+ cache = SubDir("!/.cache", cache_controller=ctrl )
2752
+ cache.delete_all_content() # <- delete previous cached files, for this example only
2753
+
2754
+ @cache.cache("0.1")
2755
+ def f(x,y):
2756
+ return x*y
2757
+
2758
+ _ = f(1,2) # function gets computed and the result cached
2759
+ _ = f(1,2) # restore result from cache
2760
+ _ = f(2,2) # different parameters: compute and store result
2761
+
2762
+ Returns:
2116
2763
 
2117
- Another function g which calls f, and whose version therefore on f's version:
2764
+ .. code-block:: python
2765
+
2766
+ 00: cache(f@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
2767
+ 00: cache(f@__main__): called 'f@__main__' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f@__main__ 668a6b111549e288.pck'.
2768
+ 00: cache(f@__main__): read 'f@__main__' version 'version 0.1' from cache 'C:/Users/hans/AppData/Local/Temp/.cache/f@__main__ 668a6b111549e288.pck'.
2769
+ 00: cache(f@__main__): called 'f@__main__' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f@__main__ b5609542d7da0b04.pck'.
2118
2770
 
2119
- from cdxbasics.subdir import SubDir
2120
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2771
+ **Non-Functional Parameters**
2772
+
2773
+ A function may have non-functional parameters which do not alter the function's outcome.
2774
+ An example are ``debug`` flags:
2775
+
2776
+ .. code-block:: python
2777
+
2778
+ from cdxcore.subdir import SubDir
2779
+ cache = SubDir("!/.cache")
2780
+
2781
+ @cache.cache("0.1", dependencies=[f], exclude_args='debug')
2782
+ def g(x,y,debug): # <-- 'debug' is a non-functional parameter
2783
+ if debug:
2784
+ print(f"h(x={x},y={y})")
2785
+ return f(x,y)**2
2786
+
2787
+ You can define certain types as non-functional for *all* functions wrapped
2788
+ by :meth:`cdxcore.subdir.SubDir.cache` when constructing
2789
+ the :class:`cdxcore.subdir.CacheController` parameter for :class:`cdxcore.subdir.SubDir`:
2790
+
2791
+ .. code-block:: python
2121
2792
 
2122
- @cache.cache("0.1", dependencies=[f])
2123
- def g(x,y):
2124
- return g(x,y)**2
2793
+ from cdxcore.subdir import SubDir
2794
+
2795
+ class Debugger:
2796
+ def output( cond, message ):
2797
+ print(message)
2798
+
2799
+ ctrl = CacheController(exclude_arg_types=[Debugger]) # <- exclude 'Debugger' parameters from hashing
2800
+ cache = SubDir("!/.cache", cache_controller=ctrl)
2125
2801
 
2126
- A function may have non-functional parameters which do not alter the function's outcome.
2127
- An example are 'debug' flags:
2128
-
2129
- from cdxbasics.subdir import SubDir
2130
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2802
+ @cache.cache("0.1", dependencies=[f], exclude_args='debug')
2803
+ def g(x,y,debugger : Debugger): # <-- 'debugger' is a non-functional parameter
2804
+ debugger.output(f"h(x={x},y={y})")
2805
+ return f(x,y)**2
2806
+
2807
+ **Unique IDs and File Naming**
2808
+
2809
+ The unique call ID of a decorated function is by default generated from its fully qualified name
2810
+ and a unique hash of its functional parameters.
2811
+
2812
+ Key default behaviours of :class:`cdxcore.uniquehash.NamedUniqueHash`:
2813
+
2814
+ * The ``NamedUniqueHash`` hashes objects via their ``__dict__`` or ``__slots__`` members.
2815
+ This can be overwritten for a class by implementing ``__unique_hash__``; see :class:`cdxcore.uniquehash.NamedUniqueHash`.
2816
+
2817
+ * Function members of objects or any members starting with '_' are not hashed
2818
+ unless this behaviour is changed using :class:`cdxcore.subdir.CacheController`.
2819
+
2820
+ * Numpy and panda frames are hashed using their byte representation.
2821
+ That is slow and not recommended. It is better to identify numpy/panda inputs
2822
+ via their generating characteristic ID.
2823
+
2824
+ Either way, hashes are not particularly human readable. It is often useful
2825
+ to have unique IDs and therefore filenames which carry some context information.
2826
+
2827
+ This can be achieved by using ``label``:
2131
2828
 
2132
- @cache.cache("0.1", dependencies=[f], exclude_args='debug')
2133
- def g(x,y,debug): # <-- debug is a non-functional parameter
2134
- if debug:
2135
- print(f"h(x={x},y={y})")
2136
- return g(x,y)**2
2137
-
2138
- You can systematically define certain types as non-functional for *all* functions wrapped
2139
- by this SubDir by specifying the respective parameter for the CacheController() in SubDir.__init__().
2140
-
2141
- The Unique Call ID of a functions is by default generated by its fully qualified name
2142
- and a unique hash of its functional parameters.
2143
- This can be made more readable by using id=
2144
-
2145
- from cdxbasics.subdir import SubDir
2146
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2147
-
2148
- @cache.cache("0.1", id="f({x},{y}") # <- using a string to be passed to str.format()
2149
- def f(x,y):
2150
- return x*y
2829
+ .. code-block:: python
2151
2830
 
2152
- You can also use functions:
2153
-
2154
- from cdxbasics.subdir import SubDir
2155
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2156
-
2157
- # Using a function 'id'. Note the **_ to catch uninteresting parameters, here 'debug'
2158
- @cache.cache("0.1", id=lambda x,y,**_: f"h({x},{y})", exclude_args='debug')
2159
- def h(x,y,debug=False):
2160
- if debug:
2161
- print(f"h(x={x},y={y})")
2162
- return x*y
2831
+ from cdxcore.subdir import SubDir, CacheController, Context
2832
+ ctrl = CacheController( debug_verbose=Context("all") )
2833
+ cache = SubDir("!/.cache", cache_controller=ctrl )
2834
+ cache.delete_all_content() # for illustration
2835
+
2836
+ @cache.cache("0.1") # <- no ID
2837
+ def f1(x,y):
2838
+ return x*y
2839
+
2840
+ @cache.cache("0.1", label="f2({x},{y})") # <- label uses a string to be passed to str.format()
2841
+ def f2(x,y):
2842
+ return x*y
2843
+
2844
+ We can also use a function to generate a ``label``. In that case all parameters
2845
+ to the function including its ``name`` are passed to the function. In below example
2846
+ we eat any parameters we are not interested in with ``** _``:
2163
2847
 
2164
- Note that by default it is not assumed that the call Id returned by id is unique,
2165
- and a hash generated from all pertinent arguments will be generated.
2166
- That is why in the previous example we still need to exclude_args 'debug' here.
2848
+ .. code-block:: python
2167
2849
 
2168
- If the id you generate is guaranteed to be unique for all functional parameter values,
2169
- you can add unique=True. In this case the filename of the function
2850
+ @cache.cache("0.1", label=lambda x,y,**_: f"h({x},{y})", exclude_args='debug')
2851
+ def h(x,y,debug=False):
2852
+ if debug:
2853
+ print(f"h(x={x},y={y})")
2854
+ return x*y
2170
2855
 
2171
- from cdxbasics.subdir import SubDir
2172
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2173
-
2174
- # Using a function 'id' with 'unique' to generate a unique ID.
2175
- @cache.cache("0.1", id=lambda x,y,**_: f"h({x},{y})", unique=True)
2176
- def h(x,y,debug=False):
2177
- if debug:
2178
- print(f"h(x={x},y={y})")
2179
- return x*y
2856
+ We obtain:
2857
+
2858
+ .. code-block:: python
2859
+
2860
+ f1(1,1)
2861
+ f2(1,1)
2862
+ h(1,1)
2863
+
2864
+ 00: cache(f1@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
2865
+ 00: cache(f2@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
2866
+ 00: cache(h@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
2867
+ 00: cache(f1@__main__): called 'f1@__main__' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f1@__main__ ef197d80d6a0bbb0.pck'.
2868
+ 00: cache(f2@__main__): called 'f2(1,1)' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f2(1,1) bdc3cd99157c10f7.pck'.
2869
+ 00: cache(h@__main__): called 'h(1,1)' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/h(1,1) d3fdafc9182070f4.pck'.
2870
+
2871
+ Note that the file names ``f2(1,1) bdc3cd99157c10f7.pck``
2872
+ and ``h(1,1) d3fdafc9182070f4.pck`` for the ``f2`` and ``h`` function calls are now easier to read as
2873
+ they are comprised of the label
2874
+ of the function and a terminal hash key.
2875
+ The trailing hash is appended because we do not assume that the label returned by ``label`` is unique.
2876
+ Therefore, a hash generated from the ``label`` itself and
2877
+ all pertinent arguments will be appended to the filename.
2878
+
2879
+ If we know how to generate truly unique IDs which are always valid filenames, then we can use ``uid``
2880
+ instead of ``label``:
2881
+
2882
+ .. code-block:: python
2883
+
2884
+ @cache.cache("0.1", uid=lambda x,y,**_: f"h2({x},{y})", exclude_args='debug')
2885
+ def h2(x,y,debug=False):
2886
+ if debug:
2887
+ print(f"h(x={x},y={y})")
2888
+ return x*y
2889
+ h2(1,1)
2890
+
2891
+ yields::
2892
+
2893
+ 00: cache(h2@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
2894
+ 00: cache(h2@__main__): called 'h2(1,1)' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/h2(1,1).pck'.
2895
+
2896
+ In particular, the filename is now ``h2(1,1).pck`` without any hash.
2897
+ If ``uid`` is used the parameters of the function are not hashed. Like ``label``
2898
+ the parameter ``uid`` can also be a :func:`str.format` string or a callable.
2180
2899
 
2181
- Numpy/Panda
2182
- -----------
2900
+ **Controlling which Parameters to Hash**
2901
+
2902
+ To specify which parameters are pertinent for identifying a unique id, use:
2903
+
2904
+ * ``include_args``: list of function arguments to include. If ``None``, use all parameters as input in the next step
2905
+
2906
+ * ``exclude_args``: list of function arguments to exclude, if not ``None``.
2907
+
2908
+ * ``exclude_arg_types``: a list of types to exclude.
2909
+ This is helpful if control flow is managed with dedicated data types.
2910
+ An example of such a type is :class:`cdxcore.verbose.Context` which is used to print hierarchical output messages.
2911
+ Types can be globally excluded using a :class:`cdxcore.subdir.CacheController`
2912
+ when calling
2913
+ :class:`cdxcore.subdir.SubDir`.
2914
+
2915
+ **Numpy/Pandas**
2916
+
2183
2917
  Numpy/Panda data should not be hashed for identifying unique call IDs.
2184
2918
  Instead, use the defining characteristics for generating the data frames.
2185
2919
 
2186
2920
  For example:
2187
-
2188
- from cdxbasics.subdir import SubDir
2189
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2190
2921
 
2191
- from cdxbasics.prettydict import pdct
2922
+ .. code-block:: python
2923
+
2924
+ from cdxcore.pretty import PrettyObject
2925
+ from cdxcore.subdir import SubDir
2926
+ cache = SubDir("!/.cache")
2927
+ cache.delete_all_content() # for illustration
2192
2928
 
2193
2929
  @cache.cache("0.1")
2194
2930
  def load_src( src_def ):
@@ -2201,22 +2937,24 @@ class SubDir(object):
2201
2937
  stats = ... using data
2202
2938
  return stats
2203
2939
 
2204
- src_def = pdct()
2940
+ src_def = PrettyObject()
2205
2941
  src_def.start = "2010-01-01"
2206
2942
  src_def.end = "2025-01-01"
2207
2943
  src_def.x = 0.1
2208
2944
 
2209
- stats_def = pdct()
2945
+ stats_def = PrettyObject()
2210
2946
  stats_def.lambda = 0.1
2211
2947
  stats_def.window = 100
2212
2948
 
2213
2949
  data = load_src( src_def )
2214
2950
  stats = statistics( stats_def, src_def, data )
2215
2951
 
2216
- While instructive, this case is not optimal: we do not really need to load 'data'
2217
- if we can reconstruct 'stats' from 'data' (unless we need 'data' further on).
2952
+ While instructive, this case is not optimal: we do not really need to load ``data``
2953
+ if we can reconstruct ``stats`` from ``data`` (unless we need ``data`` further on).
2218
2954
 
2219
- Consider therefore
2955
+ Consider therefore:
2956
+
2957
+ .. code-block:: python
2220
2958
 
2221
2959
  @cache.cache("0.1")
2222
2960
  def load_src( src_def ):
@@ -2232,14 +2970,18 @@ class SubDir(object):
2232
2970
 
2233
2971
  stats = statistics_only( stats_def, src_def )
2234
2972
 
2235
- Member functions
2236
- ----------------
2973
+ Caching Member Functions
2974
+ ^^^^^^^^^^^^^^^^^^^^^^^^
2975
+
2237
2976
  You can cache member functions like any other function.
2238
- Note that version information are by default inherited, i.e. member functions will be dependent on the version of their
2239
- defining class, and class versions will be dependent on their base classes' versions.
2977
+ Note that :dec:`cdxcore.version.version` information are by default inherited, i.e. member functions will be dependent on the version of their
2978
+ defining class, and class versions will be dependent on their base classes' versions:
2240
2979
 
2241
- from cdxbasics.subdir import SubDir, version
2242
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2980
+ .. code-block:: python
2981
+
2982
+ from cdxcore.subdir import SubDir, version
2983
+ cache = SubDir("!/.cache")
2984
+ cache.delete_all_content() # for illustration
2243
2985
 
2244
2986
  @version("0.1")
2245
2987
  class A(object):
@@ -2259,18 +3001,26 @@ class SubDir(object):
2259
3001
  _ = b.f(y=1) # same unique call ID as previous call -> restore result from disk
2260
3002
 
2261
3003
  **WARNING**
2262
- The hashing function used -- cdxbasics.util.uniqueHashExt() -- does by default *not* process members of objects or dictionaries
2263
- which start with a "_". This behaviour can be changed using CacheController().
2264
- For reasonably complex objects it is recommended to implement:
2265
- __unique_hash__( self, length : int, parse_functions : bool, parse_underscore : str )
2266
- (it is also possible to simply set this value to a string constant).
3004
+ :class:`cdxcore.uniquehash.UniqueHash` does *not* by default process members of objects or dictionaries
3005
+ which start with a "_". This behaviour can be changed using :class:`cdxcore.subdir.CacheController`.
3006
+ For reasonably complex objects it is recommended to implement for your objects
3007
+ a custom hashing function::
3008
+
3009
+ __unique_hash__( self, uniqueHash : UniqueHash, debug_trace : DebugTrace )
3010
+
3011
+ This function is described at :class:`cdxcore.uniquehash.UniqueHash`.
3012
+
3013
+ Caching Bound Member Functions
3014
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2267
3015
 
2268
- Bound Member Functions
2269
- ----------------------
2270
- Note that above is functionally different to decorating a bound member function:
3016
+ Caching bound member functions is technically quite different to caching a function of a class in general,
3017
+ but also supported:
2271
3018
 
2272
- from cdxbasics.subdir import SubDir, version
2273
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
3019
+ .. code-block:: python
3020
+
3021
+ from cdxcore.subdir import SubDir, version
3022
+ cache = SubDir("!/.cache", cache_controller=CacheController(debug_verbose=Context("all")))
3023
+ cache.delete_all_content() # for illustration
2274
3024
 
2275
3025
  class A(object):
2276
3026
  def __init__(self,x):
@@ -2282,133 +3032,177 @@ class SubDir(object):
2282
3032
  f = cache.cache("0.1", id=lambda self, y : f"a.f({y})")(a.f) # <- decorate bound 'f'.
2283
3033
  r = c(y=2)
2284
3034
 
2285
- In this case the function 'f' is bound to 'a'. The object is added as 'self' to the function
2286
- parameter list even though the bound function parameter list does not include 'self'.
2287
- This, together with the comments on hashing objects above, ensures that (hashed) changes to 'a' will
3035
+ In this case the function ``f`` is bound to ``a``. The object is added as ``self`` to the function
3036
+ parameter list even though the bound function parameter list does not include ``self``.
3037
+ This, together with the comments on hashing objects above, ensures that (hashed) changes to ``a`` will
2288
3038
  be reflected in the unique call ID for the member function.
2289
3039
 
2290
- Classes
2291
- -------
2292
- Classes can also be cached.
2293
- This is done in two steps: first, the class itself is decorated to provide version information at its own level.
2294
- Secondly, decorate __init__ which also helps to define the unique call id. You do not need to specify a version
2295
- for __init__ as its version usually coincides with the version of the class.
2296
-
2297
- Simple example:
2298
-
2299
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2300
-
2301
- @cache.cache("0.1")
2302
- class A(object):
2303
-
2304
- @cache.cache(exclude_args=['debug'])
2305
- def __init__(self, x, debug):
2306
- if debug:
2307
- print("__init__",x)
2308
- self.x = x
3040
+ Caching Classes
3041
+ ^^^^^^^^^^^^^^^
2309
3042
 
2310
- __init__ does not actually return a value; for this reason the actual function decorated will be __new__.
2311
- Attempting to cache decorate __new__ will lead to an exception.
3043
+ Classes can also be cached. In this case the creation of a class is cached, i.e. a call to
3044
+ the class constructor restores the respective object from disk.
2312
3045
 
2313
- A nuance for __init__ vs ordinary member function is that 'self' is non-functional.
2314
- It is therefore automatically excluded from computing a unique call ID.
2315
- Specifically, 'self' is not part of the arguments passed to 'id':
3046
+ This is done in two steps:
2316
3047
 
2317
- @cache.cache("0.1")
2318
- class A(object):
3048
+ 1) first, the class itself is decorated using
3049
+ :dec:`cdxcore.subdir.SubDir.cache`
3050
+ to provide version information at class level. Only version information are provided here.
3051
+
3052
+ 2) Secondly, decorate ``__init__``. You do not need to specify a version
3053
+ for ``__init__`` as its version usually coincides with the version of the class. At ``__init__``
3054
+ you define how unique IDs are generated from the parameters passed to object construction.
3055
+
3056
+ Simple example:
3057
+
3058
+ .. code-block:: python
3059
+
3060
+ from cdxcore.subdir import SubDir
3061
+ cache = SubDir("!/.cache")
3062
+ cache.delete_all_content() # for illustration
3063
+
3064
+ @cache.cache("0.1")
3065
+ class A(object):
3066
+
3067
+ @cache.cache(exclude_args=['debug'])
3068
+ def __init__(self, x, debug):
3069
+ if debug:
3070
+ print("__init__",x)
3071
+ self.x = x
2319
3072
 
2320
- @cache.cache("0.1", id=lambda x, debug: f"A.__init__(x={x})") # <-- 'self' is not passed to the lambda function; no need to add **_
2321
- def __init__(self, x, debug):
2322
- if debug:
2323
- print("__init__",x)
2324
- self.x = x
3073
+ a = A(1) # caches 'a'
3074
+ b = A(1) # reads the cached object into 'b'
2325
3075
 
2326
- Decorating classes with __slots__ does not yet work.
2327
-
2328
- Non-functional parameters
2329
- -------------------------
2330
- Often functions have parameters which do not alter the output of the function but control i/o or other aspects of the overall environment.
2331
- An example is a function parameter 'debug':
2332
-
2333
- def f(x,y,debug=False):
2334
- z = x*y
2335
- if not debug:
2336
- print(f"x={x}, y={y}, z={z}")
2337
- return z
2338
-
2339
- To specify which parameters are pertinent for identiying a unique id, use:
2340
-
2341
- a) include_args: list of functions arguments to include. If None, use all as input in the next step
2342
- b) exclude_args: list of funciton arguments to exclude, if not None.
2343
- c) exclude_arg_types: a list of types to exclude. This is helpful if control flow is managed with dedicated data types.
2344
- An example of such a type is cdxbasics.verbose.Context which is used to print hierarchical output messages.
2345
- Types can be globally excluded using the CacheController.
3076
+ **Technical Comments**
3077
+
3078
+ The function ``__init__`` does not actually return a value; for this reason
3079
+ behind the scenes it is actually ``__new__`` which is being decorated.
3080
+ Attempting to cache-decorate ``__new__`` manually will lead to an exception.
3081
+
3082
+ A nuance for ``__init__`` vs ordinary member function is that the
3083
+ ``self`` parameter is non-functional
3084
+ (in the sense that it is an empty object when ``__init__`` is called).
3085
+ ``self`` is therefore automatically excluded from computing a unique call ID.
3086
+ That also means ``self`` is not part of the arguments passed to ``uid``:
3087
+
3088
+ .. code-block:: python
3089
+
3090
+ @cache.cache("0.1")
3091
+ class A(object):
2346
3092
 
3093
+ @cache.cache("0.1", uid=lambda x, debug: f"A.__init__(x={x})") # <-- 'self' is not passed to the lambda function; no need to add **_
3094
+ def __init__(self, x, debug):
3095
+ if debug:
3096
+ print("__init__",x)
3097
+ self.x = x
3098
+
3099
+ Decorating classes with ``__slots__`` does not yet work.
3100
+
2347
3101
  See also
2348
- --------
2349
- For project-wide use it is usually inconvenient to control caching at the level of a 'directory'.
2350
- See VersionedCacheRoot() is a thin wrapper around a SubDir with a CacheController.
3102
+ ^^^^^^^^
3103
+
3104
+ For project-wide use it is usually inconvenient to control caching at the level of a
3105
+ project-wide cache root directory.
3106
+ See :class:`cdxcore.subdir.VersionedCacheRoot` for a thin convenience wrapper around a :class:`cdxcore.subdir.SubDir`
3107
+ with a :class:`cdxcore.subdir.CacheController`.
2351
3108
 
2352
3109
  Parameters
2353
3110
  ----------
2354
3111
  version : str, optional
2355
3112
  Version of the function.
2356
- * If None then F must be decorated with cdxbasics.version.version().
2357
- * If set, the function F is first decorated with cdxbasics.version.version().
2358
- dependencies : list, optional
2359
- List of version dependencies
2360
-
2361
- id : str, Callable
2362
- Create a call label for the function call and its parameters.
2363
- See above for a description.
2364
- * A plain string without {} formatting: this is the fully qualified id
2365
- * A string with {} formatting: id.str( name=name, **parameters ) will be used to generate the fully qualified id
2366
- * A Callable, in which case id( name=name, **parameters ) will be used to generate the fully qualified id
2367
-
2368
- unique : bool
2369
- Whether the 'id' generated by 'id' is unique for this function call with its parameters.
2370
- If True, then the function will attempt to use 'id' as filename as long as it has no invalid characters and is short
2371
- enough (see 'max_filename_length').
2372
- If False, the function will append to the 'id' a unique hash of the qualified function name and all pertinent parameters
2373
-
2374
- name : str
2375
- The name of the function, or None for using the fully qualified function name.
2376
3113
 
3114
+ * If ``None`` then ``F`` must be decorated with :dec:`cdxcore.version.version`.
3115
+ * If set, the function ``F`` is first decorated with :dec:`cdxcore.version.version`.
3116
+
3117
+ dependencies : list[type], optional
3118
+ A list of version dependencies, either by reference or by name.
3119
+ See :dec:`cdxcore.version.version` for details on name lookup if strings are used.
3120
+
3121
+ label : str | Callable
3122
+ Specify a human-readable label for the function call given its parameters.
3123
+ This label is used to generate the cache file name, and is also printed when tracing
3124
+ hashing operations. Labels are not assumed to be unique, hence a unique hash of
3125
+ the label and the parameters to this function will be appended to generate
3126
+ the actual cache file name.
3127
+
3128
+ Use ``uid`` instead if ``label`` represents valid unique filenames.
3129
+
3130
+
3131
+ **Usage:**
3132
+
3133
+ * If ``label`` is a plain string without ``{}`` formatting: use this string as-is.
3134
+
3135
+ * If ``label`` is a string with ``{}`` formatting, then ``label.format( name=name, **parameters )``
3136
+ will be used to generate the actual label.
3137
+
3138
+ * If ``label`` is a ``Callable`` then ``label( name=name, **parameters )`` will be called
3139
+ to generate the actual label.
3140
+
3141
+ See above for examples.
3142
+
3143
+ ``label`` cannot be used alongside ``uid``.
3144
+
3145
+ uid : str | Callable
3146
+ Alternative to ``label`` which is assumed to generate a unique cache file name. It has the same
3147
+ semantics as ``label``. When used, parameters to the decorated function are not hashed.
3148
+
3149
+ ``uid`` cannot be used alongside ``label``.
3150
+
3151
+ name : str, optional
3152
+ Name of this function, which is used on its own if neither ``label`` nor ``uid`` is used.
3153
+ If either of them is used, ``name`` is passed as a parameter to either the callable or the
3154
+ formatting operator.
3155
+
3156
+ If ``name`` is not specified it defaults to ``__qualname__`` expanded
3157
+ by the module name the function is defined in.
3158
+
2377
3159
  include_args : list[str]
2378
- List of arguments to include in generating a unqiue id, or None for all.
3160
+ List of arguments to include in generating a unique ID, or ``None`` for all.
2379
3161
 
2380
3162
  exclude_args : list[str]:
2381
- List of argumernts to exclude
3163
+ List of arguments to exclude from generating a unique ID.
2382
3164
 
2383
3165
  exclude_arg_types : list[type]
2384
- List of types to exclude.
3166
+ List of parameter types to exclude from generating a unique ID.
2385
3167
 
2386
3168
  version_auto_class : bool
2387
-
2388
-
3169
+ Whether to automatically add version dependencies on base classes or, for member functions, on containing
3170
+ classes. This is the ``auto_class`` parameter for :dec:`cdxcore.version.version`.
2389
3171
 
2390
3172
  Returns
2391
3173
  -------
2392
- A callable to execute F if need be.
2393
- This callable has a member 'cache_info' which can be used to access information on caching activity.
2394
-
2395
- Information available at any time after decoration:
2396
- F.cache_info.name : qualified name of the function
2397
- F.cache_info.signature : signature of the function
3174
+ Decorated F: Callable
3175
+
3176
+ A decorator ``cache(F)`` whose ``__call__`` implements the cached call to ``F``.
2398
3177
 
2399
- Additonal information available during a call to a decorated function F, and thereafter:
2400
- F.cache_info.version : unique version string reflecting all dependencies.
2401
- F.cache_info.uid : unique call ID.
2402
- F.cache_info.label : last id generated, or None (if id was a string and unique was True)
2403
- F.cache_info.arguments : arguments parsed to create a unique call ID, or None (if id was a string and unique was True)
3178
+ This callable has a member ``cache_info``
3179
+ of type :class:`cdxcore.subdir.CacheInfo`
3180
+ which can be used to access information on caching activity.
3181
+
3182
+ * Information available at any time after decoration:
3183
+
3184
+ * ``F.cache_info.name`` : qualified name of the function
3185
+ * ``F.cache_info.signature`` : signature of the function
3186
+
3187
+ * Additional information available during a call to a decorated function F, and thereafter:
3188
+
3189
+ * ``F.cache_info.version`` : unique version string reflecting all dependencies.
3190
+ * ``F.cache_info.filename`` : unique filename used for caching logic during the last function call.
3191
+ * ``F.cache_info.label`` : last label generated, or ``None``.
3192
+ * ``F.cache_info.arguments`` : arguments parsed to create a unique call ID, or ``None``.
2404
3193
 
2405
- Additonal information available after a call to F:
2406
- F.cache_info.last_cached : whether the last function call returned a cached object
3194
+ * Additional information available after a call to ``F``:
3195
+
3196
+ * ``F.cache_info.last_cached`` : whether the last function call returned a cached object.
2407
3197
 
2408
- The function F has additional function parameters
2409
- override_cache_mode : allows to override caching mode temporarily, in particular "off"
2410
- track_cached_files : pass a CacheTracker object to keep track of all files used (loaded from or saved to).
2411
- This can be used to delete intermediary files when a large operation was completed.
3198
+ The decorated ``F()`` has additional function parameters, namely:
3199
+
3200
+ * ``override_cache_mode`` : allows to override caching mode temporarily, in particular you can set it to ``"off"``.
3201
+ * ``track_cached_files`` : allows passing a :class:`cdxcore.subdir.CacheTracker`
3202
+ object to keep track of all
3203
+ files used (loaded from or saved to).
3204
+ The function :meth:`cdxcore.subdir.CacheTracker.delete_cache_files` can be used
3205
+ to delete all files involved in caching.
2412
3206
  """
2413
3207
  return CacheCallable(subdir = self,
2414
3208
  version = version,
@@ -2428,14 +3222,153 @@ class SubDir(object):
2428
3222
  version_auto_class : bool = True
2429
3223
  ):
2430
3224
  """
2431
- Short cut for SubDir.cache() for classes
2432
- See SubDir.cache() for documentation.
3225
+ Short-cut for :dec:`cdxcore.subdir.SubDir.cache` applied to classes
3226
+ with a reduced number of available parameters.
3227
+
3228
+ Example::
3229
+
3230
+ cache = SubDir("!/.cache")
3231
+
3232
+ @cache.cache_class("0.1")
3233
+ class A(object):
3234
+
3235
+ @cache.cache(exclude_args=['debug'])
3236
+ def __init__(self, x, debug):
3237
+ if debug:
3238
+ print("__init__",x)
3239
+ self.x = x
3240
+
2433
3241
  """
2434
3242
  return self.cache( name=name,
2435
- version=version,
2436
- dependencies=dependencies,
2437
- version_auto_class=version_auto_class)
2438
-
3243
+ version=version,
3244
+ dependencies=dependencies,
3245
+ version_auto_class=version_auto_class)
3246
+
3247
+ # ========================================================================
3248
+ # Caching, convenience
3249
+ # ========================================================================
3250
+
3251
+ def VersionedCacheRoot( directory : str, *,
3252
+ ext : str = None,
3253
+ fmt : Format = None,
3254
+ create_directory : bool = False,
3255
+ **controller_kwargs
3256
+ ):
3257
+ """
3258
+ Create a root directory for versioned caching on disk
3259
+ using :dec:`cdxcore.subdir.SubDir.cache`.
3260
+
3261
+ **Usage:**
3262
+
3263
+ In a central file, define a root directory for all caching activity::
3264
+
3265
+ from cdxcore.subdir import VersionedCacheRoot
3266
+ vroot = VersionedCacheRoot("!/cache")
3267
+
3268
+ Create sub-directories as suitable, for example::
3269
+
3270
+ vtest = vroot("test")
3271
+
3272
+ Use these for caching::
3273
+
3274
+ @vtest.cache("1.0")
3275
+ def f1( x=1, y=2 ):
3276
+ print(x,y)
3277
+
3278
+ @vtest.cache("1.0", dps=[f1])
3279
+ def f2( x=1, y=2, z=3 ):
3280
+ f1( x,y )
3281
+ print(z)
3282
+
3283
+ Parameters
3284
+ ----------
3285
+ directory : str
3286
+ Name of the root directory for caching.
3287
+
3288
+ Using SubDir, the following shortcuts are supported:
3289
+
3290
+ * ``"!/dir"`` creates ``dir`` in the temporary directory.
3291
+ * ``"~/dir"`` creates ``dir`` in the home directory.
3292
+ * ``"./dir"`` creates ``dir`` relative to the current directory.
3293
+
3294
+ ext : str
3295
+ Extension, which will automatically be appended to file names.
3296
+ The default value depends on ``fmt``; for ``Format.PICKLE`` it is "pck".
3297
+
3298
+ fmt : :class:`cdxcore.subdir.Format`
3299
+ File format; if ``ext`` is not specified, the format drives the extension, too.
3300
+ Default is ``Format.PICKLE``.
3301
+
3302
+ create_directory : bool
3303
+ Whether to create the directory upon creation. Default is ``False``.
3304
+
3305
+ controller_kwargs: dict
3306
+ Parameters passed to :class:`cdxcore.subdir.CacheController`.
3307
+
3308
+ Common parameters used:
3309
+
3310
+ * ``exclude_arg_types``: list of types or names of types to exclude when auto-generating function
3311
+ signatures from function arguments.
3312
+ An example is :class:`cdxcore.verbose.Context` which is used to print progress messages.
3313
+
3314
+ * ``max_filename_length``: maximum filename length.
3315
+
3316
+ * ``hash_length``: length used for hashes, see :class:`cdxcore.uniquehash.UniqueHash`.
3317
+
3318
+ Returns
3319
+ -------
3320
+ Root : SubDir
3321
+ A root directory suitable for caching.
3322
+ """
3323
+ controller = CacheController(**controller_kwargs) if len(controller_kwargs) > 0 else None
3324
+ return SubDir( directory=directory, ext=ext, fmt=fmt, create_directory=create_directory, controller=controller )
3325
+
3326
+ version = version_decorator
3327
+
3328
+ class CacheTracker(object):
3329
+ """
3330
+ Utility class to track caching and be able to delete all dependent objects.
3331
+ """
3332
+ def __init__(self):
3333
+ """ track cache files """
3334
+ self._files = []
3335
+ def __iadd__(self, new_file):
3336
+ """ Add a new file to the tracker """
3337
+ self._files.append( new_file )
3338
+ def delete_cache_files(self):
3339
+ """ Delete all tracked files """
3340
+ for file in self._files:
3341
+ if os.path.exists(file):
3342
+ os.remove(file)
3343
+ self._files = []
3344
+ def __str__(self) -> str:#NOQA
3345
+ return f"Tracked: {self._files}"
3346
+ def __repr__(self) -> str:#NOQA
3347
+ return f"Tracked: {self._files}"
3348
+
3349
+ class CacheInfo(object):
3350
+ """
3351
+ Information on functions decorated with :dec:`cdxcore.subdir.SubDir.cache`.
3352
+
3353
+ Functions decorated with :dec:`cdxcore.subdir.SubDir.cache`
3354
+ will have a member ``cache_info`` of this type
3355
+ """
3356
+ def __init__(self, name, F, keep_last_arguments):
3357
+ """
3358
+ :meta private:
3359
+ """
3360
+ self.name = name #: Decoded name of the function.
3361
+
3362
+ self.signature = inspect.signature(F) #: :func:`inspect.signature` of the function.
3363
+
3364
+ self.filename = None #: Unique filename of the last function call.
3365
+ self.label = None #: Label of the last function call.
3366
+ self.version = None #: Last version used.
3367
+
3368
+ self.last_cached = None #: Whether the last function call restored data from disk.
3369
+
3370
+ if keep_last_arguments:
3371
+ self.arguments = None #: Last arguments used. This member is only present if ``keep_last_arguments`` was set to ``True`` for the relevant :class:`cdxcore.subdir.CacheController`.
2439
3372
 
2440
3373
  def _ensure_has_version( F,
2441
3374
  version : str = None,
@@ -2486,8 +3419,9 @@ def _qualified_name( F, name ):
2486
3419
 
2487
3420
  class CacheCallable(object):
2488
3421
  """
2489
- Utility class for SubDir.cache_callable.
2490
- See documentation for that function.
3422
+ Wrapper for a cached function.
3423
+
3424
+ This is the wrapper returned by :dec:`cdxcore.subdir.SubDir.cache`.
2491
3425
  """
2492
3426
 
2493
3427
  def __init__(self,
@@ -2503,8 +3437,9 @@ class CacheCallable(object):
2503
3437
  version_auto_class : bool = True,
2504
3438
  name_of_name_arg : str = "name"):
2505
3439
  """
2506
- Utility class for SubDir.cache_callable.
2507
- See documentation for that function.
3440
+ Utility class for :dec:`cdxcore.subdir.SubDir.cache`.
3441
+
3442
+ *Do not use directly.*
2508
3443
  """
2509
3444
  if not label is None and not uid is None:
2510
3445
  error("Cannot specify both 'label' and 'uid'.")
@@ -2523,35 +3458,41 @@ class CacheCallable(object):
2523
3458
 
2524
3459
  @property
2525
3460
  def uid_or_label(self) -> Callable:
3461
+ """ ID or label """
2526
3462
  return self._uid if self._label is None else self._label
2527
3463
  @property
2528
3464
  def unique(self) -> bool:
3465
+ """ Whether the ID is unique """
2529
3466
  return not self._uid is None
2530
-
2531
3467
  @property
2532
- def cacheController(self) -> CacheController:
2533
- """ Returns the cache controller """
2534
- return self._subdir.cacheController
3468
+ def cache_controller(self) -> CacheController:
3469
+ """ Returns the :class:`cdxcore.subdir.CacheController` """
3470
+ return self._subdir.cache_controller
2535
3471
  @property
2536
- def cache_mode(self) -> Context:
2537
- return self.cacheController.cache_mode
3472
+ def cache_mode(self) -> CacheMode:
3473
+ """ Returns the :class:`cdxcore.subdir.CacheMode` of the underlying :class:`cdxcore.subdir.CacheController` """
3474
+ return self.cache_controller.cache_mode
2538
3475
  @property
2539
3476
  def debug_verbose(self) -> Context:
2540
- return self.cacheController.debug_verbose
3477
+ """ Returns the debug :class:`cdxcore.verbose.Context` used to print caching information, or ``None`` """
3478
+ return self.cache_controller.debug_verbose
2541
3479
  @property
2542
- def uniqueNamedFileName(self) -> Callable:
2543
- return self.cacheController.uniqueNamedFileName
3480
+ def labelledFileName(self) -> Callable:
3481
+ """ Returns ``labelledFileName()`` of the underlying :class:`cdxcore.subdir.CacheController` """
3482
+ return self.cache_controller.labelledFileName
2544
3483
  @property
2545
- def uniqueLabelledFileName(self) -> Callable:
2546
- return self.cacheController.uniqueLabelledFileName
3484
+ def uniqueFileName(self) -> Callable:
3485
+ """ Returns ``uniqueFileName()`` of the underlying :class:`cdxcore.subdir.CacheController` """
3486
+ return self.cache_controller.uniqueFileName
2547
3487
  @property
2548
3488
  def global_exclude_arg_types(self) -> list[type]:
2549
- return self.cacheController.exclude_arg_types
3489
+ """ Returns ``exclude_arg_types`` of the underlying :class:`cdxcore.subdir.CacheController` """
3490
+ return self.cache_controller.exclude_arg_types
2550
3491
 
2551
3492
  def __call__(self, F : Callable):
2552
3493
  """
2553
- Decorate 'F' as cachable callable. Can also decorate classes via ClassCallable()
2554
- See SubDir.cache() for documentation.
3494
+ Decorate ``F`` as cacheable callable.
3495
+ See :dec:`cdxcore.subdir.SubDir.cache` for documentation.
2555
3496
  """
2556
3497
  if inspect.isclass(F):
2557
3498
  if not self._label is None: raise ValueError("'{F.__qualname__}': when decorating a class specify 'label' for __init__, not the class")
@@ -2566,11 +3507,13 @@ class CacheCallable(object):
2566
3507
  def _wrap_class(self, C : type):
2567
3508
  """
2568
3509
  Wrap class
3510
+
2569
3511
  This wrapper:
2570
- 1) Assigns a cdxbasics.version.version() for the class (if not yet present)
2571
- 2) Extracts from __init__ the wrapper to decorate __new__
3512
+
3513
+ * Assigns a :dec:`cdxcore.version.version` for the class (if not yet present).
3514
+ * Extracts from ``__init__`` the wrapper to decorate ``__new__``.
2572
3515
  """
2573
- debug_verbose = self.cacheController.debug_verbose
3516
+ debug_verbose = self.cache_controller.debug_verbose
2574
3517
 
2575
3518
  assert not inspect.isclass(C), ("Not a class", C)
2576
3519
 
@@ -2609,8 +3552,7 @@ class CacheCallable(object):
2609
3552
  """
2610
3553
  Decorate callable 'F'.
2611
3554
  """
2612
-
2613
- debug_verbose = self.cacheController.debug_verbose
3555
+ debug_verbose = self.cache_controller.debug_verbose
2614
3556
  assert not inspect.isclass(F), ("Internal error")
2615
3557
 
2616
3558
  # check validity
@@ -2712,21 +3654,24 @@ class CacheCallable(object):
2712
3654
  # determine unique id_ for this function call
2713
3655
  # -------------------------------------------
2714
3656
 
2715
- label = None
2716
- uid = None
2717
3657
  uid_or_label = self.uid_or_label
3658
+ filename = None
2718
3659
  if isinstance(uid_or_label, str) and self.unique:
2719
- # if 'id' does not contain formatting codes, and the result is 'unique' then do not bother collecting
3660
+ # if 'id' does not contain formatting codes,
3661
+ # and the result is 'unique' then do not bother collecting
2720
3662
  # function arguments
2721
3663
  try:
2722
- uid = uid_or_label.format() # throws a KeyError if 'id' contains formatting information
3664
+ filename = uid_or_label.format() # throws a KeyError if 'id' contains formatting information
2723
3665
  except KeyError:
2724
3666
  pass
2725
3667
 
2726
- if not uid is None:
3668
+ if not filename is None:
2727
3669
  # generate name with the unique string provided by the user
2728
- label = uid
2729
- uid = self.uniqueLabelledFileName( self.id )
3670
+ if not is_filename(filename):
3671
+ raise ValueError(f"The unique filename '{filename}' computed for '{name}' contains invalid characters for filename. When using `uid` make sure that "+\
3672
+ "the returned ID is a valid filename (and unique)")
3673
+ label = filename
3674
+ filename = self.uniqueFileName( filename )
2730
3675
  arguments = None
2731
3676
 
2732
3677
  else:
@@ -2767,9 +3712,9 @@ class CacheCallable(object):
2767
3712
  if arg in arguments:
2768
3713
  del arguments[arg]
2769
3714
 
2770
- # apply logics
3715
+ # did the user provide a label or unique ID?
2771
3716
  if uid_or_label is None:
2772
- label = name
3717
+ uid_or_label = name
2773
3718
 
2774
3719
  else:
2775
3720
  if self._name_of_name_arg in arguments:
@@ -2789,24 +3734,30 @@ class CacheCallable(object):
2789
3734
  # call format or function
2790
3735
  if isinstance( uid_or_label, str ):
2791
3736
  try:
2792
- label = str.format( uid_or_label, **arguments )
3737
+ uid_or_label = str.format( uid_or_label, **arguments )
2793
3738
  except KeyError as e:
2794
3739
  raise KeyError(e, f"Error while generating id for '{name}' using format string '{uid_or_label}': {e}. Available arguments: {list(arguments)}")
2795
3740
 
2796
3741
  else:
2797
3742
  which = 'uid' if not self._uid is None else 'label'
2798
3743
  try:
2799
- label = uid_or_label(**arguments)
3744
+ uid_or_label = uid_or_label(**arguments)
2800
3745
  except TypeError as e:
2801
3746
  raise TypeError(e, f"Error while generating '{which}' for '{name}' using a function: {e}. Available arguments: {list(arguments)}")
2802
3747
  except Exception as e:
2803
3748
  raise type(e)(f"Error while generating '{which}' for '{name}': attempt to call '{which}' of type {type(uid_or_label)} failed: {e}")
2804
- assert isinstance(label, str), ("Error:", which,"callable must return a string. Found",type(label))
3749
+ assert isinstance(uid_or_label, str), ("Error:", which, "callable must return a string. Found",type(uid_or_label))
2805
3750
 
2806
3751
  if self.unique:
2807
- uid = self.uniqueLabelledFileName( label )
3752
+ if not is_filename(uid_or_label):
3753
+ raise ValueError(f"The unique filename '{uid_or_label}' computed for '{name}' contains invalid characters for filename. When using `uid` make sure that "+\
3754
+ "the returned filename is indeed a valid filename (and unique)")
3755
+
3756
+ label = uid_or_label
3757
+ filename = self.uniqueFileName( uid_or_label )
2808
3758
  else:
2809
- uid = self.uniqueNamedFileName( label, **arguments )
3759
+ label = uid_or_label
3760
+ filename = self.labelledFileName( uid_or_label, **arguments )
2810
3761
 
2811
3762
  # determine version, cache mode
2812
3763
  # ------------------
@@ -2818,11 +3769,11 @@ class CacheCallable(object):
2818
3769
  # store process information
2819
3770
  # -------------------------
2820
3771
 
2821
- execute.cache_info.label = str(label) if not label is None else None
2822
- execute.cache_info.uid = uid
2823
- execute.cache_info.version = version_
3772
+ execute.cache_info.label = str(label) if not label is None else None
3773
+ execute.cache_info.filename = filename
3774
+ execute.cache_info.version = version_
2824
3775
 
2825
- if self.cacheController.keep_last_arguments:
3776
+ if self.cache_controller.keep_last_arguments:
2826
3777
  info_arguments = OrderedDict()
2827
3778
  for argname, argvalue in arguments.items():
2828
3779
  info_arguments[argname] = str(argvalue)[:100]
@@ -2833,26 +3784,26 @@ class CacheCallable(object):
2833
3784
  # ---------------
2834
3785
 
2835
3786
  if cache_mode.delete:
2836
- self._subdir.delete( uid )
3787
+ self._subdir.delete( filename )
2837
3788
  elif cache_mode.read:
2838
3789
  class Tag:
2839
3790
  pass
2840
3791
  tag = Tag()
2841
3792
  if not is_new:
2842
- r = self._subdir.read( uid, tag, version=version_ )
3793
+ r = self._subdir.read( filename, tag, version=version_ )
2843
3794
  else:
2844
3795
  try:
2845
3796
  execute.__new_during_read = True
2846
- r = self._subdir.read( uid, tag, version=version_ )
3797
+ r = self._subdir.read( filename, tag, version=version_ )
2847
3798
  finally:
2848
3799
  execute.__new_during_read = False
2849
3800
 
2850
3801
  if not r is tag:
2851
3802
  if not track_cached_files is None:
2852
- track_cached_files += self._fullFileName(uid)
3803
+ track_cached_files += self._fullFileName(filename)
2853
3804
  execute.cache_info.last_cached = True
2854
3805
  if not debug_verbose is None:
2855
- debug_verbose.write(f"cache({name}): read '{label}' version 'version {version_}' from cache '{self._subdir.fullFileName(uid)}'.")
3806
+ debug_verbose.write(f"cache({name}): read '{label}' version 'version {version_}' from cache '{self._subdir.full_file_name(filename)}'.")
2856
3807
  if is_new:
2857
3808
  assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__", F.__qualname__, label)
2858
3809
  r.__magic_cache_call_init__ = False # since we called __new__, __init__ will be called next
@@ -2871,9 +3822,9 @@ class CacheCallable(object):
2871
3822
  assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__")
2872
3823
 
2873
3824
  if cache_mode.write:
2874
- self._subdir.write(uid,r,version=version_)
3825
+ self._subdir.write(filename,r,version=version_)
2875
3826
  if not track_cached_files is None:
2876
- track_cached_files += self._subdir.fullFileName(uid)
3827
+ track_cached_files += self._subdir.full_file_name(filename)
2877
3828
  execute.cache_info.last_cached = False
2878
3829
 
2879
3830
  if is_new:
@@ -2883,81 +3834,21 @@ class CacheCallable(object):
2883
3834
 
2884
3835
  if not debug_verbose is None:
2885
3836
  if cache_mode.write:
2886
- debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' and wrote result into '{self._subdir.fullFileName(uid)}'.")
3837
+ debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' and wrote result into '{self._subdir.full_file_name(filename)}'.")
2887
3838
  else:
2888
- debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' but did *not* write into '{self._subdir.fullFileName(uid)}'.")
3839
+ debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' but did *not* write into '{self._subdir.full_file_name(filename)}'.")
2889
3840
  return r
2890
3841
 
2891
3842
  update_wrapper( wrapper=execute, wrapped=F )
2892
- execute.cache_info = CacheInfo()
2893
-
2894
- execute.cache_info.name = name # decoded name of the function
2895
- execute.cache_info.signature = inspect.signature(F) # signature of the function
2896
-
2897
- execute.cache_info.uid = None # last function call ID
2898
- execute.cache_info.label = None # last unique file name cached to
2899
- execute.cache_info.version = None # last version used
2900
-
2901
- execute.cache_info.last_cached = None # last function call restored from disk?
2902
-
2903
- if self.cacheController.keep_last_arguments:
2904
- execute.cache_info.arguments = None # last function call arguments dictionary of strings
3843
+ execute.cache_info = CacheInfo(name, F, self.cache_controller.keep_last_arguments)
2905
3844
 
2906
3845
  if is_new:
2907
3846
  execute.__new_during_read = False
2908
3847
 
2909
3848
  if not debug_verbose is None:
2910
3849
  debug_verbose.write(f"cache({name}): {'function' if not is_new else 'class constructor function'} registered for caching into '{self._subdir.path}'.")
2911
- self.cacheController.versioned[name] = execute
3850
+ self.cache_controller.versioned[name] = execute
2912
3851
  return execute
2913
3852
 
2914
- def VersionedCacheRoot( directory : str, *,
2915
- ext : str = None,
2916
- fmt : Format = None,
2917
- createDirectory : bool = None,
2918
- **controller_kwargs
2919
- ):
2920
- """
2921
- Create a root directory for versioning caching on disk
2922
-
2923
- Usage:
2924
- In a central file, define a root directory
2925
- vroot = VersionedCacheRoot("!/cache")
2926
3853
 
2927
- and a sub-directory
2928
- vtest = vroot("test")
2929
-
2930
- @vtest.cache("1.0")
2931
- def f1( x=1, y=2 ):
2932
- print(x,y)
2933
-
2934
- @vtest.cache("1.0", dps=[f1])
2935
- def f2( x=1, y=2, z=3 ):
2936
- f1( x,y )
2937
- print(z)
2938
-
2939
- Parameters
2940
- ----------
2941
- directory : name of the directory. Using SubDir the following short cuts are supported:
2942
- "!/dir" creates 'dir' in the temporary directory
2943
- "~/dir" creates 'dir' in the home directory
2944
- "./dir" created 'dir' relative to the current directory
2945
- ext : extension, which will automatically be appended to file names (see SubDir). Default depends on format. For Format.PICKLE it is 'pck'
2946
- fmt : format, see SubDir.Format. Default is Format.PICKLE
2947
- createDirectory : whether to create the directory upon creation. Default is no.
2948
- controller_kwargs: parameters passed to VersionController, for example:
2949
- exclude_arg_types : list of types or names of types to exclude when auto-generating function signatures from function arguments.
2950
- A standard example from cdxbasics is "Context" as it is used to print progress messages.
2951
- max_filename_length : maximum filename length
2952
- hash_length: length used for hashes, see cdxbasics.util.uniqueHash()
2953
-
2954
- Returns
2955
- -------
2956
- A root cache directory
2957
- """
2958
- controller = CacheController(**controller_kwargs) if len(controller_kwargs) > 0 else None
2959
- return SubDir( directory=directory, ext=ext, fmt=fmt, createDirectory=createDirectory, controller=controller )
2960
3854
 
2961
- version = version_decorator
2962
-
2963
-