cdxcore 0.1.6__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cdxcore might be problematic. Click here for more details.

cdxcore/subdir.py CHANGED
@@ -1,61 +1,387 @@
1
1
  """
2
- subdir
3
- Simple class to keep track of directory sturctures and for automated caching on disk
4
- Hans Buehler 2020
5
- """
2
+ Overview
3
+ --------
4
+
5
+ This module contains utilities for file i/o, directory management and
6
+ streamlined versioned caching.
7
+
8
+ The key idea is to provide transparent, concise :mod:`pickle` access to the file system
9
+ via the :class:`cdxcore.subdir.SubDir` class.
10
+
11
+ **Key design features:**
12
+
13
+ * Simple path construction via ``()`` operator. By default directories which do not exist yet
14
+ are only created upon writing a first file.
15
+
16
+ * Files managed by :class:`cdxcore.subdir.SubDir` all have the same extension.
17
+
18
+ * Files support fast versioning: the version of a file can be read without having to read the
19
+ entire file.
20
+
21
+ * :dec:`cdxcore.subdir.SubDir.cache` implements a convenient versioned caching framework.
22
+
23
+ Directories
24
+ ^^^^^^^^^^^
25
+
26
+ The core of the framework is the :class:`cdxcore.subdir.SubDir` class which represents a directory
27
+ with files of a given extension.
28
+
29
+ Simply write::
30
+
31
+ from cdxcore.subdir import SubDir
32
+ subdir = SubDir("my_directory") # relative to current working directory
33
+ subdir = SubDir("./my_directory") # relative to current working directory
34
+ subdir = SubDir("~/my_directory") # relative to home directory
35
+ subdir = SubDir("!/my_directory") # relative to default temp directory
36
+
37
+ Note that ``my_directory`` will not be created if it does not exist yet. It will be created the first
38
+ time we write a file.
39
+
40
+ You can specify a parent for relative path names::
41
+
42
+ from cdxcore.subdir import SubDir
43
+ subdir = SubDir("my_directory", "~") # relative to home directory
44
+ subdir = SubDir("my_directory", "!") # relative to default temp directory
45
+ subdir = SubDir("my_directory", ".") # relative to current directory
46
+ subdir2 = SubDir("my_directory", subdir) # subdir2 is relative to `subdir`
47
+
48
+ Change the extension to "bin"::
49
+
50
+ from cdxcore.subdir import SubDir
51
+ subdir = SubDir("~/my_directory;*.bin")
52
+ subdir = SubDir("~/my_directory", ext="bin")
53
+ subdir = SubDir("my_directory", "~", ext="bin")
54
+
55
+ You can turn off extension management by setting the extension to ""::
56
+
57
+ from cdxcore.subdir import SubDir
58
+ subdir = SubDir("~/my_directory", ext="")
59
+
60
+ You can also use :meth:`cdxcore.subdir.SubDir.__call__` to generate sub directories::
61
+
62
+ from cdxcore.subdir import SubDir
63
+ parent = SubDir("~/parent")
64
+ subdir = parent("subdir")
65
+
66
+ Be aware that when the operator :meth:`cdxcore.subdir.SubDir.__call__`
67
+ is called with two keyword arguments, then it reads files.
68
+
69
+ You can obtain a list of all sub directories in a directory by using :meth:`cdxcore.subdir.SubDir.sub_dirs`.
70
+ The list of files with the corresponding extension is accessible via :meth:`cdxcore.subdir.SubDir.files`.
71
+
72
+ File Format
73
+ ^^^^^^^^^^^
74
+
75
+ :class:`cdxcore.subdir.SubDir` supports file i/o with a number of different file formats
76
+ via :class:`cdxcore.subdir.Format`.
77
+
78
+ * "PICKLE": standard pickling; the default extension is "pck".
79
+
80
+ * "JSON_PICKLE": uses the :mod:`jsonpickle` package; default extension "jpck".
81
+ The advantage of this format over "PICKLE" is that it is somewhat human-readable.
82
+ However, ``jsonpickle`` uses compressed formats for complex objects such as :mod:`numpy`
83
+ arrays, hence readability is somewhat limited. Using "JSON_PICKLE"
84
+ comes at cost of slower i/o speed.
85
+
86
+ * "JSON_PLAIN": uses :func:`cdxcore.util.plain` to generate human readable files
87
+ which cannot be loaded back from disk.
88
+ In this mode ``SubDir`` converts objects into plain Python objects before using :mod:`json`
89
+ to write them to disk.
90
+ That means that deserialized data does not have the correct object structure
91
+ to be able to restore files written in "JSON_PLAIN".
92
+ However, such files are much easier to read.
93
+
94
+ * "BLOSC" uses `blosc <https://github.com/blosc/python-blosc>`__
95
+ to read/write compressed binary data. The blosc compression algorithm is very fast,
96
+ hence using this mode will not usually lead to notably slower performance than using
97
+ "PICKLE" but will generate smaller files, depending on your data structure.
98
+
99
+ The default extension for "BLOSC" is "zbsc".
100
+
101
+ * "GZIP": uses :mod:`gzip` to
102
+ read/write compressed binary data. The default extension is "pgz".
103
+
104
+ Summary of properties:
105
+
106
+
107
+ +--------------+------------------+----------------+-------+-------------+-----------+
108
+ | Format | Restores objects | Human readable | Speed | Compression | Extension |
109
+ +==============+==================+================+=======+=============+===========+
110
+ | PICKLE | yes | no | high | no | .pck |
111
+ +--------------+------------------+----------------+-------+-------------+-----------+
112
+ | JSON_PLAIN | no | yes | low | no | .json |
113
+ +--------------+------------------+----------------+-------+-------------+-----------+
114
+ | JSON_PICKLE | yes | limited | low | no | .jpck |
115
+ +--------------+------------------+----------------+-------+-------------+-----------+
116
+ | BLOSC | yes | no | high | yes | .zbsc |
117
+ +--------------+------------------+----------------+-------+-------------+-----------+
118
+ | GZIP | yes | no | high | yes | .pgz |
119
+ +--------------+------------------+----------------+-------+-------------+-----------+
120
+
121
+
122
+ You may specify the file format when instantiating :class:`cdxcore.subdir.SubDir`::
123
+
124
+ from cdxcore.subdir import SubDir
125
+ subdir = SubDir("~/my_directory", fmt=SubDir.PICKLE)
126
+ subdir = SubDir("~/my_directory", fmt=SubDir.JSON_PICKLE)
127
+ ...
128
+
129
+ If ``ext`` is not specified the extension will default to
130
+ the respective default extension of the format requested.
131
+
132
+ Reading Files
133
+ ^^^^^^^^^^^^^
134
+
135
+ To read the data contained in a ``file`` from our subdirectory
136
+ with its reference extension use :meth:`cdxcore.subdir.SubDir.read`::
137
+
138
+ from cdxcore.subdir import SubDir
139
+ subdir = SubDir("!/test")
140
+
141
+ data = subdir.read("file") # returns the default `None` if file is not found
142
+ data = subdir.read("file", default=[]) # returns the default [] if file is not found
143
+
144
+ This function will return the ``default`` (which in turn defaults to ``None``)
145
+ if ``file.ext`` does not exist.
146
+ You can opt for :meth:`cdxcore.subdir.SubDir.read` to raise an error instead of returning a default
147
+ by using ``raise_on_error=True``::
148
+
149
+ data = subdir.read("file", raise_on_error=True) # raises 'KeyError' if not found
150
+
151
+ When calling ``read()`` you may specify an alternative extension::
152
+
153
+ data = subdir.read("file", ext="bin") # change extension to "bin"
154
+ data = subdir.read("file.bin", ext="") # no automatic extension
155
+
156
+ Specifying a different format for :meth:`cdxcore.subdir.SubDir.read` only changes
157
+ the extension automatically if you have not overwritten it before:
158
+
159
+ .. code-block:: python
160
+
161
+ subdir = SubDir("!/test") # default format PICKLE with extension pck
162
+ data = subdir.read("file", fmt=SubDir.JSON_PICKLE ) # uses "jpck" extension
163
+
164
+ subdir = SubDir("!/test", ext="bin") # user-specified extension
165
+ data = subdir.read("file", fmt=SubDir.JSON_PICKLE ) # keeps using "bin"
166
+
167
+ You can also use the :meth:`cdxcore.subdir.SubDir.__call__` to read files, in which case you must specify a default value
168
+ (if you don't, then the operator will return a sub directory)::
169
+
170
+ data = subdir("file", None) # returns None if file is not found
171
+
172
+ You can also use item notation to access files.
173
+ In this case, though, an error will be thrown if the file does not exist::
174
+
175
+ data = subdir['file'] # raises KeyError if file is not found
176
+
177
+ You can read a range of files in one function call::
178
+
179
+ data = subdir.read( ["file1", "file2"] ) # returns list
180
+
181
+ Finally, you can also iterate through all existing files using iterators::
182
+
183
+ # manual loading
184
+ for file in subdir:
185
+ data = subdir.read(file)
186
+ ...
187
+
188
+ # automatic loading, with "None" as a default
189
+ for file, data in subdir.items():
190
+ ...
191
+
192
+ To obtain a list of all files in our directory which have the correct extension, use :meth:`cdxcore.subdir.SubDir.files`.
193
+
194
+ Writing Files
195
+ ^^^^^^^^^^^^^
6
196
 
197
+ Writing files mirrors reading them::
7
198
 
8
- import os
9
- import os.path
10
- import uuid
11
- import threading
12
- import pickle
13
- import tempfile
14
- import shutil
15
- import datetime
16
- import inspect
199
+ from cdxcore.subdir import SubDir
200
+ subdir = SubDir("!/test")
201
+
202
+ subdir.write("file", data)
203
+ subdir['file'] = data
204
+
205
+ You may specify a different extension::
206
+
207
+ subdir.write("file", data, ext="bin")
208
+
209
+ You can also specify a file :class:`cdxcore.subdir.Format`.
210
+ The extension will be changed automatically if you have not set it manually::
211
+
212
+ subdir = SubDir("!/test")
213
+ subdir.write("file", data, fmt=SubDir.JSON_PICKLE ) # will write to "file.jpck"
214
+
215
+ To write several files at once, write::
216
+
217
+ subdir.write(["file1", "file2"], [data1, data2])
218
+
219
+ Note that when writing to a file, :meth:`cdxcore.subdir.SubDir.write`
220
+ will first write to a temporary file, and then rename this file into the target file name.
221
+ The temporary file name is generated by applying :func:`cdxcore.uniquehash.unique_hash48`
222
+ to the
223
+ target file name,
224
+ current time, process and thread ID, as well as the machine's UUID.
225
+ This is done to reduce collisions between processes/machines accessing the same files,
226
+ potentially across a network.
227
+ It does not remove collision risk entirely, though.
228
+
229
+ Filenames
230
+ ^^^^^^^^^
231
+
232
+ :class:`cdxcore.subdir.SubDir` transparently handles directory access and extensions.
233
+ That means a user usually only uses ``file`` names which do not contain either.
234
+ To obtain the full qualified filename given a "file" use :meth:`cdxcore.subdir.SubDir.full_file_name`.
235
+
236
+ Reading and Writing Versioned Files
237
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
238
+
239
+ :class:`cdxcore.subdir.SubDir` supports versioned files.
240
+ If versions are used, then they *must* be used for both reading and writing.
241
+ :dec:`cdxcore.version.version` provides a standard decorator framework for defining
242
+ versions for classes and functions including the version dependencies.
243
+
244
+ If a ``version`` is provided to :func:`cdxcore.subdir.SubDir.write`
245
+ then ``SubDir`` will write the version in a block ahead of the main content of the file.
246
+ In case of the PICKLE format, this is a byte string. In case of JSON_PLAIN and JSON_PICKLE this is a line of
247
+ text starting with ``#`` ahead of the file. (Note that this violates
248
+ the JSON file format.)
249
+
250
+ Writing a short version block ahead of the main data allows :func:`cdxcore.subdir.SubDir.read`
251
+ to read this version information back quickly without needing to read the entire file.
252
+ ``read()`` attempts this if it is called with a ``version`` parameter.
253
+ In this case it will compare the read version with the provided version,
254
+ and only return the main content of the file if versions match.
255
+
256
+ Use :func:`cdxcore.subdir.SubDir.is_version` to check whether a given file has a specific version.
257
+ Like ``read()`` this function only reads the information required to obtain the information and will
258
+ be much faster than reading the whole file.
259
+
260
+ *Important:* note that if a file was written with a version, it has to be read again with a test version.
261
+ You can specify ``version="*"`` for :func:`cdxcore.subdir.SubDir.read` to match any version.
262
+
263
+ **Examples:**
264
+
265
+ Writing a versioned file::
266
+
267
+ from cdxcore.subdir import SubDir
268
+ sub_dir = SubDir("!/test_version")
269
+ sub_dir.write("test", [1,2,3], version="0.0.1" )
270
+
271
+ To read ``[1,2,3]`` from "test" we need to use the correct version::
272
+
273
+ _ = sub_dir.read("test", version="0.0.1")
274
+
275
+ The following will not read "test" as the versions do not match::
276
+
277
+ _ = sub_dir.read("test", version="0.0.2")
278
+
279
+ By default :func:`cdxcore.subdir.SubDir.read`
280
+ will not fail if a version mismatch is encountered; rather it will
281
+ attempt to delete the file and then return the ``default`` value.
282
+
283
+ This can be turned off
284
+ with the keyword ``delete_wrong_version`` set to ``False``.
285
+
286
+ You can ignore the version used to write a file by using `*` as version::
287
+
288
+ _ = sub_dir.read("test", version="*")
289
+
290
+ Note that reading files which have been written with a ``version`` without
291
+ ``version`` keyword will fail because ``SubDir`` will only append additional version information
292
+ to the file if required.
293
+
294
+ Test existence of Files
295
+ ^^^^^^^^^^^^^^^^^^^^^^^
296
+
297
+ To test existence of 'file' in a directory, use one of::
298
+
299
+ subdir.exist('file')
300
+ 'file' in subdir
301
+
302
+ Deleting files
303
+ ^^^^^^^^^^^^^^
304
+
305
+ To delete a 'file', use any of the following::
306
+
307
+ subdir.delete("file")
308
+ del subdir['file']
309
+
310
+ All of these are *silent*, and will not throw errors if "file" does not exist.
311
+ In order to throw an error use::
312
+
313
+ subdir.delete('file', raise_on_error=True)
314
+
315
+ A few member functions assist in deleting a number of files:
316
+
317
+ * :func:`cdxcore.subdir.SubDir.delete_all_files`: delete all files in the directory with matching extension. Do not delete sub directories, or files with extensions different to our own.
318
+ * :func:`cdxcore.subdir.SubDir.delete_all_content`: delete all files with our extension, including in all sub-directories. If a sub-directory is left empty
319
+ upon ``delete_all_content`` delete it, too.
320
+ * :func:`cdxcore.subdir.SubDir.delete_everything`: deletes *everything*, not just files with matching extensions.
321
+
322
+ Caching
323
+ ^^^^^^^
324
+
325
+ A :class:`cdxcore.subdir.SubDir` object offers an advanced context for caching calls to :class:`collections.abc.Callable`
326
+ objects with :dec:`cdxcore.subdir.SubDir.cache`.
327
+
328
+ This involves keying the cache by the function name and its current parameters using :class:`cdxcore.uniquehash.UniqueHash`,
329
+ and monitoring the function's version using :dec:`cdxcore.version.version`. The caching behaviour itself can be controlled by
330
+ specifying the desired :class:`cdxcore.subdir.CacheMode`.
331
+
332
+ Import
333
+ ------
334
+ .. code-block:: python
335
+
336
+ import cdxcore.subdir as subdir
337
+
338
+ Documentation
339
+ -------------
340
+ """
341
+
342
+ import os as os
343
+ import uuid as uuid
344
+ import threading as threading
345
+ import pickle as pickle
346
+ import tempfile as tempfile
347
+ import shutil as shutil
348
+ import datetime as datetime
349
+ import inspect as inspect
350
+ import platform as platform
17
351
  from collections import OrderedDict
18
- from collections.abc import Collection, Mapping, Callable
352
+ from collections.abc import Collection, Mapping, Callable, Iterable
19
353
  from enum import Enum
20
- import json as json
21
- import platform as platform
22
354
  from functools import update_wrapper
23
- import warnings as warnings
24
355
 
25
- import numpy as np
26
- import jsonpickle as jsonpickle
27
- import jsonpickle.ext.numpy as jsonpickle_numpy
28
- import zlib as zlib
356
+ import json as json
29
357
  import gzip as gzip
30
358
  import blosc as blosc
31
-
32
- from .prettydict import pdct
359
+ import sys as sys
360
+ from .err import verify, error, warn, fmt as txtfmt
361
+ from .pretty import PrettyObject
33
362
  from .verbose import Context
34
- from .version import Version, version as version_decorator
35
- from .util import fmt_list, fmt_filename, DEF_FILE_NAME_MAP, fmt as txtfmt, plain
36
- from .uniquehash import uniqueHash48, uniqueLabelExt, namedUniqueHashExt
37
-
38
- def error( text, *args, exception = RuntimeError, **kwargs ):
39
- raise exception( txtfmt(text, *args, **kwargs) )
40
- def verify( cond, text, *args, exception = RuntimeError, **kwargs ):
41
- if not cond:
42
- error( text, *args, **kwargs, exception=exception )
43
- def warn( text, *args, warning=warnings.RuntimeWarning, stack_level=1, **kwargs ):
44
- warnings.warn( txtfmt(text, *args, **kwargs), warning, stack_level=stack_level )
363
+ from .version import Version, version as version_decorator, VersionError
364
+ from .util import fmt_list, fmt_filename, DEF_FILE_NAME_MAP, plain, is_filename
365
+ from .uniquehash import unique_hash48, UniqueLabel, NamedUniqueHash
366
+
45
367
 
46
368
  """
369
+ :meta private:
47
370
  compression
48
371
  """
49
- jsonpickle_numpy.register_handlers()
372
+
373
+ def _import_jsonpickle():
374
+ """ For some dodgy reason importing `jsonpickle` normally causes my tests to fail with a recursion error """
375
+ jsonpickle = sys.modules.get('jsonpickle', None)
376
+ if jsonpickle is None:
377
+ import jsonpickle as jsonpickle
378
+ import jsonpickle.ext.numpy as jsonpickle_numpy
379
+ jsonpickle_numpy.register_handlers()
380
+ return jsonpickle
381
+
50
382
  BLOSC_MAX_BLOCK = 2147483631
51
383
  BLOSC_MAX_USE = 1147400000 # ... blosc really cannot handle large files
52
-
53
- """
54
- Hashing
55
- """
56
- uniqueFileName48 = uniqueHash48
57
- uniqueNamedFileName48_16 = namedUniqueHashExt(max_length=48,id_length=16,filename_by=DEF_FILE_NAME_MAP)
58
- uniqueLabelledFileName48_16 = uniqueLabelExt(max_length=48,id_length=16,filename_by=DEF_FILE_NAME_MAP)
384
+ #
59
385
 
60
386
  def _remove_trailing( path ):
61
387
  if len(path) > 0:
@@ -63,13 +389,34 @@ def _remove_trailing( path ):
63
389
  return _remove_trailing(path[:-1])
64
390
  return path
65
391
 
392
+
393
+ # ========================================================================
394
+ # Basics
395
+ # ========================================================================
396
+
66
397
  class Format(Enum):
67
- """ File formats for SubDir """
68
- PICKLE = 0
69
- JSON_PICKLE = 1
70
- JSON_PLAIN = 2
71
- BLOSC = 3
72
- GZIP = 4
398
+ """
399
+ File formats for :class:`cdxcore.subdir.SubDir`.
400
+
401
+ +--------------+------------------+----------------+-------+-------------+-----------+
402
+ | Format | Restores objects | Human readable | Speed | Compression | Extension |
403
+ +==============+==================+================+=======+=============+===========+
404
+ | PICKLE | yes | no | high | no | .pck |
405
+ +--------------+------------------+----------------+-------+-------------+-----------+
406
+ | JSON_PLAIN | no | yes | low | no | .json |
407
+ +--------------+------------------+----------------+-------+-------------+-----------+
408
+ | JSON_PICKLE | yes | limited | low | no | .jpck |
409
+ +--------------+------------------+----------------+-------+-------------+-----------+
410
+ | BLOSC | yes | no | high | yes | .zbsc |
411
+ +--------------+------------------+----------------+-------+-------------+-----------+
412
+ | GZIP | yes | no | high | yes | .pgz |
413
+ +--------------+------------------+----------------+-------+-------------+-----------+
414
+ """
415
+ PICKLE = 0 #: Standard binary :mod:`pickle` format.
416
+ JSON_PICKLE = 1 #: :mod:`jsonpickle` format.
417
+ JSON_PLAIN = 2 #: ``json`` format.
418
+ BLOSC = 3 #: :mod:`blosc` binary compressed format.
419
+ GZIP = 4 #: :mod:`gzip` binary compressed format.
73
420
 
74
421
  PICKLE = Format.PICKLE
75
422
  JSON_PICKLE = Format.JSON_PICKLE
@@ -77,31 +424,79 @@ JSON_PLAIN = Format.JSON_PLAIN
77
424
  BLOSC = Format.BLOSC
78
425
  GZIP = Format.GZIP
79
426
 
80
- """
81
- Use the following for config calls:
82
- format = subdir.mkFormat( config("format", "pickle", subdir.FORMAT_NAMES, "File format") )
83
- """
84
- FORMAT_NAMES = [ s.lower() for s in Format.__members__ ]
85
- def mkFormat( name ):
86
- if not name in FORMAT_NAMES:
87
- raise LookupError(f"Unknown format name '{name}'. Must be one of: {fmt_list(name)}")
88
- return Format[name.upper()]
427
+ class VersionPresentError(RuntimeError):
428
+ """
429
+ Exception raised in case a file was read which had a version, but no test version
430
+ was provided.
431
+ """
432
+ pass
433
+
434
+ # ========================================================================
435
+ # Caching utilities
436
+ # ========================================================================
89
437
 
90
438
  class CacheMode(object):
91
439
  """
92
- CacheMode
93
- A class which encodes standard behaviour of a caching strategy:
440
+ A class which encodes standard behaviour of a caching strategy.
441
+
442
+ **Summary mechanics:**
443
+
444
+ +-----------------------------------------+-------+-------+-------+---------+--------+----------+
445
+ | Action | on | gen | off | update | clear | readonly |
446
+ +=========================================+=======+=======+=======+=========+========+==========+
447
+ | load cache from disk if exists | x | x | | | | x |
448
+ +-----------------------------------------+-------+-------+-------+---------+--------+----------+
449
+ | write updates to disk | x | x | | x | | |
450
+ +-----------------------------------------+-------+-------+-------+---------+--------+----------+
451
+ | delete existing object | | | | | x | |
452
+ +-----------------------------------------+-------+-------+-------+---------+--------+----------+
453
+ | delete existing object if incompatible | x | | | x | x | |
454
+ +-----------------------------------------+-------+-------+-------+---------+--------+----------+
455
+
456
+
457
+ **Standard Caching Semantics**
458
+
459
+ Assuming we wish to cache results from calling a function ``f`` in a file named ``filename``
460
+ in a directory ``directory``, then this is the ``CacheMode`` waterfall:
94
461
 
95
- on gen off update clear readonly
96
- load cache from disk if exists x x - - - x
97
- write updates to disk x x - x - -
98
- delete existing object - - - - x -
99
- delete existing object if incompatible x - - x x -
462
+ .. code-block:: python
100
463
 
101
- See cdxbasics.subdir for functions to manage files.
464
+ def cache_f( filename : str, directory : SubDir, version : str, cache_mode : CacheMode ):
465
+ if cache_mode.delete:
466
+ directory.delete(filename)
467
+ if cache_mode.read:
468
+ r = directory.read(filename,
469
+ default=None,
470
+ version=version,
471
+ raise_on_error=False,
472
+ delete_wrong_version=cache_mode.del_incomp
473
+ )
474
+ if not r is None:
475
+ return r
476
+
477
+ r = f(...) # compute result
478
+
479
+ if cache_mode.write:
480
+ directory.write(filename,
481
+ r,
482
+ version=version,
483
+ raise_on_error=False
484
+ )
485
+
486
+ return r
487
+
488
+ See :func:`cdxcore.subdir.SubDir.cache` for a comprehensive
489
+ implementation.
490
+
491
+ Parameters
492
+ ----------
493
+ mode : str, optional
494
+ Which mode to use: ``"on"``, ``"gen"``, ``"off"``, ``"update"``, ``"clear"`` or ``"readonly"``.
495
+
496
+ The default is ``None`` in which case ``"on"`` is used.
102
497
  """
103
498
 
104
- ON = "on"
499
+ ON = "on"
105
500
  GEN = "gen"
106
501
  OFF = "off"
107
502
  UPDATE = "update"
@@ -109,22 +504,31 @@ class CacheMode(object):
109
504
  READONLY = "readonly"
110
505
 
111
506
  MODES = [ ON, GEN, OFF, UPDATE, CLEAR, READONLY ]
507
+ """
508
+ List of available modes in text form.
509
+ This list can be used as ``cast`` parameter when calling :func:`cdxcore.config.Config.__call__`::
510
+
511
+ from cdxcore.config import Config
512
+ from cdxcore.subdir import CacheMode
513
+
514
+ def get_cache_mode( config : Config ) -> CacheMode:
515
+ return CacheMode( config("cache_mode", "on", CacheMode.MODES, CacheMode.HELP) )
516
+ """
517
+
112
518
  HELP = "'on' for standard caching; 'gen' for caching but keep existing incompatible files; 'off' to turn off; 'update' to overwrite any existing cache; 'clear' to clear existing caches; 'readonly' to read existing caches but not write new ones"
113
-
519
+ """
520
+ Standard ``config`` help text, to be used with :func:`cdxcore.config.Config.__call__` as follows::
521
+
522
+ from cdxcore.config import Config
523
+ from cdxcore.subdir import CacheMode
524
+
525
+ def get_cache_mode( config : Config ) -> CacheMode:
526
+ return CacheMode( config("cache_mode", "on", CacheMode.MODES, CacheMode.HELP) )
527
+ """
528
+
114
529
  def __init__(self, mode : str = None ):
115
530
  """
116
- Encodes standard behaviour of a caching strategy:
117
-
118
- on gen off update clear readonly
119
- load upon start from disk if exists x x - - - x
120
- write updates to disk x x - x - -
121
- delete existing object upon start - - - - x -
122
- delete existing object if incompatible x - - x x -
123
-
124
- Parameters
125
- ----------
126
- mode : str
127
- Which mode to use.
531
+ :meta private:
128
532
  """
129
533
  if isinstance( mode, CacheMode ):
130
534
  return# id copy constuctor
@@ -145,22 +549,22 @@ class CacheMode(object):
145
549
 
146
550
  @property
147
551
  def read(self) -> bool:
148
- """ Whether to load any existing data when starting """
552
+ """ Whether to load any existing cached data. """
149
553
  return self._read
150
554
 
151
555
  @property
152
556
  def write(self) -> bool:
153
- """ Whether to write cache data to disk """
557
+ """ Whether to cache newly computed data to disk. """
154
558
  return self._write
155
559
 
156
560
  @property
157
561
  def delete(self) -> bool:
158
- """ Whether to delete existing data """
562
+ """ Whether to delete existing data. """
159
563
  return self._delete
160
564
 
161
565
  @property
162
566
  def del_incomp(self) -> bool:
163
- """ Whether to delete existing data if it is not compatible """
567
+ """ Whether to delete existing data if it is not compatible or has the wrong version. """
164
568
  return self._del_in
165
569
 
166
570
  def __str__(self) -> str:# NOQA
@@ -175,311 +579,327 @@ class CacheMode(object):
175
579
 
176
580
  @property
177
581
  def is_off(self) -> bool:
178
- """ Whether this cache mode is OFF """
582
+ """ Whether this cache mode is OFF. """
179
583
  return self.mode == self.OFF
180
584
 
181
585
  @property
182
586
  def is_on(self) -> bool:
183
- """ Whether this cache mode is ON """
587
+ """ Whether this cache mode is ON. """
184
588
  return self.mode == self.ON
185
589
 
186
590
  @property
187
591
  def is_gen(self) -> bool:
188
- """ Whether this cache mode is GEN """
592
+ """ Whether this cache mode is GEN. """
189
593
  return self.mode == self.GEN
190
594
 
191
595
  @property
192
596
  def is_update(self) -> bool:
193
- """ Whether this cache mode is UPDATE """
597
+ """ Whether this cache mode is UPDATE. """
194
598
  return self.mode == self.UPDATE
195
599
 
196
600
  @property
197
601
  def is_clear(self) -> bool:
198
- """ Whether this cache mode is CLEAR """
602
+ """ Whether this cache mode is CLEAR. """
199
603
  return self.mode == self.CLEAR
200
604
 
201
605
  @property
202
606
  def is_readonly(self) -> bool:
203
- """ Whether this cache mode is READONLY """
607
+ """ Whether this cache mode is READONLY. """
204
608
  return self.mode == self.READONLY
205
609
 
206
610
  class CacheController( object ):
207
- """
208
- Central control for versioning.
209
- Enabes to to turn on/off caching, debugging and tracks all versions
611
+ r"""
612
+ Central control parameters for caching.
613
+
614
+ When a parameter object of this type
615
+ is assigned to a :class:`cdxcore.subdir.SubDir`,
616
+ then it is passed on when sub-directories are
617
+ created. This way all ``SubDir`` have the same
618
+ caching behaviour.
619
+
620
+ See :class:`cdxcore.subdir.CacheController` for
621
+ a list of control parameters.
622
+
623
+ Parameters
624
+ ----------
625
+ exclude_arg_types : list[type], optional
626
+ List of types to exclude from producing unique ids from function arguments.
627
+
628
+ Defaults to ``[Context]``.
629
+
630
+ cache_mode : CacheMode, optional
631
+ Top level cache control.
632
+ Set to "OFF" to turn off all caching.
633
+ Default is "ON".
634
+
635
+ max_filename_length : int, optional
636
+ Maximum filename length. If unique id's exceed the file name a hash of length
637
+ ``hash_length`` will be integrated into the file name.
638
+ See :class:`cdxcore.uniquehash.NamedUniqueHash`.
639
+ Default is ``48``.
640
+
641
+ hash_length : int, optional
642
+ Length of the hash used to make sure each filename is unique
643
+ See :class:`cdxcore.uniquehash.NamedUniqueHash`.
644
+ Default is ``8``.
645
+
646
+ debug_verbose : :class:`cdxcore.verbose.Context`, optional
647
+ If not ``None`` print caching process messages to this object.
648
+
649
+ Default is ``None``.
650
+
651
+ keep_last_arguments : bool, optional
652
+ Keep a dictionary of all parameters as string representations after each function call.
653
+ If the function ``F`` was decorated using :meth:`cdxcore.subdir.SubDir.cache`,
654
+ you can access this information via ``F.cache_info.last_arguments``.
655
+
656
+ Note that strings are limited to 100 characters per argument to avoid memory
657
+ overload when large objects are passed.
658
+
659
+ Default is ``False``.
210
660
  """
211
661
 
212
662
  def __init__(self, *,
213
663
  exclude_arg_types : list[type] = [Context],
214
664
  cache_mode : CacheMode = CacheMode.ON,
215
665
  max_filename_length: int = 48,
216
- hash_length : int = 16,
666
+ hash_length : int = 8,
217
667
  debug_verbose : Context = None,
218
668
  keep_last_arguments: bool = False
219
669
  ):
220
670
  """
221
- Background parameters to control caching
222
-
223
- Parameters
224
- ----------
225
- exclude_arg_types :
226
- List of types to exclude from producing unique ids from function arguments. Defaults to [SubDir, Context]
227
- cache_mode :
228
- Top level cache control. Set to "OFF" to turn off all caching. Default is "ON"
229
- max_filename_length :
230
- Maximum filename length. If unique id's exceed the file name a hash of length 'hash_length' will be intergated into the file name.
231
- See cdxbasics.util.namedUniqueHashExt and cdxbasics.util.uniqueLabelExt
232
- hash_length :
233
- Length of the hash used to make sure each filename is unique
234
- See cdxbasics.util.namedUniqueHashExt and cdxbasics.util.uniqueLabelExt
235
- debug_verbose :
236
- If non-None print caching process messages to this object.
237
- keep_last_arguments :
238
- keep a dictionary of all parameters as string representations after each function call.
239
- If the function F was decorated using SubDir.cache(), you can access this information via
240
- F.cache_info.last_arguments
241
- Note that strings are limited to 100 characters per argument to avoid memory
242
- overload when large objects are passed.
243
- """
244
- max_filename_length = int(max_filename_length)
245
- hash_length = int(hash_length)
671
+ :meta private:
672
+ """
673
+ max_filename_length = int(max_filename_length)
674
+ hash_length = int(hash_length)
246
675
  assert max_filename_length>0, ("'max_filename_length' must be positive")
247
676
  assert hash_length>0 and hash_length<=max_filename_length, ("'hash_length' must be positive and at most 'max_filename_length'")
248
677
  assert max_filename_length>=hash_length, ("'hash_length' must not exceed 'max_filename_length")
249
678
  self.cache_mode = CacheMode(cache_mode if not cache_mode is None else CacheMode.ON)
250
- self.debug_verbose = debug_verbose
679
+ self.debug_verbose = Context(debug_verbose) if isinstance(debug_verbose, (int,str)) else debug_verbose
251
680
  self.exclude_arg_types = set(exclude_arg_types) if not exclude_arg_types is None else None
252
- self.versioned = pdct() # list
253
- self.uniqueNamedFileName = namedUniqueHashExt(max_length=max_filename_length,id_length=hash_length,filename_by=DEF_FILE_NAME_MAP)
254
- self.uniqueLabelledFileName = uniqueLabelExt(max_length=max_filename_length,id_length=hash_length,filename_by=DEF_FILE_NAME_MAP)
681
+ self.versioned = PrettyObject() # list
682
+ self.labelledFileName = NamedUniqueHash(max_length=max_filename_length,id_length=hash_length,filename_by=DEF_FILE_NAME_MAP)
683
+ self.uniqueFileName = UniqueLabel(max_length=max_filename_length,id_length=hash_length,filename_by=None)
255
684
  self.keep_last_arguments = keep_last_arguments
256
685
 
257
686
  default_cacheController = CacheController()
687
+ #
258
688
 
689
+ # ========================================================================
690
+ # SubDir
691
+ # ========================================================================
259
692
 
260
- class CacheTracker(object):
261
- """
262
- Utility class to track caching and be able to delete all dependent objects
693
+ class SubDir(object):
694
+ r"""
695
+ ``SubDir`` implements a transparent i/o
696
+ interface for storing data in files.
263
697
 
264
- """
265
- def __init__(self):
266
- """ track cache files """
267
- self._files = []
268
- def __iadd__(self, new_file):
269
- """ Add a new file to the tracker """
270
- self._files.append( new_file )
271
- def delete_cache_files(self):
272
- """ Delete all tracked files """
273
- for file in self._files:
274
- if os.path.exists(file):
275
- os.remove(file)
276
- self._files = []
277
- def __str__(self) -> str:#NOQA
278
- return f"Tracked: {self._files}"
279
- def __repr__(self) -> str:#NOQA
280
- return f"Tracked: {self._files}"
281
-
282
- class InitCacheInfo(object):
283
- pass
698
+ **Directories**
284
699
 
285
- class CacheInfo(object):
286
- pass
700
+ Instantiate a ``SubDir`` with a directory name. There are some
701
+ pre-defined relative system paths the name can refer to::
287
702
 
288
- # SubDir
289
- # ======
703
+ from cdxcore.subdir import SubDir
704
+ parent = SubDir("!/subdir") # relative to system temp directory
705
+ parent = SubDir("~/subdir") # relative to user home directory
706
+ parent = SubDir("./subdir") # relative to current working directory (explicit)
707
+ parent = SubDir("subdir") # relative to current working directory (implicit)
708
+ parent = SubDir("/tmp/subdir") # absolute path (linux)
709
+ parent = SubDir("C:/temp/subdir") # absolute path (windows)
710
+ parent = SubDir("") # current working directory
711
+
712
+ Sub-directories can be generated in a number of ways::
290
713
 
291
- class SubDir(object):
292
- """
293
- SubDir implements a transparent interface for storing data in files, with a common extension.
294
- The generic pattern is:
714
+ subDir = parent('subdir') # using __call__
715
+ subDir = SubDir('subdir', parent) # explicit constructor
716
+ subDir = SubDir('subdir', parent="!/") # explicit constructor with parent being a string
295
717
 
296
- 1) create a root 'parentDir':
297
- Absolute: parentDir = SubDir("C:/temp/root")
298
- In system temp directory: parentDir = SubDir("!/root")
299
- In user directory: parentDir = SubDir("~/root")
300
- Relative to current directory: parentDir = SubDir("./root")
718
+ Files managed by ``SubDir`` will usually have the same extension.
719
+ This extension can be specified with ``ext``, or as part of the directory string::
720
+
721
+ subDir = SubDir("~/subdir", ext="bin") # set extension to 'bin'
722
+ subDir = SubDir("~/subdir;*.bin") # set extension to 'bin'
723
+
724
+ Leaving the extension as default ``None`` allows ``SubDir`` to automatically use
725
+ the extension associated with any specified format.
301
726
 
302
- 2) Use SubDirs to transparently create hierachies of stored data:
303
- assume f() will want to store some data:
727
+ **Copy Constructor**
304
728
 
305
- def f(parentDir, ...):
729
+ The constructor is shallow.
306
730
 
307
- subDir = parentDir('subdir') <-- note that the call () operator is overloaded: if a second argument is provided, the directory will try to read the respective file.
308
- or
309
- subDir = SubDir('subdir', parentDir)
310
- :
311
- :
312
- Write data:
731
+ **File I/O**
313
732
 
314
- subDir['item1'] = item1 <-- dictionary style
315
- subDir.item2 = item2 <-- member style
316
- subDir.write('item3',item3) <-- explicit
733
+ Write data with :meth:`cdxcore.subdir.SubDir.write`::
317
734
 
318
- Note that write() can write to multiple files at the same time.
735
+ subDir.write('item3',item3) # explicit
736
+ subDir['item1'] = item1 # dictionary style
319
737
 
320
- 3) Reading is similar
738
+ Note that :meth:`cdxcore.subdir.SubDir.write` can write to multiple files at the same time.
321
739
 
322
- def readF(parentDir,...):
740
+ Read data with :meth:`cdxcore.subdir.SubDir.read`::
323
741
 
324
- subDir = parentDir('subdir')
742
+ item = subDir('item', 'i1') # returns 'i1' if not found.
743
+ item = subdir.read('item') # returns None if not found
744
+ item = subdir.read('item','i2') # returns 'i2' if not found
745
+ item = subDir['item'] # raises a KeyError if not found
325
746
 
326
- item = subDir('item', 'i1') <-- returns 'i1' if not found.
327
- item = subdir.read('item') <-- returns None if not found
328
- item = subdir.read('item','i2') <-- returns 'i2' if not found
329
- item = subDir['item'] <-- throws a KeyError if not found
330
- item = subDir.item <-- throws an AttributeError if not found
747
+ Treat files in a directory like dictionaries::
331
748
 
332
- 4) Treating data like dictionaries
749
+ for file in subDir:
750
+ data = subDir[file]
751
+ f(item, data)
333
752
 
334
- def scanF(parentDir,...)
753
+ for file, data in subDir.items():
754
+ f(file, data)
335
755
 
336
- subDir = parentDir('f')
756
+ Delete items::
337
757
 
338
- for item in subDir:
339
- data = subDir[item]
758
+ del subDir['item'] # silently fails if 'item' does not exist
759
+ subDir.delete('item') # silently fails if 'item' does not exist
760
+ subDir.delete('item', True) # raises a KeyError if 'item' does not exist
340
761
 
341
- Delete items:
762
+ Cleaning up::
342
763
 
343
- del subDir['item'] <-- silently fails if 'item' does not exist
344
- del subDir.item <-- silently fails if 'item' does not exist
345
- subDir.delete('item') <-- silently fails if 'item' does not exist
346
- subDir.delete('item', True) <-- throw a KeyError if 'item' does not exit
764
+ parent.delete_all_content() # silently deletes all files with matching extensions, and sub directories.
347
765
 
348
- 5) Cleaning up
766
+ **File Format**
349
767
 
350
- parentDir.deleteAllContent() <-- silently deletes all files and sub directories.
768
+ ``SubDir`` supports a number of file formats via :class:`cdxcore.subdir.Format`.
769
+ Those can be controlled with the ``fmt`` keyword in various functions not least
770
+ :class:`cdxcore.subdir.SubDir`::
351
771
 
352
- 6) As of version 0.2.59 subdir supports json file formats. Those can be controlled with the 'fmt' keyword in various functions.
353
- The most straightfoward way is to specify the format of the directory itself:
772
+ subdir = SubDir("!/.test", fmt=SubDir.JSON_PICKLE)
354
773
 
355
- subdir = SubDir("!/.test", fmt=SubDir.JSON_PICKLE)
774
+ See :class:`cdxcore.subdir.Format` for supported formats.
775
+
776
+ Parameters
777
+ ----------
778
+ name : str
779
+ Name of the directory.
780
+
781
+ The name may start with any of the following special characters:
782
+
783
+ * ``'.'`` for current directory
784
+ * ``'~'`` for home directory
785
+ * ``'!'`` for system default temp directory
786
+
787
+ The directory name may also contain a formatting string for defining ``ext`` on the fly:
788
+ for example use ``"!/test;*.bin"`` to specify a directory ``"test"`` in the user's
789
+ temp directory with extension ``"bin"``.
790
+
791
+ The directory name can be set to ``None`` in which case it is always empty
792
+ and attempts to write to it fail with :class:`EOFError`.
793
+
794
+ parent : str | SubDir, optional
795
+ Parent directory.
796
+
797
+ If ``parent`` is a :class:`cdxcore.subdir.SubDir` then its parameters are used
798
+ as default values here.
356
799
 
357
- The following formats are supported:
800
+ Default is ``None``.
801
+
802
+ ext : str, optional
803
+ Extension for files managed by this ``SubDir``. All files will share the same extension.
358
804
 
359
- SubDir.PICKLE:
360
- Use pickle
361
- SubDir.JSON_PLAIN:
362
- Uses cdxbasics.util.plain() to convert data into plain Python objects and writes
363
- this to disk as text. Loading back such files will result in plain Python objects,
364
- but *not* the original objects
365
- SubDir.JSON_PICKLE:
366
- Uses the jsonpickle package to load/write data in somewhat readable text formats.
367
- Data can be loaded back from such a file, but files may not be readable (e.g. numpy arrays
368
- are written in compressed form).
369
- SubDir.BLOSC:
370
- Uses https://www.blosc.org/python-blosc/ to compress data on-the-fly.
371
- BLOSC is much faster than GZIP or ZLIB but is limited to 2GB data, sadly.
372
- SubDir.ZLIB:
373
- Uses https://docs.python.org/3/library/zlib.html to compress data on-the-fly
374
- using, essentially, GZIP.
805
+ If set to ``""`` no extension is assigned to this directory. That means, for example, that
806
+ :meth:`cdxcore.subdir.SubDir.files` returns all files contained in the directory, not
807
+ just files with a specific extension.
808
+
809
+ If ``None``, use an extension depending on ``fmt``:
810
+
811
+ * 'pck' for the default PICKLE format.
812
+ * 'json' for JSON_PLAIN.
813
+ * 'jpck' for JSON_PICKLE.
814
+ * 'zbsc' for BLOSC.
815
+ * 'pgz' for GZIP.
816
+
817
+ Default is ``None``.
818
+
819
+ fmt : :class:`cdxcore.subdir.Format`, optional
375
820
 
376
- Summary of properties:
821
+ One of the :class:`cdxcore.subdir.Format` codes.
822
+ If ``ext`` is left to ``None`` then setting a format will also set the corresponding ``ext``.
823
+
824
+ Default is ``Format.PICKLE``.
377
825
 
378
- | Restores objects | Human readable | Speed | Compression
379
- PICKLE | yes | no | high | no
380
- JSON_PLAIN | no | yes | low | no
381
- JSON_PICKLE | yes | limited | low | no
382
- BLOSC | yes | no | high | yes
383
- GZIP | yes | no | high | yes
826
+ create_directory : bool | None, optional
827
+
828
+ Whether to create the directory upon creation of the ``SubDir`` object; otherwise it will be created upon first
829
+ :meth:`cdxcore.subdir.SubDir.write`.
830
+
831
+ Set to ``None`` to use the setting of the parent directory, or ``False`` if no parent
832
+ is specified.
833
+
834
+ Default is ``False``.
384
835
 
385
- Several other operations are supported; see help()
836
+ delete_everything : bool, optional
837
+
838
+ Delete all contents in the newly defined sub directory upon creation.
386
839
 
387
- Hans Buehler May 2020
840
+ Default is ``False``.
841
+
842
+ cache_controller : :class:`cdxcore.subdir.CacheController`, optional
843
+
844
+ An object which fine-tunes the behaviour of :meth:`cdxcore.subdir.SubDir.cache`.
845
+ See that function's documentation for further details. Default is ``None``.
388
846
  """
389
847
 
390
848
  class __RETURN_SUB_DIRECTORY(object):
391
849
  pass
850
+ """ :meta private: """
392
851
 
393
- Format = Format
394
- PICKLE = Format.PICKLE
852
+ Format = Format # :meta private
853
+ """ :meta private: """
854
+
855
+ PICKLE = Format.PICKLE
856
+ """ :meta private: """
857
+
395
858
  JSON_PICKLE = Format.JSON_PICKLE
859
+ """ :meta private: """
860
+
396
861
  JSON_PLAIN = Format.JSON_PLAIN
862
+ """ :meta private: """
863
+
397
864
  BLOSC = Format.BLOSC
865
+ """ :meta private: """
866
+
398
867
  GZIP = Format.GZIP
399
-
400
- DEFAULT_RAISE_ON_ERROR = False
868
+ """ :meta private: """
869
+
401
870
  RETURN_SUB_DIRECTORY = __RETURN_SUB_DIRECTORY
871
+ """ :meta private: """
872
+
402
873
  DEFAULT_FORMAT = Format.PICKLE
403
- DEFAULT_CREATE_DIRECTORY = False # legacy behaviour so that self.path is a valid path
874
+ """ Default :class:`cdxcore.subdir.Format`: ``Format.PICKLE`` """
875
+
404
876
  EXT_FMT_AUTO = "*"
877
+ """ :meta private: """
405
878
 
406
879
  MAX_VERSION_BINARY_LEN = 128
407
-
880
+ """ :meta private: """
881
+
408
882
  VER_NORMAL = 0
883
+ """ :meta private: """
409
884
  VER_CHECK = 1
885
+ """ :meta private: """
410
886
  VER_RETURN = 2
887
+ """ :meta private: """
888
+
411
889
 
412
890
  def __init__(self, name : str,
413
- parent = None, *,
891
+ parent : str|type = None, *,
414
892
  ext : str = None,
415
893
  fmt : Format = None,
416
- eraseEverything : bool = False,
417
- createDirectory : bool = None,
418
- cacheController : CacheController = None
894
+ create_directory : bool = None,
895
+ delete_everything : bool = False,
896
+ cache_controller : CacheController = None
419
897
  ):
420
898
  """
421
- Instantiates a sub directory which contains pickle files with a common extension.
422
- By default the directory is created.
423
-
424
- Absolute directories
425
- sd = SubDir("!/subdir") - relative to system temp directory
426
- sd = SubDir("~/subdir") - relative to user home directory
427
- sd = SubDir("./subdir") - relative to current working directory (explicit)
428
- sd = SubDir("subdir") - relative to current working directory (implicit)
429
- sd = SubDir("/tmp/subdir") - absolute path (linux)
430
- sd = SubDir("C:/temp/subdir") - absolute path (windows)
431
- Short-cut
432
- sd = SubDir("") - current working directory
433
-
434
- It is often desired that the user specifies a sub-directory name under some common parent directory.
435
- You can create sub directories if you provide a 'parent' directory:
436
- sd2 = SubDir("subdir2", parent=sd) - relative to other sub directory
437
- sd2 = sd("subdir2") - using call operator
438
- Works with strings, too:
439
- sd2 = SubDir("subdir2", parent="~/my_config") - relative to ~/my_config
440
-
441
- All files managed by SubDir will have the same extension.
442
- The extension can be specified with 'ext', or as part of the directory string:
443
- sd = SubDir("~/subdir;*.bin") - set extension to 'bin'
444
-
445
- COPY CONSTRUCTION
446
- This function also allows copy construction and constrution from a repr() string.
447
-
448
- HANDLING KEYS
449
- SubDirs allows reading data using the item and attribute notation, i.e. we may use
450
- sd = SubDir("~/subdir")
451
- x = sd.x
452
- y = sd['y']
453
- If the respective keys are not found, exceptions are thrown.
454
-
455
- NONE OBJECTS
456
- It is possible to set the directory name to 'None'. In this case the directory will behave as if:
457
- No files exist
458
- Writing fails with a EOFError.
899
+ Instantiates a sub directory which contains files with a common extension.
459
900
 
460
- Parameters
461
- ----------
462
- name - Name of the directory.
463
- '.' for current directory
464
- '~' for home directory
465
- '!' for system default temp directory
466
- May contain a formatting string for defining 'ext' on the fly:
467
- Use "!/test;*.bin" to specify 'test' in the system temp directory as root directory with extension 'bin'
468
- Can be set to None, see above.
469
- parent - Parent directory. If provided, will also set defaults for 'ext' and 'raiseOnError'
470
- ext - standard file extenson for data files. All files will share the same extension.
471
- If None, use the parent extension, or if that is not specified use an extension depending on 'fmt':
472
- 'pck' for the default PICKLE format
473
- 'json' for JSON_PLAIN
474
- 'jpck' for JSON_PICKLE
475
- Set to "" to turn off managing extensions.
476
- fmt - format, current pickle or json
477
- eraseEverything - delete all contents in the newly defined subdir
478
- createDirectory - whether to create the directory.
479
- Otherwise it will be created upon first write().
480
- Set to None to use the setting of the parent directory
481
- """
482
- createDirectory = bool(createDirectory) if not createDirectory is None else None
901
+ """
902
+ create_directory = bool(create_directory) if not create_directory is None else None
483
903
 
484
904
  # copy constructor support
485
905
  if isinstance(name, SubDir):
@@ -487,9 +907,9 @@ class SubDir(object):
487
907
  self._path = name._path
488
908
  self._ext = name._ext if ext is None else ext
489
909
  self._fmt = name._fmt if fmt is None else fmt
490
- self._crt = name._crt if createDirectory is None else createDirectory
491
- self._cctrl = name._cctrl if cacheController is None else cacheController
492
- if eraseEverything: raise ValueError( "Cannot use 'eraseEverything' when cloning a directory")
910
+ self._crt = name._crt if create_directory is None else create_directory
911
+ self._cctrl = name._cctrl if cache_controller is None else cache_controller
912
+ if delete_everything: raise ValueError( "Cannot use 'delete_everything' when cloning a directory")
493
913
  return
494
914
 
495
915
  # reconstruction from a dictionary
@@ -498,14 +918,14 @@ class SubDir(object):
498
918
  self._path = name['_path']
499
919
  self._ext = name['_ext'] if ext is None else ext
500
920
  self._fmt = name['_fmt'] if fmt is None else fmt
501
- self._crt = name['_crt'] if createDirectory is None else createDirectory
502
- self._cctrl = name['_cctrl'] if cacheController is None else cacheController
503
- if eraseEverything: raise ValueError( "Cannot use 'eraseEverything' when cloning a directory")
921
+ self._crt = name['_crt'] if create_directory is None else create_directory
922
+ self._cctrl = name['_cctrl'] if cache_controller is None else cache_controller
923
+ if delete_everything: raise ValueError( "Cannot use 'delete_everything' when cloning a directory")
504
924
  return
505
925
 
506
926
  # parent
507
927
  if isinstance(parent, str):
508
- parent = SubDir( parent, ext=ext, fmt=fmt, createDirectory=createDirectory, cacheController=cacheController )
928
+ parent = SubDir( parent, ext=ext, fmt=fmt, create_directory=create_directory, cache_controller=cache_controller )
509
929
  if not parent is None and not isinstance(parent, SubDir):
510
930
  raise ValueError( "'parent' must be SubDir, str, or None. Found object of type '{type(parent)}'")
511
931
 
@@ -543,15 +963,15 @@ class SubDir(object):
543
963
  else:
544
964
  self._ext = SubDir._extract_ext(ext)
545
965
 
546
- # createDirectory
547
- if createDirectory is None:
548
- self._crt = self.DEFAULT_CREATE_DIRECTORY if parent is None else parent._crt
966
+ # create_directory
967
+ if create_directory is None:
968
+ self._crt = False if parent is None else parent._crt
549
969
  else:
550
- self._crt = bool(createDirectory)
970
+ self._crt = bool(create_directory)
551
971
 
552
972
  # cache controller
553
- assert type(cacheController).__name__ == CacheController.__name__, ("'cacheController' should be of type 'CacheController'", type(cacheController))
554
- self._cctrl = cacheController
973
+ assert cache_controller is None or type(cache_controller).__name__ == CacheController.__name__, ("'cache_controller' should be of type 'CacheController'", type(cache_controller))
974
+ self._cctrl = cache_controller
555
975
 
556
976
  # name
557
977
  if name is None:
@@ -566,12 +986,12 @@ class SubDir(object):
566
986
  if len(name) > 1 and name[1] != '/':
567
987
  raise ValueError( txtfmt("If 'name' starts with '%s', then the second character must be '/' (or '\\' on windows). Found 'name' set to '%s'", name[:1], _name ))
568
988
  if name[0] == '!':
569
- name = SubDir.tempDir()[:-1] + name[1:]
989
+ name = SubDir.temp_dir()[:-1] + name[1:]
570
990
  elif name[0] == ".":
571
- name = SubDir.workingDir()[:-1] + name[1:]
991
+ name = SubDir.working_dir()[:-1] + name[1:]
572
992
  else:
573
993
  assert name[0] == "~", ("Internal error", name[0] )
574
- name = SubDir.userDir()[:-1] + name[1:]
994
+ name = SubDir.user_dir()[:-1] + name[1:]
575
995
  elif name == "..":
576
996
  error("Cannot use name '..'")
577
997
  elif not parent is None:
@@ -587,33 +1007,37 @@ class SubDir(object):
587
1007
  self._path = os.path.abspath(name) + '/'
588
1008
  self._path = self._path.replace('\\','/')
589
1009
 
590
- if eraseEverything:
591
- self.eraseEverything(keepDirectory=self._crt)
1010
+ if delete_everything:
1011
+ self.delete_everything(keep_directory=self._crt)
592
1012
  if self._crt:
593
- self.createDirectory()
1013
+ self.create_directory()
594
1014
 
595
1015
  @staticmethod
596
- def expandStandardRoot( name ):
1016
+ def expand_std_root( name ):
597
1017
  """
598
- Expands 'name' by a standardized root directory if provided:
599
- If 'name' starts with -> return
600
- ! -> tempDir()
601
- . -> workingDir()
602
- ~ -> userDir()
1018
+ Expands ``name`` by a standardized root directory if provided:
1019
+
1020
+ The first character of ``name`` can be either of:
1021
+
1022
+ * ``"!"`` returns :meth:`cdxcore.subdir.SubDir.temp_dir()`.
1023
+ * ``"."`` returns :meth:`cdxcore.subdir.SubDir.working_dir()`.
1024
+ * ``"~"`` returns :meth:`cdxcore.subdir.SubDir.user_dir()`.
1025
+
1026
+ If neither of these matches the first character, ``name``
1027
+ is returned as is.
603
1028
  """
604
1029
  if len(name) < 2 or name[0] not in ['.','!','~'] or name[1] not in ["\\","/"]:
605
1030
  return name
606
1031
  if name[0] == '!':
607
- return SubDir.tempDir() + name[2:]
1032
+ return SubDir.temp_dir() + name[2:]
608
1033
  elif name[0] == ".":
609
- return SubDir.workingDir() + name[2:]
1034
+ return SubDir.working_dir() + name[2:]
610
1035
  else:
611
- return SubDir.userDir() + name[2:]
1036
+ return SubDir.user_dir() + name[2:]
612
1037
 
613
- def createDirectory( self ):
1038
+ def create_directory( self ):
614
1039
  """
615
- Creates the directory if it doesn't exist yet.
616
- Does not do anything if is_none.
1040
+ Creates the current directory if it doesn't exist yet.
617
1041
  """
618
1042
  # create directory/clean up
619
1043
  if self._path is None:
@@ -628,8 +1052,8 @@ class SubDir(object):
628
1052
  if not os.path.isdir(self._path[:-1]):
629
1053
  raise NotADirectoryError(txtfmt( "Cannot use sub directory %s: object exists but is not a directory", self._path[:-1] ))
630
1054
 
631
- def pathExists(self) -> bool:
632
- """ Returns True if the current directory exists """
1055
+ def path_exists(self) -> bool:
1056
+ """ Whether the current directory exists """
633
1057
  return os.path.exists( self._path[:-1] ) if not self._path is None else False
634
1058
 
635
1059
  # -- a few basic properties --
@@ -659,60 +1083,79 @@ class SubDir(object):
659
1083
 
660
1084
  @property
661
1085
  def is_none(self) -> bool:
662
- """ Whether this object is 'None' or not """
1086
+ """ Whether this object is ``None`` or not. For such ``SubDir`` object no files exists, and writing any file will fail. """
663
1087
  return self._path is None
664
1088
 
665
1089
  @property
666
1090
  def path(self) -> str:
667
1091
  """
668
- Return current path, including trailing '/'
669
- Note that the path may not exist yet. If this is required, consider using existing_path
1092
+ Return current path, including trailing ``'/'``.
1093
+
1094
+ Note that the path may not exist yet. If existence is required, consider using
1095
+ :meth:`cdxcore.subdir.SubDir.existing_path`.
670
1096
  """
671
1097
  return self._path
672
1098
 
673
1099
  @property
674
1100
  def existing_path(self) -> str:
675
1101
  """
676
- Return current path, including training '/'.
677
- In addition to self.path this property ensures that the directory structure exists (or raises an exception)
1102
+ Return current path, including trailing ``'/'``.
1103
+
1104
+ ``existing_path`` ensures that the directory structure exists (or raises an exception).
1105
+ Use :meth:`cdxcore.subdir.SubDir.path` if creation on the fly is not desired.
678
1106
  """
679
- self.createDirectory()
1107
+ self.create_directory()
680
1108
  return self.path
681
1109
 
682
1110
  @property
683
1111
  def fmt(self) -> Format:
684
- """ Returns current format """
1112
+ """ Returns current :class:`cdxcore.subdir.Format`. """
685
1113
  return self._fmt
686
1114
 
687
1115
  @property
688
1116
  def ext(self) -> str:
689
1117
  """
690
- Returns the common extension of the files in this directory, including leading '.'
691
- Resolves '*' into the extension associated with the current format.
1118
+ Returns the common extension of the files in this directory, including leading ``'.'``.
1119
+ Resolves ``"*"`` into the extension associated with the current :class:`cdxcore.subdir.Format`.
692
1120
  """
693
1121
  return self._ext if self._ext != self.EXT_FMT_AUTO else self._auto_ext(self._fmt)
694
1122
 
695
- def autoExt( self, ext : str = None ) -> str:
1123
+ def auto_ext( self, ext_or_fmt : str|Format = None ) -> str:
696
1124
  """
697
- Computes the effective extension based on inputs 'ext', defaulting to the SubDir's extension.
698
- Resolves '*' into the extension associated with the specified format.
699
- This function allows setting 'ext' also as a Format.
1125
+ Computes the effective extension based on the input ``ext_or_fmt``,
1126
+ and the current settings for ``self``.
1127
+
1128
+ If ``ext_or_fmt`` is set to ``"*"`` then the extension associated to
1129
+ the format of ``self`` is returned.
1130
+
1131
+ Parameters
1132
+ ----------
1133
+ ext_or_fmt : str or :class:`cdxcore.subdir.Format`
1134
+ An extension or a format.
700
1135
 
701
- Returns the extension with leading '.'
1136
+ Returns
1137
+ -------
1138
+ ext : str
1139
+ The extension with leading ``'.'``.
702
1140
  """
703
- if isinstance(ext, Format):
704
- return self._auto_ext(ext)
1141
+ if isinstance(ext_or_fmt, Format):
1142
+ return self._auto_ext(ext_or_fmt)
705
1143
  else:
706
- ext = self._ext if ext is None else SubDir._extract_ext(ext)
1144
+ ext = self._ext if ext_or_fmt is None else SubDir._extract_ext(ext_or_fmt)
707
1145
  return ext if ext != self.EXT_FMT_AUTO else self._auto_ext(self._fmt)
708
1146
 
709
- def autoExtFmt( self, *, ext : str = None, fmt : Format = None ) -> str:
1147
+ def auto_ext_fmt( self, *, ext : str = None, fmt : Format = None ) -> tuple[str]:
710
1148
  """
711
- Computes the effective extension and format based on inputs 'ext' and 'fmt', each of which defaults to the SubDir's current settings.
712
- Resolves '*' into the extension associated with the specified format.
713
- This function allows setting 'ext' also as a Format.
1149
+ Computes the effective extension and format based on inputs ``ext`` and ``fmt``,
1150
+ each of which defaults to the respective values of ``self``.
1151
+
1152
+ Resolves an ``ext`` of ``"*"`` into the extension associated with ``fmt``.
714
1153
 
715
- Returns (ext, fmt) where 'ext' contains the leading '.'
1154
+ Returns
1155
+ -------
1156
+ (ext, fmt) : tuple
1157
+ Here ``ext`` contains the leading ``'.'`` and ``fmt`` is
1158
+ of type :class:`cdxcore.subdir.Format`.
716
1159
  """
717
1160
  if isinstance(ext, Format):
718
1161
  verify( fmt is None or fmt == ext, "If 'ext' is a Format, then 'fmt' must match 'ext' or be None. Found '%s' and '%s', respectively.", ext, fmt, exception=ValueError )
@@ -724,8 +1167,8 @@ class SubDir(object):
724
1167
  return ext, fmt
725
1168
 
726
1169
  @property
727
- def cacheController(self):
728
- """ Returns an assigned CacheController, or None """
1170
+ def cache_controller(self):
1171
+ """ Returns an assigned :class:`cdxcore.subdir.CacheController`, or ``None`` """
729
1172
  return self._cctrl if not self._cctrl is None else default_cacheController
730
1173
 
731
1174
  # -- static helpers --
@@ -747,7 +1190,10 @@ class SubDir(object):
747
1190
 
748
1191
  @staticmethod
749
1192
  def _version_to_bytes( version : str ) -> bytearray:
750
- """ Convert string version to byte string of at most size MAX_VERSION_BINARY_LEN + 1 """
1193
+ """
1194
+ Convert string version to byte string of at most size
1195
+ :data:`cdxcore.subdir.SubDir.MAX_VERSION_BINARY_LEN` + 1
1196
+ """
751
1197
  if version is None:
752
1198
  return None
753
1199
  version_ = bytearray(version,'utf-8')
@@ -790,69 +1236,67 @@ class SubDir(object):
790
1236
 
791
1237
  # -- public utilities --
792
1238
 
793
- def fullFileName(self, key : str, *, ext : str = None) -> str:
1239
+ def full_file_name(self, file : str, *, ext : str = None) -> str:
794
1240
  """
795
1241
  Returns fully qualified file name.
796
- The function tests that 'key' does not contain directory information.
797
-
798
- If 'self' is None, then this function returns None
799
- If key is None then this function returns None
1242
+
1243
+ The function tests that ``file`` does not contain directory information.
800
1244
 
801
1245
  Parameters
802
1246
  ----------
803
- key : str
804
- Core file name, e.g. the 'key' in a data base sense
1247
+ file : str
1248
+ Core file name without path or extension.
805
1249
  ext : str
806
- If not None, use this extension rather than self.ext
1250
+ If not ``None``, use this extension rather than :attr:`cdxcore.subdir.SubDir.ext`.
807
1251
 
808
1252
  Returns
809
1253
  -------
810
- Fully qualified system file name
811
-
812
- [This function has an alias 'fullKeyName' for backward compatibility]
1254
+ Filename : str
1255
+ Fully qualified system file name.
1256
+ If ``self`` is ``None``, then this function returns ``None``; if ``file`` is ``None`` then this function also returns ``None``.
813
1257
  """
814
- if self._path is None or key is None:
1258
+ if self._path is None or file is None:
815
1259
  return None
816
- key = str(key)
817
- verify( len(key) > 0, "'key' cannot be empty")
1260
+ file = str(file)
1261
+ verify( len(file) > 0, "'file' cannot be empty")
818
1262
 
819
- sub, _ = os.path.split(key)
820
- verify( len(sub) == 0, "Key '%s' contains directory information", key)
1263
+ sub, _ = os.path.split(file)
1264
+ verify( len(sub) == 0, "Key '%s' contains directory information", file)
821
1265
 
822
- verify( key[0] != "!", "Key '%s' cannot start with '!' (this symbol indicates the temp directory)", key, exception=ValueError )
823
- verify( key[0] != "~", "Key '%s' cannot start with '~' (this symbol indicates the user's directory)", key, exception=ValueError )
1266
+ verify( file[0] != "!", "Key '%s' cannot start with '!' (this symbol indicates the temp directory)", file, exception=ValueError )
1267
+ verify( file[0] != "~", "Key '%s' cannot start with '~' (this symbol indicates the user's directory)", file, exception=ValueError )
824
1268
 
825
- ext = self.autoExt( ext )
826
- if len(ext) > 0 and key[-len(ext):] != ext:
827
- return self._path + key + ext
828
- return self._path + key
829
- fullKeyName = fullFileName # backwards compatibility
1269
+ ext = self.auto_ext( ext )
1270
+ if len(ext) > 0 and file[-len(ext):] != ext:
1271
+ return self._path + file + ext
1272
+ return self._path + file
1273
+ full_file_name = full_file_name # backwards compatibility
830
1274
 
831
1275
  @staticmethod
832
- def tempDir() -> str:
1276
+ def temp_dir() -> str:
833
1277
  """
834
- Return system temp directory. Short cut to tempfile.gettempdir()
835
- Result contains trailing '/'
1278
+ Return system temp directory. Short-cut to :func:`tempfile.gettempdir`.
1279
+ Result contains trailing ``'/'``.
836
1280
  """
837
1281
  d = tempfile.gettempdir()
838
1282
  assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-1", d)
839
1283
  return d + "/"
840
1284
 
841
1285
  @staticmethod
842
- def workingDir() -> str:
1286
+ def working_dir() -> str:
843
1287
  """
844
- Return current working directory. Short cut for os.getcwd()
845
- Result contains trailing '/'
1288
+ Return current working directory. Short-cut for :func:`os.getcwd`.
1289
+ Result contains trailing ``'/'``.
846
1290
  """
847
1291
  d = os.getcwd()
848
1292
  assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-2", d)
849
1293
  return d + "/"
850
1294
 
851
1295
  @staticmethod
852
- def userDir() -> str:
1296
+ def user_dir() -> str:
853
1297
  """
854
- Return current working directory. Short cut for os.path.expanduser('~')
855
- Result contains trailing '/'
1298
+ Return the current user's home directory. Short-cut for :func:`os.path.expanduser` with parameter ``'~'``.
1299
+ Result contains trailing ``'/'``.
856
1300
  """
857
1301
  d = os.path.expanduser('~')
858
1302
  assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-3", d)
@@ -860,95 +1304,101 @@ class SubDir(object):
860
1304
 
861
1305
  # -- read --
862
1306
 
863
- def _read_reader( self, reader, key : str, default, raiseOnError : bool, *, ext : str = None ):
1307
+ def _read_reader( self, reader, file : str, default, raise_on_error : bool, *, ext : str = None ):
864
1308
  """
865
1309
  Utility function for read() and readLine()
866
1310
 
867
1311
  Parameters
868
1312
  ----------
869
- reader( key, fullFileName, default )
1313
+ reader( file, full_file_name, default )
870
1314
  A function which is called to read the file once the correct directory is identified
871
- key : key (for error messages, might include '/')
872
- fullFileName : full file name
1315
+ file : file (for error messages, might include '/')
1316
+ full_file_name : full file name
873
1317
  default value
874
- key : str or list
875
- str: fully qualified key
1318
+ file : str or list
1319
+ str: fully qualified file
876
1320
  list: list of fully qualified names
877
1321
  default :
878
1322
  default value. None is a valid default value
879
1323
  list : list of defaults for a list of keys
880
- raiseOnError : bool
1324
+ raise_on_error : bool
881
1325
  If True, and the file does not exist, throw exception
882
1326
  ext :
883
1327
  Extension or None for current extension.
884
1328
  list : list of extensions for a list of keys
885
1329
  """
886
1330
  # vector version
887
- if not isinstance(key,str):
888
- if not isinstance(key, Collection): raise ValueError(txtfmt( "'key' must be a string, or an interable object. Found type %s", type(key)))
889
- l = len(key)
1331
+ if not isinstance(file,str):
1332
+ if not isinstance(file, Collection): raise ValueError(txtfmt( "'file' must be a string, or an interable object. Found type %s", type(file)))
1333
+ l = len(file)
890
1334
  if default is None or isinstance(default,str) or not isinstance(default, Collection):
891
1335
  default = [ default ] * l
892
1336
  else:
893
- if len(default) != l: raise ValueError(txtfmt("'default' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(default), l ))
1337
+ if len(default) != l: raise ValueError(txtfmt("'default' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(default), l ))
894
1338
  if ext is None or isinstance(ext, str) or not isinstance(ext, Collection):
895
1339
  ext = [ ext ] * l
896
1340
  else:
897
- if len(ext) != l: raise ValueError(txtfmt("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l ))
898
- return [ self._read_reader(reader=reader,key=k,default=d,raiseOnError=raiseOnError,ext=e) for k, d, e in zip(key,default,ext) ]
1341
+ if len(ext) != l: raise ValueError(txtfmt("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l ))
1342
+ return [ self._read_reader(reader=reader,file=k,default=d,raise_on_error=raise_on_error,ext=e) for k, d, e in zip(file,default,ext) ]
899
1343
 
900
1344
  # deleted directory?
901
1345
  if self._path is None:
902
- verify( not raiseOnError, "Trying to read '%s' from an empty directory object", key, exception=NotADirectoryError)
1346
+ verify( not raise_on_error, "Trying to read '%s' from an empty directory object", file, exception=NotADirectoryError)
903
1347
  return default
904
1348
 
905
- # single key
906
- if len(key) == 0: raise ValueError(txtfmt("'key' missing (the filename)" ))
907
- sub, key_ = os.path.split(key)
1349
+ # single file
1350
+ if len(file) == 0: raise ValueError(txtfmt("'file' missing (the filename)" ))
1351
+ sub, key_ = os.path.split(file)
908
1352
  if len(sub) > 0:
909
- return self(sub)._read_reader(reader=reader,key=key_,default=default,raiseOnError=raiseOnError,ext=ext)
910
- if len(key_) == 0: ValueError(txtfmt("'key' %s indicates a directory, not a file", key))
1353
+ return self(sub)._read_reader(reader=reader,file=key_,default=default,raise_on_error=raise_on_error,ext=ext)
1354
+ if len(key_) == 0: ValueError(txtfmt("'file' %s indicates a directory, not a file", file))
911
1355
 
912
1356
  # don't try if directory doesn't exist
913
- fullFileName = self.fullFileName(key,ext=ext)
914
- if not self.pathExists():
915
- if raiseOnError:
916
- raise KeyError(key, fullFileName)
1357
+ full_file_name = self.full_file_name(file,ext=ext)
1358
+ if not self.path_exists():
1359
+ if raise_on_error:
1360
+ raise KeyError(file, full_file_name)
917
1361
  return default
918
1362
 
919
1363
  # does file exit?
920
- if not os.path.exists(fullFileName):
921
- if raiseOnError:
922
- raise KeyError(key,fullFileName)
1364
+ if not os.path.exists(full_file_name):
1365
+ if raise_on_error:
1366
+ raise KeyError(file,full_file_name)
923
1367
  return default
924
- if not os.path.isfile(fullFileName):
925
- raise IOError(txtfmt( "Cannot read %s: object exists, but is not a file (full path %s)", key, fullFileName ))
1368
+ if not os.path.isfile(full_file_name):
1369
+ raise IOError(txtfmt( "Cannot read '%s': object exists, but is not a file (full path %s)", file, full_file_name ))
926
1370
 
927
1371
  # read content
928
1372
  # delete existing files upon read error
929
1373
  try:
930
- return reader( key, fullFileName, default )
1374
+ return reader( file, full_file_name, default )
931
1375
  except EOFError as e:
932
1376
  try:
933
- os.remove(fullFileName)
934
- warn("Cannot read %s; file deleted (full path %s).\nError: %s",key,fullFileName, str(e))
1377
+ os.remove(full_file_name)
1378
+ warn("Cannot read '%s'; file deleted (full path '%s').\nError: %s",file,full_file_name, str(e))
935
1379
  except Exception as e:
936
- warn("Cannot read %s; attempt to delete file failed (full path %s): %s",key,fullFileName,str(e))
1380
+ warn("Cannot read '%s'; subsequent attempt to delete file failed (full path '%s''): %s",file,full_file_name,str(e))
937
1381
  except FileNotFoundError as e:
938
- if raiseOnError:
939
- raise KeyError(key, fullFileName, str(e)) from e
1382
+ if raise_on_error:
1383
+ raise KeyError(file, full_file_name, str(e)) from e
1384
+ except VersionError as e:
1385
+ if raise_on_error:
1386
+ raise e
1387
+ except VersionPresentError as e:
1388
+ if raise_on_error:
1389
+ raise e
940
1390
  except Exception as e:
941
- if raiseOnError:
942
- raise KeyError(key, fullFileName, str(e)) from e
1391
+ if raise_on_error:
1392
+ raise KeyError(file, full_file_name, str(e)) from e
943
1393
  except (ImportError, BaseException) as e:
944
- e.add_note( key )
945
- e.add_note( fullFileName )
1394
+ e.add_note( file )
1395
+ e.add_note( full_file_name )
946
1396
  raise e
947
1397
  return default
948
1398
 
949
- def _read( self, key : str,
1399
+ def _read( self, file : str,
950
1400
  default = None,
951
- raiseOnError : bool = False,
1401
+ raise_on_error : bool = False,
952
1402
  *,
953
1403
  version : str = None,
954
1404
  ext : str = None,
@@ -957,18 +1407,34 @@ class SubDir(object):
957
1407
  handle_version : int = 0
958
1408
  ):
959
1409
  """ See read() """
960
- ext, fmt = self.autoExtFmt(ext=ext, fmt=fmt)
1410
+ ext, fmt = self.auto_ext_fmt(ext=ext, fmt=fmt)
961
1411
  version = str(version) if not version is None else None
962
1412
  version = version if handle_version != SubDir.VER_RETURN else ""
963
1413
  assert not fmt == self.EXT_FMT_AUTO, ("'fmt' is '*' ...?")
964
1414
 
965
1415
  if version is None and fmt in [Format.BLOSC, Format.GZIP]:
966
- version = ""
1416
+ # blosc and gzip have unexpected side effects
1417
+ # a version is attempted to be read but is not present
1418
+ # (e.g. blosc causes a MemoryError)
1419
+ version = ""
967
1420
 
968
- def reader( key, fullFileName, default ):
1421
+ def reader( file, full_file_name, default ):
969
1422
  test_version = "(unknown)"
970
- if fmt == Format.PICKLE or fmt == Format.BLOSC:
971
- with open(fullFileName,"rb") as f:
1423
+
1424
+ def handle_pickle_error(e):
1425
+ err = "invalid load key, '\\x03'."
1426
+ if not version is None or e.args[0] != err:
1427
+ print("####", e.args)
1428
+ raise e
1429
+ raise VersionPresentError(
1430
+ f"Error reading '{full_file_name}': encountered an unpickling error '{err}' "+\
1431
+ f"while attempting to read file using {str(fmt)}. "+\
1432
+ "This is likely caused by attempting to read a file which was written with "+\
1433
+ "version information without providing a test version during read(). If the version is of the file "+\
1434
+ "is not important, use `version=\"*\"'", e) from e
1435
+ if fmt == Format.PICKLE:
1436
+ # we do not read any version information if not requested
1437
+ with open(full_file_name,"rb") as f:
972
1438
  # handle version as byte string
973
1439
  ok = True
974
1440
  if not version is None:
@@ -981,37 +1447,55 @@ class SubDir(object):
981
1447
  if ok:
982
1448
  if handle_version == SubDir.VER_CHECK:
983
1449
  return True
984
- if fmt == Format.PICKLE:
1450
+ try:
985
1451
  data = pickle.load(f)
986
- elif fmt == Format.BLOSC:
987
- if blosc is None:
988
- raise ModuleNotFoundError("blosc", "'blosc' not found.")
989
- nnbb = f.read(2)
990
- num_blocks = int.from_bytes( nnbb, 'big', signed=False )
991
- data = bytearray()
992
- for i in range(num_blocks):
993
- blockl = int.from_bytes( f.read(6), 'big', signed=False )
994
- if blockl>0:
995
- bdata = blosc.decompress( f.read(blockl) )
996
- data += bdata
997
- del bdata
1452
+ except pickle.UnpicklingError as e:
1453
+ handle_pickle_error(e)
1454
+ return data
1455
+
1456
+ elif fmt == Format.BLOSC:
1457
+ # we do not write
1458
+ # any version information if not requested
1459
+ with open(full_file_name,"rb") as f:
1460
+ # handle version as byte string
1461
+ ok = True
1462
+ if not version is None: # it's never None
1463
+ test_len = int( f.read( 1 )[0] )
1464
+ test_version = f.read(test_len)
1465
+ test_version = test_version.decode("utf-8")
1466
+ if handle_version == SubDir.VER_RETURN:
1467
+ return test_version
1468
+ ok = (version == "*" or test_version == version)
1469
+ if ok:
1470
+ if handle_version == SubDir.VER_CHECK:
1471
+ return True
1472
+ nnbb = f.read(2)
1473
+ num_blocks = int.from_bytes( nnbb, 'big', signed=False )
1474
+ data = bytearray()
1475
+ for i in range(num_blocks):
1476
+ blockl = int.from_bytes( f.read(6), 'big', signed=False )
1477
+ if blockl>0:
1478
+ bdata = blosc.decompress( f.read(blockl) )
1479
+ data += bdata
1480
+ del bdata
1481
+ try:
998
1482
  data = pickle.loads(data)
999
- else:
1000
- raise NotImplementedError(fmt, txtfmt("Unkown format '%s'", fmt))
1483
+ except pickle.UnpicklingError as e:
1484
+ handle_pickle_error(e)
1001
1485
  return data
1002
1486
 
1003
1487
  elif fmt == Format.GZIP:
1004
- if gzip is None:
1005
- raise ModuleNotFoundError("gzip", "'gzip' not found'")
1006
- with gzip.open(fullFileName,"rb") as f:
1488
+ # always read version information
1489
+ with gzip.open(full_file_name,"rb") as f:
1007
1490
  # handle version as byte string
1008
- ok = True
1009
- test_len = int( f.read( 1 )[0] )
1010
- test_version = f.read(test_len)
1011
- test_version = test_version.decode("utf-8")
1012
- if handle_version == SubDir.VER_RETURN:
1013
- return test_version
1014
- ok = (version == "*" or test_version == version)
1491
+ ok = True
1492
+ if not version is None: # it's never None
1493
+ test_len = int( f.read( 1 )[0] )
1494
+ test_version = f.read(test_len)
1495
+ test_version = test_version.decode("utf-8")
1496
+ if handle_version == SubDir.VER_RETURN:
1497
+ return test_version
1498
+ ok = (version == "*" or test_version == version)
1015
1499
  if ok:
1016
1500
  if handle_version == SubDir.VER_CHECK:
1017
1501
  return True
@@ -1019,13 +1503,16 @@ class SubDir(object):
1019
1503
  return data
1020
1504
 
1021
1505
  elif fmt in [Format.JSON_PLAIN, Format.JSON_PICKLE]:
1022
- with open(fullFileName,"rt",encoding="utf-8") as f:
1506
+ # only read version information if requested
1507
+ with open(full_file_name,"rt",encoding="utf-8") as f:
1023
1508
  # handle versioning
1024
1509
  ok = True
1025
1510
  if not version is None:
1026
1511
  test_version = f.readline()
1027
1512
  if test_version[:2] != "# ":
1028
- raise EnvironmentError("Error reading '%s': file does not appear to contain a version (it should start with '# ')" % fullFileName)
1513
+ raise VersionError("Error reading '{full_file_name}' using {fmt}: file does not appear to contain a version (it should start with '# ')",
1514
+ version_found="",
1515
+ version_expected=version)
1029
1516
  test_version = test_version[2:]
1030
1517
  if test_version[-1:] == "\n":
1031
1518
  test_version = test_version[:-1]
@@ -1037,8 +1524,7 @@ class SubDir(object):
1037
1524
  return ok
1038
1525
  # read
1039
1526
  if fmt == Format.JSON_PICKLE:
1040
- if jsonpickle is None:
1041
- raise ModuleNotFoundError("jsonpickle", "'jsonpickle' not found'")
1527
+ jsonpickle = _import_jsonpickle()
1042
1528
  return jsonpickle.decode( f.read() )
1043
1529
  else:
1044
1530
  assert fmt == Format.JSON_PLAIN, ("Internal error: unknown Format", fmt)
@@ -1048,25 +1534,33 @@ class SubDir(object):
1048
1534
 
1049
1535
  # arrive here if version is wrong
1050
1536
  # delete a wrong version
1537
+
1538
+ if version == "":
1539
+ raise VersionPresentError(f"Error reading '{full_file_name}' using {fmt}: the file has version '{test_version}', but was attempted to be read without "+\
1540
+ "a test version. If you intended to accept any version, use 'version=\"*\"' instead.")
1541
+
1051
1542
  deleted = ""
1052
1543
  if delete_wrong_version:
1053
1544
  try:
1054
- os.remove(fullFileName)
1545
+ os.remove(full_file_name)
1055
1546
  e = None
1056
1547
  except Exception as e_:
1057
1548
  e = str(e_)
1058
1549
  if handle_version == SubDir.VER_CHECK:
1059
1550
  return False
1060
- if not raiseOnError:
1551
+ if not raise_on_error:
1061
1552
  return default
1062
1553
  deleted = " (file was deleted)" if e is None else " (attempt to delete file failed: %s)" % e
1063
- raise EnvironmentError("Error reading '%s': found version '%s' not '%s'%s" % (fullFileName,str(test_version),str(version),deleted))
1554
+ raise VersionError( f"Error reading '{full_file_name}' using {fmt}: found version '{test_version}' not '{version}'{deleted}",
1555
+ version_found=test_version,
1556
+ version_expected=version
1557
+ )
1064
1558
 
1065
- return self._read_reader( reader=reader, key=key, default=default, raiseOnError=raiseOnError, ext=ext )
1559
+ return self._read_reader( reader=reader, file=file, default=default, raise_on_error=raise_on_error, ext=ext )
1066
1560
 
1067
- def read( self, key : str,
1561
+ def read( self, file : str,
1068
1562
  default = None,
1069
- raiseOnError : bool = False,
1563
+ raise_on_error : bool = False,
1070
1564
  *,
1071
1565
  version : str = None,
1072
1566
  delete_wrong_version : bool = True,
@@ -1074,296 +1568,323 @@ class SubDir(object):
1074
1568
  fmt : Format = None
1075
1569
  ):
1076
1570
  """
1077
- Read pickled data from 'key' if the file exists, or return 'default'
1078
- -- Supports 'key' containing directories
1079
- -- Supports 'key' (and default, ext) being iterable.
1080
- In this case any any iterable 'default' except strings are considered accordingly.
1081
- In order to have a unit default which is an iterable, you will have to wrap it in another iterable, e.g.
1082
- E.g.:
1083
- keys = ['file1', 'file2']
1084
-
1085
- sd.read( keys )
1086
- --> works, both are using default None
1571
+ Read data from a file if the file exists, or return ``default``.
1087
1572
 
1088
- sd.read( keys, 1 )
1089
- --> works, both are using default '1'
1573
+ * Supports ``file`` containing directory information.
1574
+ * Supports ``file`` (and ``default`` as well as ``ext``) being iterable.
1575
+ Examples::
1576
+
1577
+ from cdxcore.subdir import SubDir
1578
+ files = ['file1', 'file2']
1579
+ sd = SubDir("!/test")
1090
1580
 
1091
- sd.read( keys, [1,2] )
1092
- --> works, defaults 1 and 2, respectively
1581
+ sd.read( files ) # both files are using default None
1582
+ sd.read( files, 1 ) # both files are using default '1'
1583
+ sd.read( files, [1,2] ) # files use defaults 1 and 2, respectively
1093
1584
 
1094
- sd.read( keys, [1] )
1095
- --> produces error as len(keys) != len(default)
1585
+ sd.read( files, [1] ) # produces error as len(files) != len([1])
1096
1586
 
1097
- Strings are iterable but are treated as single value.
1098
- Therefore
1099
- sd.read( keys, '12' )
1100
- means the default value '12' is used for both files.
1101
- Use
1102
- sd.read( keys, ['1','2'] )
1103
- in case the intention was using '1' and '2', respectively.
1104
-
1105
- Returns the read object, or a list of objects if 'key' was iterable.
1106
- If the current directory is 'None', then behaviour is as if the file did not exist.
1587
+ Strings are iterable but are treated as single value.
1588
+ Therefore::
1589
+
1590
+ sd.read( files, '12' ) # the default value '12' is used for both files
1591
+ sd.read( files, ['1','2'] ) # use defaults '1' and '2', respectively
1107
1592
 
1108
1593
  Parameters
1109
1594
  ----------
1110
- key : str
1111
- A core filename ("key") or a list thereof. The 'key' may contain subdirectory information '/'.
1595
+ file : str
1596
+ A file name or a list thereof. ``file`` may contain subdirectories.
1597
+
1112
1598
  default :
1113
- Default value, or default values if key is a list
1114
- raiseOnError : bool
1599
+ Default value, or default values if ``file`` is a list.
1600
+
1601
+ raise_on_error : bool
1115
1602
  Whether to raise an exception if reading an existing file failed.
1116
1603
  By default this function fails silently and returns the default.
1604
+
1117
1605
  version : str
1118
- If not None, specifies the version of the current code base.
1606
+ If not ``None``, specifies the version of the current code base.
1607
+
1119
1608
  In this case, this version will be compared to the version of the file being read.
1120
- If they do not match, read fails (either by returning default or throwing an exception).
1121
- You can specify version "*" to read any version. This is distrinct from reading a file without version.
1609
+ If they do not match, read fails (either by returning default or throwing a :class:`cdxcore.version.VersionError` exception).
1610
+
1611
+ You can specify version ``"*"`` to accept any version.
1612
+ Note that this is distinct
1613
+ from using ``None``, which stipulates that the file should not
1614
+ have version information.
1615
+
1122
1616
  delete_wrong_version : bool
1123
- If True, and if a wrong version was found, delete the file.
1617
+ If ``True``, and if a wrong version was found, delete the file.
1618
+
1124
1619
  ext : str
1125
- Extension overwrite, or a list thereof if key is a list
1126
- Set to:
1127
- -- None to use directory's default
1128
- -- '*' to use the extension implied by 'fmt'
1129
- -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1130
- fmt : Format
1131
- File format or None to use the directory's default.
1132
- Note that 'fmt' cannot be a list even if 'key' is.
1133
- Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
1620
+ Extension overwrite, or a list thereof if ``file`` is a list.
1621
+
1622
+ Use:
1623
+
1624
+ * ``None`` to use directory's default.
1625
+ * ``'*'`` to use the extension implied by ``fmt``.
1626
+ * ``""`` to turn of extension management.
1627
+
1628
+ fmt : :class:`cdxcore.subdir.Format`
1629
+ File :class:`cdxcore.subdir.Format` or ``None`` to use the directory's default.
1630
+
1631
+ Note:
1632
+
1633
+ * ``fmt`` cannot be a list even if ``file`` is.
1634
+ * Unless ``ext`` or the SubDir's extension is ``'*'``, changing the format does not automatically change the extension.
1134
1635
 
1135
1636
  Returns
1136
1637
  -------
1137
- For a single 'key': Content of the file if successfully read, or 'default' otherwise.
1138
- If 'key' is a list: list of contents.
1638
+ Content
1639
+ For a single ``file`` returns the content of the file if successfully read, or ``default`` otherwise.
1640
+ If ``file`` is a list: list of contents.
1641
+
1642
+ Raises
1643
+ ------
1644
+ :class:`cdxcore.version.VersionError`:
1645
+ If the file's version did not match the ``version`` provided.
1646
+
1139
1647
  """
1140
- return self._read( key=key,
1648
+ return self._read( file=file,
1141
1649
  default=default,
1142
- raiseOnError=raiseOnError,
1650
+ raise_on_error=raise_on_error,
1143
1651
  version=version,
1144
1652
  ext=ext,
1145
1653
  fmt=fmt,
1146
1654
  delete_wrong_version=delete_wrong_version,
1147
1655
  handle_version=SubDir.VER_NORMAL )
1148
1656
 
1149
- get = read # backwards compatibility
1150
-
1151
- def is_version( self, key : str, version : str = None, raiseOnError : bool = False, *, ext : str = None, fmt : Format = None, delete_wrong_version : bool = True ):
1657
+ def is_version( self, file : str, version : str = None, raise_on_error : bool = False, *, ext : str = None, fmt : Format = None, delete_wrong_version : bool = True ):
1152
1658
  """
1153
- Compares the version of 'key' with 'version'.
1659
+ Tests the version of a file.
1154
1660
 
1155
1661
  Parameters
1156
1662
  ----------
1157
- key : str
1158
- A core filename ("key") or a list thereof. The 'key' may contain subdirectory information '/'.
1663
+ file : str
1664
+ A filename, or a list thereof.
1665
+
1159
1666
  version : str
1160
- Specifies the version of the current code base to compare with.
1161
- You can use '*' to match any version
1667
+ Specifies the version to compare the file's version with.
1668
+
1669
+ You can use ``"*"`` to match any version.
1162
1670
 
1163
- raiseOnError : bool
1671
+ raise_on_error : bool
1164
1672
  Whether to raise an exception if accessing an existing file failed (e.g. if it is a directory).
1165
1673
  By default this function fails silently and returns the default.
1674
+
1166
1675
  delete_wrong_version : bool
1167
- If True, and if a wrong version was found, delete the file.
1676
+ If True, and if a wrong version was found, delete ``file``.
1677
+
1168
1678
  ext : str
1169
- Extension overwrite, or a list thereof if key is a list.
1679
+ Extension overwrite, or a list thereof if file is a list.
1680
+
1170
1681
  Set to:
1171
- -- None to use directory's default
1172
- -- '*' to use the extension implied by 'fmt'
1173
- -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1174
- fmt : Format
1175
- File format or None to use the directory's default.
1176
- Note that 'fmt' cannot be a list even if 'key' is.
1177
- Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
1682
+
1683
+ * ``None`` to use directory's default.
1684
+ * ``"*"`` to use the extension implied by ``fmt``.
1685
+ * ``""`` for no extension.
1686
+
1687
+ fmt : :class:`cdxcore.subdir.Format`
1688
+ File format or ``None`` to use the directory's default.
1689
+ Note that ``fmt`` cannot be a list even if ``file`` is.
1178
1690
 
1179
1691
  Returns
1180
1692
  -------
1181
- Returns True only if the file exists and has the correct version.
1693
+ Status : bool
1694
+ Returns ``True`` only if the file exists, has version information, and its version is equal to ``version``.
1182
1695
  """
1183
- return self._read( key=key,default=False,raiseOnError=raiseOnError,version=version,ext=ext,fmt=fmt,delete_wrong_version=delete_wrong_version,handle_version=SubDir.VER_CHECK )
1696
+ return self._read( file=file,default=False,raise_on_error=raise_on_error,version=version,ext=ext,fmt=fmt,delete_wrong_version=delete_wrong_version,handle_version=SubDir.VER_CHECK )
1184
1697
 
1185
- def get_version( self, key : str, raiseOnError : bool = False, *, ext : str = None, fmt : Format = None ):
1698
+ def get_version( self, file : str, raise_on_error : bool = False, *, ext : str = None, fmt : Format = None ):
1186
1699
  """
1187
- Returns the version ID stored in 'key'.
1700
+ Returns a version stored in a file.
1701
+
1188
1702
  This requires that the file has previously been saved with a version.
1189
- Otherwise this function will return unpredictable results.
1703
+ Otherwise this function will have unpredictable results.
1190
1704
 
1191
1705
  Parameters
1192
1706
  ----------
1193
- key : str
1194
- A core filename ("key") or a list thereof. The 'key' may contain subdirectory information '/'.
1195
- raiseOnError : bool
1707
+ file : str
1708
+ A filename, or a list thereof.
1709
+
1710
+ raise_on_error : bool
1196
1711
  Whether to raise an exception if accessing an existing file failed (e.g. if it is a directory).
1197
1712
  By default this function fails silently and returns the default.
1713
+
1714
+ delete_wrong_version : bool
1715
+ If ``True``, and if a wrong version was found, delete ``file``.
1716
+
1198
1717
  ext : str
1199
- Extension overwrite, or a list thereof if key is a list.
1718
+ Extension overwrite, or a list thereof if ``file`` is a list.
1719
+
1200
1720
  Set to:
1201
- -- None to use directory's default
1202
- -- '*' to use the extension implied by 'fmt'
1203
- -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1204
- fmt : Format
1205
- File format or None to use the directory's default.
1206
- Note that 'fmt' cannot be a list even if 'key' is.
1207
- Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
1721
+
1722
+ * ``None`` to use directory's default.
1723
+ * ``"*"`` to use the extension implied by ``fmt``.
1724
+ * ``""`` for no extension.
1725
+
1726
+ fmt : :class:`cdxcore.subdir.Format`
1727
+ File format or ``None`` to use the directory's default.
1728
+ Note that ``fmt`` cannot be a list even if ``file`` is.
1208
1729
 
1209
1730
  Returns
1210
1731
  -------
1211
- Version ID.
1732
+ version : str
1212
1733
  """
1213
- return self._read( key=key,default=None,raiseOnError=raiseOnError,version="",ext=ext,fmt=fmt,delete_wrong_version=False,handle_version=SubDir.VER_RETURN )
1734
+ return self._read( file=file,default=None,raise_on_error=raise_on_error,version="",ext=ext,fmt=fmt,delete_wrong_version=False,handle_version=SubDir.VER_RETURN )
1214
1735
 
1215
- def readString( self, key : str, default = None, raiseOnError : bool = False, *, ext : str = None ) -> str:
1736
+ def read_string( self, file : str, default = None, raise_on_error : bool = False, *, ext : str = None ) -> str:
1216
1737
  """
1217
- Reads text from 'key' or returns 'default'. Removes trailing EOLs
1218
- -- Supports 'key' containing directories#
1219
- -- Supports 'key' being iterable. In this case any 'default' can be a list, too.
1220
-
1221
- Returns the read string, or a list of strings if 'key' was iterable.
1222
- If the current directory is 'None', then behaviour is as if the file did not exist.
1223
-
1224
- Use 'ext' to specify the extension.
1225
- You cannot use 'ext' to specify a format as the format is plain text.
1226
- If 'ext' is '*' or if self._ext is '*' then the default extension is 'txt'.
1738
+ Reads text from a file. Removes trailing EOLs.
1739
+
1740
+ Returns the read string, or a list of strings if ``file`` was iterable.
1227
1741
  """
1228
1742
  verify( not isinstance(ext, Format), "Cannot change format when writing strings. Found extension '%s'", ext)
1229
1743
  ext = ext if not ext is None else self._ext
1230
1744
  ext = ext if ext != self.EXT_FMT_AUTO else ".txt"
1231
1745
 
1232
- def reader( key, fullFileName, default ):
1233
- with open(fullFileName,"rt",encoding="utf-8") as f:
1746
+ def reader( file, full_file_name, default ):
1747
+ with open(full_file_name,"rt",encoding="utf-8") as f:
1234
1748
  line = f.readline()
1235
1749
  if len(line) > 0 and line[-1] == '\n':
1236
1750
  line = line[:-1]
1237
1751
  return line
1238
- return self._read_reader( reader=reader, key=key, default=default, raiseOnError=raiseOnError, ext=ext )
1752
+ return self._read_reader( reader=reader, file=file, default=default, raise_on_error=raise_on_error, ext=ext )
1239
1753
 
1240
1754
  # -- write --
1241
1755
 
1242
- def _write( self, writer, key : str, obj, raiseOnError : bool, *, ext : str = None ) -> bool:
1756
+ def _write( self, writer, file : str, obj, raise_on_error : bool, *, ext : str = None ) -> bool:
1243
1757
  """ Utility function for write() and writeLine() """
1244
1758
  if self._path is None:
1245
- raise EOFError("Cannot write to '%s': current directory is not specified" % key)
1246
- self.createDirectory()
1759
+ raise EOFError("Cannot write to '%s': current directory is not specified" % file)
1760
+ self.create_directory()
1247
1761
 
1248
1762
  # vector version
1249
- if not isinstance(key,str):
1250
- if not isinstance(key, Collection): error( "'key' must be a string or an interable object. Found type %s", type(key))
1251
- l = len(key)
1763
+ if not isinstance(file,str):
1764
+ if not isinstance(file, Collection): error( "'file' must be a string or an interable object. Found type %s", type(file), exception=ValueError)
1765
+ l = len(file)
1252
1766
  if obj is None or isinstance(obj,str) or not isinstance(obj, Collection):
1253
1767
  obj = [ obj ] * l
1254
1768
  else:
1255
- if len(obj) != l: error("'obj' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(obj), l )
1769
+ if len(obj) != l: error("'obj' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(obj), l, exception=ValueError )
1256
1770
  if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
1257
1771
  ext = [ ext ] * l
1258
1772
  else:
1259
- if len(ext) != l: error("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
1773
+ if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l, exception=ValueError )
1260
1774
  ok = True
1261
- for k,o,e in zip(key,obj,ext):
1262
- ok |= self._write( writer, k, o, raiseOnError=raiseOnError, ext=e )
1775
+ for k,o,e in zip(file,obj,ext):
1776
+ ok |= self._write( writer, k, o, raise_on_error=raise_on_error, ext=e )
1263
1777
  return ok
1264
1778
 
1265
- # single key
1266
- if not len(key) > 0: error("'key is empty (the filename)" )
1267
- sub, key = os.path.split(key)
1268
- if len(key) == 0: error("'key '%s' refers to a directory, not a file", key)
1779
+ # single file
1780
+ if not len(file) > 0: error("'file is empty (the filename)" )
1781
+ sub, file = os.path.split(file)
1782
+ if len(file) == 0: error("'file '%s' refers to a directory, not a file", file)
1269
1783
  if len(sub) > 0:
1270
- return SubDir(sub,parent=self)._write(writer,key,obj, raiseOnError=raiseOnError,ext=ext )
1784
+ return SubDir(sub,parent=self)._write(writer,file,obj, raise_on_error=raise_on_error,ext=ext )
1271
1785
 
1272
1786
  # write to temp file, then rename into target file
1273
1787
  # this reduces collision when i/o operations are slow
1274
- fullFileName = self.fullKeyName(key,ext=ext)
1275
- tmp_file = uniqueHash48( [ key, uuid.getnode(), os.getpid(), threading.get_ident(), datetime.datetime.now() ] )
1788
+ full_file_name = self.full_file_name(file,ext=ext)
1789
+ tmp_file = unique_hash48( [ file, uuid.getnode(), os.getpid(), threading.get_ident(), datetime.datetime.now() ] )
1276
1790
  tmp_i = 0
1277
- fullTmpFile = self.fullKeyName(tmp_file,ext="tmp" if not ext=="tmp" else "_tmp")
1791
+ fullTmpFile = self.full_file_name(tmp_file,ext="tmp" if not ext=="tmp" else "_tmp")
1278
1792
  while os.path.exists(fullTmpFile):
1279
- fullTmpFile = self.fullKeyName(tmp_file) + "." + str(tmp_i) + ".tmp"
1793
+ fullTmpFile = self.full_file_name(tmp_file) + "." + str(tmp_i) + ".tmp"
1280
1794
  tmp_i += 1
1281
1795
  if tmp_i >= 10:
1282
- raise RuntimeError("Failed to generate temporary file for writing '%s': too many temporary files found. For example, this file already exists: '%s'" % ( fullFileName, fullTmpFile ) )
1796
+ raise RuntimeError("Failed to generate temporary file for writing '%s': too many temporary files found. For example, this file already exists: '%s'" % ( full_file_name, fullTmpFile ) )
1283
1797
 
1284
1798
  # write
1285
- if not writer( key, fullTmpFile, obj ):
1799
+ if not writer( file, fullTmpFile, obj ):
1286
1800
  return False
1287
- assert os.path.exists(fullTmpFile), ("Internal error: file does not exist ...?", fullTmpFile, fullFileName)
1801
+ assert os.path.exists(fullTmpFile), ("Internal error: file does not exist ...?", fullTmpFile, full_file_name)
1288
1802
  try:
1289
- if os.path.exists(fullFileName):
1290
- os.remove(fullFileName)
1291
- os.rename(fullTmpFile, fullFileName)
1803
+ if os.path.exists(full_file_name):
1804
+ os.remove(full_file_name)
1805
+ os.rename(fullTmpFile, full_file_name)
1292
1806
  except Exception as e:
1293
1807
  os.remove(fullTmpFile)
1294
- if raiseOnError:
1808
+ if raise_on_error:
1295
1809
  raise e
1296
1810
  return False
1297
1811
  return True
1298
1812
 
1299
- def write( self, key : str,
1813
+ def write( self, file : str,
1300
1814
  obj,
1301
- raiseOnError : bool = True,
1815
+ raise_on_error : bool = True,
1302
1816
  *,
1303
1817
  version : str = None,
1304
1818
  ext : str = None,
1305
1819
  fmt : Format = None ) -> bool:
1306
1820
  """
1307
- Pickles 'obj' into key.
1308
- -- Supports 'key' containing directories
1309
- -- Supports 'key' being a list.
1310
- In this case, if obj is an iterable it is considered the list of values for the elements of 'keys'
1311
- If 'obj' is not iterable, it will be written into all 'key's
1312
-
1313
- keys = ['file1', 'file2']
1314
-
1315
- sd.write( keys, 1 )
1316
- --> works, writes '1' in both files.
1317
-
1318
- sd.read( keys, [1,2] )
1319
- --> works, writes 1 and 2, respectively
1821
+ Writes an object to file.
1822
+
1823
+ * Supports ``file`` containing directories.
1824
+ * Supports ``file`` being a list.
1825
+ In this case, if ``obj`` is an iterable it is considered the list of values for the elements of ``file``.
1826
+ If ``obj`` is not iterable, it will be written into all files from ``file``::
1320
1827
 
1321
- sd.read( keys, "12" )
1322
- --> works, writes '12' in both files
1828
+ from cdxcore.subdir import SubDir
1323
1829
 
1324
- sd.write( keys, [1] )
1325
- --> produces error as len(keys) != len(obj)
1830
+ keys = ['file1', 'file2']
1831
+ sd = SubDir("!/test")
1832
+ sd.write( keys, 1 ) # works, writes '1' in both files.
1833
+ sd.write( keys, [1,2] ) # works, writes 1 and 2, respectively
1834
+ sd.write( keys, "12" ) # works, writes '12' in both files
1835
+ sd.write( keys, [1] ) # produces error as len(keys) != len(obj)
1326
1836
 
1327
- If the current directory is 'None', then the function throws an EOFError exception
1837
+ If the current directory is ``None``, then the function raises an :class:`EOFError` exception.
1328
1838
 
1329
1839
  Parameters
1330
1840
  ----------
1331
- key : str
1332
- Core filename ("key"), or list thereof
1841
+ file : str
1842
+ Core filename, or list thereof.
1843
+
1333
1844
  obj :
1334
- Object to write, or list thereof if 'key' is a list
1335
- raiseOnError : bool
1336
- If False, this function will return False upon failure
1845
+ Object to write, or list thereof if ``file`` is a list.
1846
+
1847
+ raise_on_error : bool
1848
+ If ``False``, this function will return ``False`` upon failure.
1849
+
1337
1850
  version : str
1338
- If not None, specifies the version of the code which generated 'obj'.
1851
+ If not ``None``, specifies the version of the code which generated ``obj``.
1339
1852
  This version will be written to the beginning of the file.
1853
+
1340
1854
  ext : str
1341
- Extension, or list thereof if 'key' is a list.
1342
- Set to:
1343
- -- None to use directory's default
1344
- -- '*' to use the extension implied by 'fmt'
1345
- -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1346
- fmt : Format
1347
- File format or None to use the directory's default.
1348
- Note that 'fmt' cannot be a list even if 'key' is.
1349
- Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
1855
+ Extension, or list thereof if ``file`` is a list.
1856
+
1857
+ * Use ``None`` to use directory's default extension.
1858
+ * Use ``"*"`` to use the extension implied by ``fmt``.
1859
+
1860
+ fmt : :class:`cdxcore.subdir.Format`
1861
+ File format or ``None`` to use the directory's default.
1862
+ Note that ``fmt`` cannot be a list even if ``file`` is.
1863
+ Note that unless ``ext`` or the SubDir's extension is '*',
1864
+ changing the format does not automatically change the extension used.
1350
1865
 
1351
1866
  Returns
1352
1867
  -------
1353
- Boolean to indicate success if raiseOnError is False.
1868
+ Success : bool
1869
+ Boolean to indicate success if ``raise_on_error`` is ``False``.
1354
1870
  """
1355
- ext, fmt = self.autoExtFmt(ext=ext, fmt=fmt)
1871
+ ext, fmt = self.auto_ext_fmt(ext=ext, fmt=fmt)
1356
1872
  version = str(version) if not version is None else None
1357
1873
  assert ext != self.EXT_FMT_AUTO, ("'ext' is '*'...?")
1358
1874
 
1359
1875
  if version=='*': error("You cannot write version '*'. Use None to write a file without version.")
1876
+
1360
1877
  if version is None and fmt in [Format.BLOSC, Format.GZIP]:
1361
- version = ""
1878
+ # blosc and gzip have unexpected side effects
1879
+ # a version is attempted to be read but is not present
1880
+ # (e.g. blosc causes a MemoryError)
1881
+ version = ""
1362
1882
 
1363
- def writer( key, fullFileName, obj ):
1883
+ def writer( file, full_file_name, obj ):
1364
1884
  try:
1365
- if fmt == Format.PICKLE or fmt == Format.BLOSC:
1366
- with open(fullFileName,"wb") as f:
1885
+ if fmt == Format.PICKLE:
1886
+ # only if a version is provided write it into the file
1887
+ with open(full_file_name,"wb") as f:
1367
1888
  # handle version as byte string
1368
1889
  if not version is None:
1369
1890
  version_ = bytearray(version, "utf-8")
@@ -1372,35 +1893,41 @@ class SubDir(object):
1372
1893
  len8[0] = len(version_)
1373
1894
  f.write(len8)
1374
1895
  f.write(version_)
1375
- if fmt == Format.PICKLE:
1376
- pickle.dump(obj,f,-1)
1377
- else:
1378
- assert fmt == fmt.BLOSC, ("Internal error: unknown format", fmt)
1379
- if blosc is None:
1380
- raise ModuleNotFoundError("blosc", "'blosc' not found")
1381
- pdata = pickle.dumps(obj) # returns data as a bytes object
1382
- del obj
1383
- len_data = len(pdata)
1384
- num_blocks = max(0,len_data-1) // BLOSC_MAX_USE + 1
1385
- f.write(num_blocks.to_bytes(2, 'big', signed=False))
1386
- for i in range(num_blocks):
1387
- start = i*BLOSC_MAX_USE
1388
- end = min(len_data,start+BLOSC_MAX_USE)
1389
- assert end>start, ("Internal error; nothing to write")
1390
- block = blosc.compress( pdata[start:end] )
1391
- blockl = len(block)
1392
- f.write( blockl.to_bytes(6, 'big', signed=False) )
1393
- if blockl > 0:
1394
- f.write( block )
1395
- del block
1396
- del pdata
1896
+ pickle.dump(obj,f,-1)
1897
+
1898
+ elif fmt == Format.BLOSC:
1899
+ # only if a version is provided write it into the file
1900
+ with open(full_file_name,"wb") as f:
1901
+ # handle version as byte string
1902
+ if not version is None: # it's never None
1903
+ version_ = bytearray(version, "utf-8")
1904
+ if len(version_) > 255: error("Version '%s' is way too long: its byte encoding has length %ld which does not fit into a byte", version, len(version_))
1905
+ len8 = bytearray(1)
1906
+ len8[0] = len(version_)
1907
+ f.write(len8)
1908
+ f.write(version_)
1909
+ pdata = pickle.dumps(obj) # returns data as a bytes object
1910
+ del obj
1911
+ len_data = len(pdata)
1912
+ num_blocks = max(0,len_data-1) // BLOSC_MAX_USE + 1
1913
+ f.write(num_blocks.to_bytes(2, 'big', signed=False))
1914
+ for i in range(num_blocks):
1915
+ start = i*BLOSC_MAX_USE
1916
+ end = min(len_data,start+BLOSC_MAX_USE)
1917
+ assert end>start, ("Internal error; nothing to write")
1918
+ block = blosc.compress( pdata[start:end] )
1919
+ blockl = len(block)
1920
+ f.write( blockl.to_bytes(6, 'big', signed=False) )
1921
+ if blockl > 0:
1922
+ f.write( block )
1923
+ del block
1924
+ del pdata
1397
1925
 
1398
1926
  elif fmt == Format.GZIP:
1399
- if gzip is None:
1400
- raise ModuleNotFoundError("gzip", "'gzip' not found")
1401
- with gzip.open(fullFileName,"wb") as f:
1927
+ # only if a version is provided write it into the file
1928
+ with gzip.open(full_file_name,"wb") as f:
1402
1929
  # handle version as byte string
1403
- if not version is None:
1930
+ if not version is None: # it's never None
1404
1931
  version_ = bytearray(version, "utf-8")
1405
1932
  if len(version_) > 255: error("Version '%s' is way too long: its byte encoding has length %ld which does not fit into a byte", version, len(version_))
1406
1933
  len8 = bytearray(1)
@@ -1410,12 +1937,12 @@ class SubDir(object):
1410
1937
  pickle.dump(obj,f,-1)
1411
1938
 
1412
1939
  elif fmt in [Format.JSON_PLAIN, Format.JSON_PICKLE]:
1413
- with open(fullFileName,"wt",encoding="utf-8") as f:
1940
+ # only if a version is provided write it into the file
1941
+ with open(full_file_name,"wt",encoding="utf-8") as f:
1414
1942
  if not version is None:
1415
1943
  f.write("# " + version + "\n")
1416
1944
  if fmt == Format.JSON_PICKLE:
1417
- if jsonpickle is None:
1418
- raise ModuleNotFoundError("jsonpickle", "'jsonpickle' not found")
1945
+ jsonpickle = _import_jsonpickle()
1419
1946
  f.write( jsonpickle.encode(obj) )
1420
1947
  else:
1421
1948
  assert fmt == Format.JSON_PLAIN, ("Internal error: invalid Format", fmt)
@@ -1424,27 +1951,21 @@ class SubDir(object):
1424
1951
  else:
1425
1952
  raise NotImplementedError(fmt, txtfmt("Internal error: invalid format '%s'", fmt))
1426
1953
  except Exception as e:
1427
- if raiseOnError:
1954
+ if raise_on_error:
1428
1955
  raise e
1429
1956
  return False
1430
1957
  return True
1431
- return self._write( writer=writer, key=key, obj=obj, raiseOnError=raiseOnError, ext=ext )
1432
-
1433
- set = write
1958
+ return self._write( writer=writer, file=file, obj=obj, raise_on_error=raise_on_error, ext=ext )
1434
1959
 
1435
- def writeString( self, key : str, line : str, raiseOnError : bool = True, *, ext : str = None ) -> bool:
1960
+ def write_string( self, file : str, line : str, raise_on_error : bool = True, *, ext : str = None ) -> bool:
1436
1961
  """
1437
- Writes 'line' into key. A trailing EOL will not be read back
1438
- -- Supports 'key' containing directories
1439
- -- Supports 'key' being a list.
1440
- In this case, line can either be the same value for all key's or a list, too.
1441
-
1442
- If the current directory is 'None', then the function throws an EOFError exception
1443
- See additional comments for write()
1962
+ Writes a line of text into a file.
1444
1963
 
1445
- Use 'ext' to specify the extension.
1446
- You cannot use 'ext' to specify a format as the format is plain text.
1447
- If 'ext' is '*' or if self._ext is '*' then the default extension is 'txt'.
1964
+ * Supports ``file`` containing directories.
1965
+ * Supports ``file`` being a list.
1966
+ In this case, ``line`` can either be the same value for all files or a list, too.
1967
+
1968
+ If the current directory is ``None``, then the function raises an :class:`EOFError` exception.
1448
1969
  """
1449
1970
  verify( not isinstance(ext, Format), "Cannot change format when writing strings. Found extension '%s'", ext, exception=ValueError )
1450
1971
  ext = ext if not ext is None else self._ext
@@ -1452,38 +1973,37 @@ class SubDir(object):
1452
1973
 
1453
1974
  if len(line) == 0 or line[-1] != '\n':
1454
1975
  line += '\n'
1455
- def writer( key, fullFileName, obj ):
1976
+ def writer( file, full_file_name, obj ):
1456
1977
  try:
1457
- with open(fullFileName,"wt",encoding="utf-8") as f:
1978
+ with open(full_file_name,"wt",encoding="utf-8") as f:
1458
1979
  f.write(obj)
1459
1980
  except Exception as e:
1460
- if raiseOnError:
1981
+ if raise_on_error:
1461
1982
  raise e
1462
1983
  return False
1463
1984
  return True
1464
- return self._write( writer=writer, key=key, obj=line, raiseOnError=raiseOnError, ext=ext )
1985
+ return self._write( writer=writer, file=file, obj=line, raise_on_error=raise_on_error, ext=ext )
1465
1986
 
1466
1987
  # -- iterate --
1467
1988
 
1468
1989
  def files(self, *, ext : str = None) -> list:
1469
1990
  """
1470
- Returns a list of keys in this subdirectory with the current extension, or the specified extension.
1991
+ Returns a list of files in this subdirectory with the current extension, or the specified extension.
1471
1992
 
1472
1993
  In other words, if the extension is ".pck", and the files are "file1.pck", "file2.pck", "file3.bin"
1473
1994
  then this function will return [ "file1", "file2" ]
1474
1995
 
1475
- If 'ext' is
1476
- -- None, the directory's default extension will be used
1477
- -- "" then this function will return all files in this directory.
1478
- -- a Format, then the default extension of the format will be used.
1479
-
1480
- This function ignores directories. Use subDirs() to retrieve those.
1996
+ If ``ext`` is:
1997
+
1998
+ * ``None``, then the directory's default extension will be used.
1999
+ * ``""`` then this function will return all files in this directory.
2000
+ * ``"*"`` then the extension corresponding to the current format will be used.
1481
2001
 
1482
- [This function has an alias 'keys']
2002
+ This function ignores directories. Use :meth:`cdxcore.subdir.SubDir.sub_dirs` to retrieve those.
1483
2003
  """
1484
- if not self.pathExists():
2004
+ if not self.path_exists():
1485
2005
  return []
1486
- ext = self.autoExt( ext=ext )
2006
+ ext = self.auto_ext( ext )
1487
2007
  ext_l = len(ext)
1488
2008
  keys = []
1489
2009
  with os.scandir(self._path) as it:
@@ -1497,15 +2017,15 @@ class SubDir(object):
1497
2017
  else:
1498
2018
  keys.append( entry.name )
1499
2019
  return keys
1500
- keys = files
1501
2020
 
1502
- def subDirs(self) -> list:
2021
+ def sub_dirs(self) -> list:
1503
2022
  """
1504
- Returns a list of all sub directories
1505
- If self does not refer to an existing directory, then this function returns an empty list.
2023
+ Retrieve a list of all sub directories.
2024
+
2025
+ If ``self`` does not refer to an existing directory, then this function returns an empty list.
1506
2026
  """
1507
2027
  # do not do anything if the object was deleted
1508
- if not self.pathExists():
2028
+ if not self.path_exists():
1509
2029
  return []
1510
2030
  subdirs = []
1511
2031
  with os.scandir(self._path[:-1]) as it:
@@ -1517,322 +2037,345 @@ class SubDir(object):
1517
2037
 
1518
2038
  # -- delete --
1519
2039
 
1520
- def delete( self, key : str, raiseOnError: bool = False, *, ext : str = None ):
2040
+ def delete( self, file : str, raise_on_error: bool = False, *, ext : str = None ):
1521
2041
  """
1522
- Deletes 'key'; 'key' might be a list.
2042
+ Deletes ``file``.
2043
+
2044
+ This function will quietly fail if ``file`` does not exist unless ``raise_on_error``
2045
+ is set to ``True``.
1523
2046
 
1524
2047
  Parameters
1525
2048
  ----------
1526
- key :
2049
+ file :
1527
2050
  filename, or list of filenames
1528
- raiseOnError :
1529
- if False, do not throw KeyError if file does not exist.
1530
- ext :
1531
- Extension, or list thereof if 'key' is an extension.
2051
+
2052
+ raise_on_error : bool
2053
+ If ``False``, do not throw :class:`KeyError` if file does not exist
2054
+ or another error occurs.
2055
+
2056
+ ext : str
2057
+ Extension, or list thereof if ``file`` is a list.
2058
+
1532
2059
  Use
1533
- -- None for the directory default
1534
- -- "" to not use an automatic extension.
1535
- -- A Format to specify the default extension for that format.
2060
+
2061
+ * ``None`` for the directory default.
2062
+ * ``""`` to not use an automatic extension.
2063
+ * ``"*"`` to use the extension associated with the format of the directory.
1536
2064
  """
1537
2065
  # do not do anything if the object was deleted
1538
2066
  if self._path is None:
1539
- if raiseOnError: raise EOFError("Cannot delete '%s': current directory not specified" % key)
2067
+ if raise_on_error: raise EOFError("Cannot delete '%s': current directory not specified" % file)
1540
2068
  return
1541
2069
 
1542
2070
  # vector version
1543
- if not isinstance(key,str):
1544
- if not isinstance(key, Collection): error( "'key' must be a string or an interable object. Found type %s", type(key))
1545
- l = len(key)
2071
+ if not isinstance(file,str):
2072
+ if not isinstance(file, Collection): error( "'file' must be a string or an interable object. Found type %s", type(file))
2073
+ l = len(file)
1546
2074
  if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
1547
2075
  ext = [ ext ] * l
1548
2076
  else:
1549
- if len(ext) != l: error("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
1550
- for k, e in zip(key,ext):
1551
- self.delete(k, raiseOnError=raiseOnError, ext=e)
2077
+ if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l )
2078
+ for k, e in zip(file,ext):
2079
+ self.delete(k, raise_on_error=raise_on_error, ext=e)
1552
2080
  return
1553
2081
 
1554
- # handle directories in 'key'
1555
- if len(key) == 0: error( "'key' is empty" )
1556
- sub, key_ = os.path.split(key)
1557
- if len(key_) == 0: error("'key' %s indicates a directory, not a file", key)
1558
- if len(sub) > 0: return SubDir(sub,parent=self).delete(key_,raiseOnError=raiseOnError,ext=ext)
2082
+ # handle directories in 'file'
2083
+ if len(file) == 0: error( "'file' is empty" )
2084
+ sub, key_ = os.path.split(file)
2085
+ if len(key_) == 0: error("'file' %s indicates a directory, not a file", file)
2086
+ if len(sub) > 0: return SubDir(sub,parent=self).delete(key_,raise_on_error=raise_on_error,ext=ext)
1559
2087
  # don't try if directory doesn't exist
1560
- if not self.pathExists():
1561
- if raiseOnError:
1562
- raise KeyError(key)
2088
+ if not self.path_exists():
2089
+ if raise_on_error:
2090
+ raise KeyError(file)
1563
2091
  return
1564
- fullFileName = self.fullKeyName(key, ext=ext)
1565
- if not os.path.exists(fullFileName):
1566
- if raiseOnError:
1567
- raise KeyError(key)
2092
+ full_file_name = self.full_file_name(file, ext=ext)
2093
+ if not os.path.exists(full_file_name):
2094
+ if raise_on_error:
2095
+ raise KeyError(file)
1568
2096
  else:
1569
- os.remove(fullFileName)
2097
+ os.remove(full_file_name)
1570
2098
 
1571
- def deleteAllKeys( self, raiseOnError : bool = False, *, ext : str = None ):
2099
+ def delete_all_files( self, raise_on_error : bool = False, *, ext : str = None ):
1572
2100
  """
1573
2101
  Deletes all valid keys in this sub directory with the correct extension.
1574
2102
 
1575
2103
  Parameters
1576
2104
  ----------
1577
- key :
1578
- filename, or list of filenames
1579
- raiseOnError :
1580
- if False, do not throw KeyError if file does not exist.
1581
- ext :
1582
- File extension to match.
1583
- Use
1584
- -- None for the directory default
1585
- -- "" to match all files regardless of extension.
1586
- -- A Format to specify the default extension for that format.
2105
+ raise_on_error : bool
2106
+ Set to ``False`` to quietly ignore errors.
2107
+
2108
+ ext : str
2109
+ Extension to be used:
2110
+
2111
+ * ``None`` for the directory default.
2112
+ * ``""`` to not use an automatic extension.
2113
+ * ``"*"`` to use the extension associated with the format of the directory.
1587
2114
  """
1588
2115
  if self._path is None:
1589
- if raiseOnError: raise EOFError("Cannot delete all files: current directory not specified")
2116
+ if raise_on_error: raise EOFError("Cannot delete all files: current directory not specified")
1590
2117
  return
1591
- if not self.pathExists():
2118
+ if not self.path_exists():
1592
2119
  return
1593
- self.delete( self.keys(ext=ext), raiseOnError=raiseOnError, ext=ext )
2120
+ self.delete( self.files(ext=ext), raise_on_error=raise_on_error, ext=ext )
1594
2121
 
1595
- def deleteAllContent( self, deleteSelf : bool = False, raiseOnError : bool = False, *, ext : str = None ):
2122
+ def delete_all_content( self, delete_self : bool = False, raise_on_error : bool = False, *, ext : str = None ):
1596
2123
  """
1597
2124
  Deletes all valid keys and subdirectories in this sub directory.
2125
+
1598
2126
  Does not delete files with other extensions.
1599
- Use eraseEverything() if the aim is to delete everything.
2127
+ Use :meth:`cdxcore.subdir.SubDir.delete_everything` if the aim is to delete, well, everything.
1600
2128
 
1601
2129
  Parameters
1602
2130
  ----------
1603
- deleteSelf:
1604
- whether to delete the directory or only its contents
1605
- raiseOnError:
1606
- False for silent failure
1607
- ext:
1608
- Extension for keys, or None for the directory's default.
1609
- You can also provide a Format for 'ext'.
1610
- Use "" to match all files regardless of extension.
2131
+ delete_self: bool
2132
+ Whether to delete the directory itself as well, or only its contents.
2133
+ raise_on_error: bool
2134
+ ``False`` for silent failure
2135
+ ext: str
2136
+ Extension for keys, or ``None`` for the directory's default.
2137
+ Use ``""`` to match all files regardless of extension.
1611
2138
  """
1612
2139
  # do not do anything if the object was deleted
1613
2140
  if self._path is None:
1614
- if raiseOnError: raise EOFError("Cannot delete all contents: current directory not specified")
2141
+ if raise_on_error: raise EOFError("Cannot delete all contents: current directory not specified")
1615
2142
  return
1616
- if not self.pathExists():
2143
+ if not self.path_exists():
1617
2144
  return
1618
2145
  # delete sub directories
1619
- subdirs = self.subDirs();
2146
+ subdirs = self.sub_dirs();
1620
2147
  for subdir in subdirs:
1621
- SubDir(subdir, parent=self).deleteAllContent( deleteSelf=True, raiseOnError=raiseOnError, ext=ext )
2148
+ SubDir(subdir, parent=self).delete_all_content( delete_self=True, raise_on_error=raise_on_error, ext=ext )
1622
2149
  # delete keys
1623
- self.deleteAllKeys( raiseOnError=raiseOnError,ext=ext )
2150
+ self.delete_all_files( raise_on_error=raise_on_error,ext=ext )
1624
2151
  # delete myself
1625
- if not deleteSelf:
2152
+ if not delete_self:
1626
2153
  return
1627
2154
  rest = list( os.scandir(self._path[:-1]) )
1628
2155
  txt = str(rest)
1629
2156
  txt = txt if len(txt) < 50 else (txt[:47] + '...')
1630
2157
  if len(rest) > 0:
1631
- if raiseOnError: error( "Cannot delete my own directory %s: directory not empty: found %ld object(s): %s", self._path,len(rest), txt)
2158
+ if raise_on_error: error( "Cannot delete my own directory %s: directory not empty: found %ld object(s): %s", self._path,len(rest), txt)
1632
2159
  return
1633
2160
  os.rmdir(self._path[:-1]) ## does not work ????
1634
2161
  self._path = None
1635
2162
 
1636
- def eraseEverything( self, keepDirectory : bool = True ):
2163
+ def delete_everything( self, keep_directory : bool = True ):
1637
2164
  """
1638
- Deletes the entire sub directory will all contents
1639
- WARNING: deletes ALL files, not just those with the present extension.
1640
- Will keep the subdir itself by default.
1641
- If not, it will invalidate 'self._path'
1642
-
1643
- If self is None, do nothing. That means you can call this function several times.
2165
+ Deletes the entire sub directory with all contents.
2166
+
2167
+ *WARNING:* deletes *all* files and sub-directories, not just those with the present extension.
2168
+ If ``keep_directory`` is ``False``, the directory referred to by this object will also be deleted.
2169
+ In this case, the path of ``self`` will be invalidated (set to ``None``).
1644
2170
  """
1645
2171
  if self._path is None:
1646
2172
  return
1647
- if not self.pathExists():
2173
+ if not self.path_exists():
1648
2174
  return
1649
2175
  shutil.rmtree(self._path[:-1], ignore_errors=True)
1650
- if not keepDirectory and os.path.exists(self._path[:-1]):
2176
+ if not keep_directory and os.path.exists(self._path[:-1]):
1651
2177
  os.rmdir(self._path[:-1])
1652
2178
  self._path = None
1653
- elif keepDirectory and not os.path.exists(self._path[:-1]):
2179
+ elif keep_directory and not os.path.exists(self._path[:-1]):
1654
2180
  os.makedirs(self._path[:-1])
1655
2181
 
1656
2182
  # -- file ops --
1657
2183
 
1658
- def exists(self, key : str, *, ext : str = None ) -> bool:
2184
+ def exists(self, file : str, *, ext : str = None ) -> bool:
1659
2185
  """
1660
- Checks whether 'key' exists. Works with iterables
2186
+ Checks whether a file exists.
1661
2187
 
1662
2188
  Parameters
1663
2189
  ----------
1664
- key :
1665
- filename, or list of filenames
1666
- ext :
1667
- Extension, or list thereof if 'key' is an extension.
2190
+ file :
2191
+ Filename, or list of filenames.
2192
+
2193
+ ext : str
2194
+ Extension, or list thereof if ``file`` is a list.
2195
+
1668
2196
  Use
1669
- -- None for the directory default
1670
- -- "" for no automatic extension
1671
- -- A Format to specify the default extension for that format.
2197
+
2198
+ * ``None`` for the directory default.
2199
+ * ``""`` to not use an automatic extension.
2200
+ * ``"*"`` to use the extension associated with the format of the directory.
1672
2201
 
1673
2202
  Returns
1674
2203
  -------
1675
- If 'key' is a string, returns True or False, else it will return a list of bools.
2204
+ Status : bool
2205
+ If ``file`` is a string, returns ``True`` or ``False``, else it will return a list of ``bool`` values.
1676
2206
  """
1677
2207
  # vector version
1678
- if not isinstance(key,str):
1679
- verify( isinstance(key, Collection), "'key' must be a string or an interable object. Found type %s", type(key))
1680
- l = len(key)
2208
+ if not isinstance(file,str):
2209
+ verify( isinstance(file, Collection), "'file' must be a string or an interable object. Found type %s", type(file))
2210
+ l = len(file)
1681
2211
  if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
1682
2212
  ext = [ ext ] * l
1683
2213
  else:
1684
- if len(ext) != l: error("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
1685
- return [ self.exists(k,ext=e) for k,e in zip(key,ext) ]
2214
+ if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l )
2215
+ return [ self.exists(k,ext=e) for k,e in zip(file,ext) ]
1686
2216
  # empty directory
1687
2217
  if self._path is None:
1688
2218
  return False
1689
- # handle directories in 'key'
1690
- if len(key) == 0: raise ValueError("'key' missing (the filename)")
1691
- sub, key_ = os.path.split(key)
1692
- if len(key_) == 0: raise IsADirectoryError( key, txtfmt("'key' %s indicates a directory, not a file", key) )
2219
+ # handle directories in 'file'
2220
+ if len(file) == 0: raise ValueError("'file' missing (the filename)")
2221
+ sub, key_ = os.path.split(file)
2222
+ if len(key_) == 0: raise IsADirectoryError( file, txtfmt("'file' %s indicates a directory, not a file", file) )
1693
2223
  if len(sub) > 0:
1694
- return self(sub).exists(key=key_,ext=ext)
2224
+ return self(sub).exists(file=key_,ext=ext)
1695
2225
  # if directory doesn't exit
1696
- if not self.pathExists():
2226
+ if not self.path_exists():
1697
2227
  return False
1698
- # single key
1699
- fullFileName = self.fullKeyName(key, ext=ext)
1700
- if not os.path.exists(fullFileName):
2228
+ # single file
2229
+ full_file_name = self.full_file_name(file, ext=ext)
2230
+ if not os.path.exists(full_file_name):
1701
2231
  return False
1702
- if not os.path.isfile(fullFileName):
1703
- raise IsADirectoryError("Structural error: key %s: exists, but is not a file (full path %s)",key,fullFileName)
2232
+ if not os.path.isfile(full_file_name):
2233
+ raise IsADirectoryError("Structural error: file %s: exists, but is not a file (full path %s)",file,full_file_name)
1704
2234
  return True
1705
2235
 
1706
- def _getFileProperty( self, *, key : str, ext : str, func ):
2236
+ def _getFileProperty( self, *, file : str, ext : str, func ):
1707
2237
  # vector version
1708
- if not isinstance(key,str):
1709
- verify( isinstance(key, Collection), "'key' must be a string or an interable object. Found type %s", type(key))
1710
- l = len(key)
2238
+ if not isinstance(file,str):
2239
+ verify( isinstance(file, Collection), "'file' must be a string or an interable object. Found type %s", type(file))
2240
+ l = len(file)
1711
2241
  if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
1712
2242
  ext = [ ext ] * l
1713
2243
  else:
1714
- if len(ext) != l: error("'ext' must have same lengths as 'key' if the latter is a collection; found %ld and %ld", len(ext), l )
1715
- return [ self._getFileProperty(key=k,ext=e,func=func) for k,e in zip(key,ext) ]
2244
+ if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l )
2245
+ return [ self._getFileProperty(file=k,ext=e,func=func) for k,e in zip(file,ext) ]
1716
2246
  # empty directory
1717
2247
  if self._path is None:
1718
2248
  return None
1719
- # handle directories in 'key'
1720
- if len(key) == 0: raise ValueError("'key' missing (the filename)")
1721
- sub, key_ = os.path.split(key)
1722
- if len(key_) == 0: raise IsADirectoryError( key, txtfmt("'key' %s indicates a directory, not a file", key) )
1723
- if len(sub) > 0: return self(sub)._getFileProperty(key=key_,ext=ext,func=func)
2249
+ # handle directories in 'file'
2250
+ if len(file) == 0: raise ValueError("'file' missing (the filename)")
2251
+ sub, key_ = os.path.split(file)
2252
+ if len(key_) == 0: raise IsADirectoryError( file, txtfmt("'file' %s indicates a directory, not a file", file) )
2253
+ if len(sub) > 0: return self(sub)._getFileProperty(file=key_,ext=ext,func=func)
1724
2254
  # if directory doesn't exit
1725
- if not self.pathExists():
2255
+ if not self.path_exists():
1726
2256
  return None
1727
- # single key
1728
- fullFileName = self.fullKeyName(key, ext=ext)
1729
- if not os.path.exists(fullFileName):
2257
+ # single file
2258
+ full_file_name = self.full_file_name(file, ext=ext)
2259
+ if not os.path.exists(full_file_name):
1730
2260
  return None
1731
- return func(fullFileName)
2261
+ return func(full_file_name)
1732
2262
 
1733
- def getCreationTime( self, key : str, *, ext : str = None ) -> datetime.datetime:
2263
+ def get_creation_time( self, file : str, *, ext : str = None ) -> datetime.datetime:
1734
2264
  """
1735
- Returns the creation time of 'key', or None if file was not found.
1736
- See comments on os.path.getctime() for compatibility
2265
+ Returns the creation time of a file.
2266
+
2267
+ See comments on :func:`os.path.getctime` for system compatibility information.
1737
2268
 
1738
2269
  Parameters
1739
2270
  ----------
1740
- key :
1741
- filename, or list of filenames
2271
+ file :
2272
+ filename, or list of filenames.
1742
2273
  ext :
1743
- Extension, or list thereof if 'key' is an extension.
1744
- Use
1745
- -- None for the directory default
1746
- -- "" for no automatic extension
1747
- -- A Format to specify the default extension for that format.
2274
+ Extension, or list thereof if ``file`` is a list.
2275
+ Use:
2276
+
2277
+ * ``None`` for the directory default.
2278
+ * ``""`` for no automatic extension.
2279
+ * A :class:`cdxcore.subdir.Format` to use the default extension for that format.
1748
2280
 
1749
2281
  Returns
1750
2282
  -------
1751
- datetime.datetime if 'key' is a string, otherwise a list of datetime's
2283
+ Datetime : :class:`datetime.datetime`
2284
+ A single ``datetime`` if ``file`` is a string, otherwise a list of ``datetime``'s.
2285
+ Returns ``None`` if an error occurred.
1752
2286
  """
1753
- return self._getFileProperty( key=key, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getctime(x)) )
2287
+ return self._getFileProperty( file=file, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getctime(x)) )
1754
2288
 
1755
- def getLastModificationTime( self, key : str, *, ext : str = None ) -> datetime.datetime:
2289
+ def get_last_modification_time( self, file : str, *, ext : str = None ) -> datetime.datetime:
1756
2290
  """
1757
- Returns the last modification time of 'key', or None if file was not found.
1758
- See comments on os.path.getmtime() for compatibility
2291
+ Returns the last modification time of a file.
2292
+
2293
+ See comments on :func:`os.path.getmtime` for system compatibility information.
1759
2294
 
1760
2295
  Parameters
1761
2296
  ----------
1762
- key :
1763
- filename, or list of filenames
2297
+ file :
2298
+ filename, or list of filenames.
1764
2299
  ext :
1765
- Extension, or list thereof if 'key' is an extension.
1766
- Use
1767
- -- None for the directory default
1768
- -- "" for no automatic extension
1769
- -- A Format to specify the default extension for that format.
2300
+ Extension, or list thereof if ``file`` is a list.
2301
+ Use:
2302
+
2303
+ * ``None`` for the directory default.
2304
+ * ``""`` for no automatic extension.
2305
+ * A :class:`cdxcore.subdir.Format` to use the default extension for that format.
1770
2306
 
1771
2307
  Returns
1772
2308
  -------
1773
- datetime.datetime if 'key' is a string, otherwise a list of datetime's
2309
+ Datetime : :class:`datetime.datetime`
2310
+ A single ``datetime`` if ``file`` is a string, otherwise a list of ``datetime``'s.
2311
+ Returns ``None`` if an error occurred.
1774
2312
  """
1775
- return self._getFileProperty( key=key, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getmtime(x)) )
2313
+ return self._getFileProperty( file=file, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getmtime(x)) )
1776
2314
 
1777
- def getLastAccessTime( self, key : str, *, ext : str = None ) -> datetime.datetime:
2315
+ def get_last_access_time( self, file : str, *, ext : str = None ) -> datetime.datetime:
1778
2316
  """
1779
- Returns the last access time of 'key', or None if file was not found.
1780
- See comments on os.path.getatime() for compatibility
2317
+ Returns the last access time of a file.
2318
+
2319
+ See comments on :func:`os.path.getatime` for system compatibility information.
1781
2320
 
1782
2321
  Parameters
1783
2322
  ----------
1784
- key :
1785
- filename, or list of filenames
1786
- ext :
1787
- Extension, or list thereof if 'key' is an extension.
1788
- Use
1789
- -- None for the directory default
1790
- -- "" for no automatic extension
1791
- -- A Format to specify the default extension for that format.
2323
+ file : str
2324
+ Filename, or list of filenames.
2325
+
2326
+ ext : str
2327
+ Extension, or list thereof if ``file`` is a list.
2328
+
2329
+ * Use ``None`` for the directory default.
2330
+ * Use ``""`` for no automatic extension.
1792
2331
 
1793
2332
  Returns
1794
2333
  -------
1795
- datetime.datetime if 'key' is a string, otherwise a list of datetime's
2334
+ Datetime : :class:`datetime.datetime`
2335
+ A single ``datetime`` if ``file`` is a string, otherwise a list of ``datetime``'s.
2336
+ Returns ``None`` if an error occurred.
1796
2337
  """
1797
- return self._getFileProperty( key=key, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getatime(x)) )
2338
+ return self._getFileProperty( file=file, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getatime(x)) )
1798
2339
 
1799
- def getFileSize( self, key : str, *, ext : str = None ) -> int:
2340
+ def file_size( self, file : str, *, ext : str = None ) -> int:
1800
2341
  """
1801
- Returns the file size of 'key', or None if file was not found.
1802
- See comments on os.path.getatime() for compatibility
2342
+ Returns the file size of a file.
2343
+
2344
+ See comments on :func:`os.path.getsize` for system compatibility information.
1803
2345
 
1804
2346
  Parameters
1805
2347
  ----------
1806
- key :
1807
- filename, or list of filenames
1808
- ext :
1809
- Extension, or list thereof if 'key' is an extension.
1810
- Use
1811
- -- None for the directory default
1812
- -- "" for no automatic extension
1813
- -- A Format to specify the default extension for that format.
2348
+ file : str
2349
+ Filename, or list of filenames.
2350
+
2351
+ ext : str
2352
+ Extension, or list thereof if ``file`` is a list.
2353
+
2354
+ * Use ``None`` for the directory default.
2355
+ * Use ``""`` for no automatic extension.
1814
2356
 
1815
2357
  Returns
1816
2358
  -------
1817
- File size if 'key' is a string, otherwise a list thereof.
2359
+ File size if ``file`` is a string, otherwise a list thereof. Returns ``None`` if an error occurred.
1818
2360
  """
1819
- return self._getFileProperty( key=key, ext=ext, func=lambda x : os.path.getsize(x) )
2361
+ return self._getFileProperty( file=file, ext=ext, func=lambda x : os.path.getsize(x) )
1820
2362
 
1821
2363
  def rename( self, source : str, target : str, *, ext : str = None ):
1822
2364
  """
1823
- Rename "source" key into "target" key.
1824
- Function will raise an exception if not successful
2365
+ Rename a file.
2366
+
2367
+ This function will raise an exception if not successful.
1825
2368
 
1826
2369
  Parameters
1827
2370
  ----------
1828
- source, target:
1829
- filenames
1830
- ext :
1831
- Extension, or list thereof if 'key' is an extension.
1832
- Use
1833
- -- None for the directory default
1834
- -- "" for no automatic extensions.
1835
- -- A Format to specify the default extension for that format.
2371
+ source, target : str
2372
+ Filenames.
2373
+
2374
+ ext : str
2375
+ Extension.
2376
+
2377
+ * Use ``None`` for the directory default.
2378
+ * Use ``""`` for no automatic extension.
1836
2379
  """
1837
2380
  # empty directory
1838
2381
  if self._path is None:
@@ -1843,9 +2386,9 @@ class SubDir(object):
1843
2386
  sub, source_ = os.path.split(source)
1844
2387
  if len(source_) == 0: raise IsADirectoryError( source, txtfmt("'source' %s indicates a directory, not a file", source ))
1845
2388
  if len(sub) > 0:
1846
- src_full = self(sub).fullKeyName(key=source_,ext=ext)
2389
+ src_full = self(sub).full_file_name(file=source_,ext=ext)
1847
2390
  else:
1848
- src_full = self.fullKeyName( source, ext=ext )
2391
+ src_full = self.full_file_name( source, ext=ext )
1849
2392
 
1850
2393
  # handle directories in 'target'
1851
2394
  if len(target) == 0: raise ValueError("'target' missing (the filename)" )
@@ -1853,191 +2396,257 @@ class SubDir(object):
1853
2396
  if len(target_) == 0: raise IsADirectoryError( target, txtfmt("'target' %s indicates a directory, not a file", target))
1854
2397
  if len(sub) > 0:
1855
2398
  tar_dir = self(sub)
1856
- tar_dir.createDirectory()
1857
- tar_full = tar_dir.fullKeyName(key=target_,ext=ext)
2399
+ tar_dir.create_directory()
2400
+ tar_full = tar_dir.full_file_name(file=target_,ext=ext)
1858
2401
  else:
1859
- tar_full = self.fullKeyName( target, ext=ext )
1860
- self.createDirectory()
2402
+ tar_full = self.full_file_name( target, ext=ext )
2403
+ self.create_directory()
1861
2404
 
1862
2405
  os.rename(src_full, tar_full)
1863
2406
 
1864
2407
  # utilities
1865
2408
 
1866
2409
  @staticmethod
1867
- def removeBadKeyCharacters( key:str, by:str=' ' ) -> str:
2410
+ def remove_bad_file_characters( file : str, by : str="default" ) -> str:
1868
2411
  """
1869
- Replaces invalid characters in a filename by 'by'.
1870
- See util.fmt_filename() for documentation and further options.
2412
+ Replaces invalid characters in a filename using the map ``by``.
2413
+
2414
+ See :func:`cdxcore.util.fmt_filename` for documentation and further options.
1871
2415
  """
1872
- return fmt_filename( key, by=by )
1873
-
1874
- def unqiueLabelToKey( self, unique_label:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
1875
- """
1876
- Converts a unique label which might contain invalid characters into a unique file name, such that the full file name does not exceed 'max_length' bytes.
1877
- The returned key has the format
1878
- name + separator + ID
1879
- where ID has length id_length.
1880
- If unique_label is already guaranteed to be a valid filename, use unqiueLongFileNameToKey() instead.
1881
- """
1882
- len_ext = len(self.ext)
1883
- assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
1884
- uqf = uniqueLabelExt( max_length=max_length-len_ext, id_length=id_length, separator=separator, filename_by="default" )
1885
- return uqf( unique_label )
2416
+ return fmt_filename( file, by=by )
1886
2417
 
1887
- def unqiueLongFileNameToKey( self, unique_filename:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
1888
- """
1889
- Converts a unique filename which might be too long to a unique filename such that the total length plus 'ext' does not exceed 'max_length' bytes.
1890
- If the filename is already short enough, no change is made.
1891
-
1892
- If 'unique_filename' is not guaranteed to be a valid filename, use unqiueLabelToKey() instead.
1893
- """
1894
- len_ext = len(self.ext)
1895
- assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
1896
- uqf = uniqueLabelExt( max_length=max_length-len_ext, id_length=id_length, separator=separator )
1897
- return uqf( unique_filename )
2418
+ if False:
2419
+ def unqiueLabelToKey( self, unique_label:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
2420
+ """
2421
+ Converts a unique label which might contain invalid characters into a unique file name, such that the full file name does not exceed 'max_length' bytes.
2422
+ The returned file has the format
2423
+ name + separator + ID
2424
+ where ID has length id_length.
2425
+ If unique_label is already guaranteed to be a valid filename, use unqiueLongFileNameToKey() instead.
2426
+ """
2427
+ len_ext = len(self.ext)
2428
+ assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
2429
+ uqf = UniqueLabel( max_length=max_length-len_ext, id_length=id_length, separator=separator, filename_by="default" )
2430
+ return uqf( unique_label )
2431
+
2432
+ def unqiueLongFileNameToKey( self, unique_filename:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
2433
+ """
2434
+ Converts a unique filename which might be too long to a unique filename such that the total length plus 'ext' does not exceed 'max_length' bytes.
2435
+ If the filename is already short enough, no change is made.
2436
+
2437
+ If 'unique_filename' is not guaranteed to be a valid filename, use unqiueLabelToKey() instead.
2438
+ """
2439
+ len_ext = len(self.ext)
2440
+ assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
2441
+ uqf = UniqueLabel( max_length=max_length-len_ext, id_length=id_length, separator=separator )
2442
+ return uqf( unique_filename )
1898
2443
 
1899
2444
  # -- dict-like interface --
1900
2445
 
1901
- def __call__(self, keyOrSub : str,
2446
+ def __call__(self, element : str,
1902
2447
  default = RETURN_SUB_DIRECTORY,
1903
- raiseOnError : bool = False,
2448
+ raise_on_error : bool = False,
1904
2449
  *,
1905
2450
  version : str = None,
1906
2451
  ext : str = None,
1907
2452
  fmt : Format = None,
1908
2453
  delete_wrong_version : bool = True,
1909
- createDirectory : bool = None ):
2454
+ create_directory : bool = None ):
1910
2455
  """
1911
- Return either the value of a sub-key (file), or return a new sub directory.
1912
- If only one argument is used, then this function returns a new sub directory.
1913
- If two arguments are used, then this function returns read( keyOrSub, default ).
2456
+ Read either data from a file, or return a new sub directory.
2457
+
2458
+ If only the ``element`` argument is used, then this function returns a new sub directory
2459
+ named ``element``.
2460
+
2461
+ If both ``element`` and ``default`` arguments are used, then this function attempts to read the file ``element``
2462
+ from disk, returning ``default`` if it does not exist.
1914
2463
 
1915
- sd = SubDir("!/test")
2464
+ Assume we have a subdirectory ``sd``::
2465
+
2466
+ from cdxcore.subdir import SubDir
2467
+ sd = SubDir("!/test")
1916
2468
 
1917
- Member access:
1918
- x = sd('x', None) reads 'x' with default value None
1919
- x = sd('sd/x', default=1) reads 'x' from sub directory 'sd' with default value 1
1920
- x = sd('x', default=1, ext="tmp") reads 'x.tmp' from sub directory 'sd' with default value 1
2469
+ Reading files::
2470
+
2471
+ x = sd('file', None) # reads 'file' with default value None
2472
+ x = sd('sd/file', default=1) # reads 'file' from sub directory 'sd' with default value 1
2473
+ x = sd('file', default=1, ext="tmp") # reads 'file.tmp' with default value 1
1921
2474
 
1922
- Create sub directory:
1923
- sd2 = sd("subdir") creates and returns handle to subdirectory 'subdir'
1924
- sd2 = sd("subdir1/subdir2") creates and returns handle to subdirectory 'subdir1/subdir2'
1925
- sd2 = sd("subdir1/subdir2", ext=".tmp") creates and returns handle to subdirectory 'subdir1/subdir2' with extension "tmp"
1926
- sd2 = sd(ext=".tmp") returns handle to current subdirectory with extension "tmp"
2475
+ Create sub directory::
2476
+
2477
+ sd2 = sd("subdir") # creates and returns handle to subdirectory 'subdir'
2478
+ sd2 = sd("subdir1/subdir2") # creates and returns handle to subdirectory 'subdir1/subdir2'
2479
+ sd2 = sd("subdir1/subdir2", ext=".tmp") # creates and returns handle to subdirectory 'subdir1/subdir2' with extension "tmp"
2480
+ sd2 = sd(ext=".tmp") # returns handle to current subdirectory with extension "tmp"
1927
2481
 
1928
2482
  Parameters
1929
2483
  ----------
1930
- keyOrSub : str
1931
- identify the object requested. Should be a string or a list of strings.
1932
- default:
1933
- If specified, this function reads 'keyOrSub' with read( keyOrSub, default, *args, **kwargs )
1934
- If not specified, then this function calls SubDir(keyOrSub,parent=self,ext=ext,fmt=fmt)
2484
+ element : str
2485
+ File or directory name, or a list thereof.
2486
+
2487
+ default : optional
2488
+ If specified, this function reads ``element`` with
2489
+ ``read( element, default, *args, **kwargs )``.
1935
2490
 
1936
- The following keywords are only relevant when reading files.
1937
- They echo the parameters of read()
2491
+ If ``default`` is not specified, then this function returns a new sub-directory by calling
2492
+ ``SubDir(element,parent=self,ext=ext,fmt=fmt)``.
1938
2493
 
1939
- raiseOnError : bool
2494
+ create_directory : bool, optional
2495
+ *When creating sub-directories:*
2496
+
2497
+ Whether or not to instantly create the sub-directory. The default, ``None``, is to inherit the behaviour from ``self``.
2498
+
2499
+ raise_on_error : bool, optional
2500
+ *When reading files:*
2501
+
1940
2502
  Whether to raise an exception if reading an existing file failed.
1941
- By default this function fails silently and returns the default.
1942
- version : str
1943
- If not None, specifies the version of the current code base.
1944
- Use '*' to read any version (this is distrinct from reading a file without version).
1945
- If version is not' '*', then this version will be compared to the version of the file being read.
1946
- If they do not match, read fails (either by returning default or throwing an exception).
1947
- delete_wrong_version : bool
1948
- If True, and if a wrong version was found, delete the file.
1949
- ext : str
1950
- Extension overwrite, or a list thereof if key is a list
1951
- Set to:
1952
- -- None to use directory's default
1953
- -- '*' to use the extension implied by 'fmt'
1954
- -- for convenience 'ext' can also be a Format (in this case leave fmt to None)
1955
- fmt : Format
1956
- File format or None to use the directory's default.
1957
- Note that 'fmt' cannot be a list even if 'key' is.
1958
- Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
2503
+ By default this function fails silently and returns ``default``.
1959
2504
 
1960
- The following keywords are only relevant when accessing directories
1961
- They echo the parameters of __init__
1962
-
1963
- createDirectory : bool
1964
- Whether or not to create the directory. The default, None, is to inherit the behaviour from self.
1965
- ext : str
1966
- Set to None to inherit the parent's extension.
1967
- fmt : Format
1968
- Set to None to inherit the parent's format.
2505
+ Default is ``False``.
2506
+
2507
+ version : str, optional
2508
+ *When reading files:*
2509
+
2510
+ If not ``None``, specifies the version of the current code base.
2511
+
2512
+ In this case, this version will be compared to the version of the file being read.
2513
+ If they do not match, read fails (either by returning default or throwing a :class:`cdxcore.version.VersionError` exception).
2514
+
2515
+ You can specify version ``"*"`` to accept any version.
2516
+ Note that this is distinct
2517
+ from using ``None`` which stipulates that the file should not
2518
+ have version information.
2519
+
2520
+ Default is ``None``.
2521
+
2522
+ delete_wrong_version : bool, optional
2523
+ *When reading files:*
2524
+
2525
+ If ``True``, and if a wrong version was found, delete the file.
2526
+
2527
+ Default is ``True``.
2528
+
2529
+ ext : str, optional
2530
+ *When reading files:*
2531
+
2532
+ Extension to be used, or a list thereof if ``element`` is a list. Defaults
2533
+ to the extension of ``self``.
2534
+
2535
+ Semantics:
2536
+
2537
+ * ``None`` to use the default extension of ``self``.
2538
+ * ``"*"`` to use the extension implied by ``fmt``.
2539
+ * ``""`` to turn off extension management.
2540
+
2541
+ *When creating sub-directories:*
2542
+
2543
+ Extension for the new subdirectory; set to ``None`` to inherit the parent's extension.
2544
+
2545
+ Default is ``None``.
2546
+
2547
+
2548
+ fmt : :class:`cdxcore.subdir.Format`, optional
2549
+ *When reading files:*
2550
+
2551
+ File format or ``None`` to use the directory's default.
2552
+ Note that ``fmt`` cannot be a list even if ``element`` is.
2553
+ Unless
2554
+ ``ext`` or the SubDir's extension is ``"*"``, changing the
2555
+ format does not automatically change the extension.
2556
+
2557
+ *When creating sub-directories:*
1969
2558
 
2559
+ Format for the new sub-directory; set to ``None`` to inherit the parent's format.
2560
+
2561
+ Default is ``None``.
2562
+
1970
2563
  Returns
1971
2564
  -------
2565
+ Object : type|SubDir
1972
2566
  Either the value in the file, a new sub directory, or lists thereof.
1973
- Returns None if an element was not found.
1974
2567
  """
1975
2568
  if default == SubDir.RETURN_SUB_DIRECTORY:
1976
- if not isinstance(keyOrSub, str):
1977
- if not isinstance(keyOrSub, Collection):
1978
- raise ValueError(txtfmt("'keyOrSub' must be a string or an iterable object. Found type '%s;", type(keyOrSub)))
1979
- return [ SubDir( k,parent=self,ext=ext,fmt=fmt,createDirectory=createDirectory) for k in keyOrSub ]
1980
- return SubDir(keyOrSub,parent=self,ext=ext,fmt=fmt,createDirectory=createDirectory)
1981
- return self.read( key=keyOrSub,
2569
+ if not isinstance(element, str):
2570
+ if not isinstance(element, Collection):
2571
+ raise ValueError(txtfmt("'element' must be a string or an iterable object. Found type '%s;", type(element)))
2572
+ return [ SubDir( k,parent=self,ext=ext,fmt=fmt,create_directory=create_directory) for k in element ]
2573
+ return SubDir(element,parent=self,ext=ext,fmt=fmt,create_directory=create_directory)
2574
+ return self.read( file=element,
1982
2575
  default=default,
1983
- raiseOnError=raiseOnError,
2576
+ raise_on_error=raise_on_error,
1984
2577
  version=version,
1985
2578
  delete_wrong_version=delete_wrong_version,
1986
2579
  ext=ext,
1987
2580
  fmt=fmt )
1988
2581
 
1989
- def __getitem__( self, key ):
2582
+ def __getitem__( self, file ):
1990
2583
  """
1991
- Reads self[key]
1992
- If 'key' does not exist, throw a KeyError
2584
+ Reads ``file`` using :meth:`cdxcore.subdir.SubDir.read`.
2585
+ If ``file`` does not exist, throw a :class:`KeyError`.
1993
2586
  """
1994
- return self.read( key=key, default=None, raiseOnError=True )
2587
+ return self.read( file=file, default=None, raise_on_error=True )
1995
2588
 
1996
- def __setitem__( self, key, value):
1997
- """ Writes 'value' to 'key' """
1998
- self.write(key,value)
2589
+ def __setitem__( self, file, value):
2590
+ """ Writes ``value`` to ``file`` using :meth:`cdxcore.subdir.SubDir.write`. """
2591
+ self.write(file,value)
1999
2592
 
2000
- def __delitem__(self,key):
2001
- """ Silently delete self[key] """
2002
- self.delete(key, False )
2593
+ def __delitem__(self,file):
2594
+ """ Silently delete ``file`` using :meth:`cdxcore.subdir.SubDir.delete`. """
2595
+ self.delete(file, False )
2003
2596
 
2004
2597
  def __len__(self) -> int:
2005
- """ Return the number of files (keys) in this directory """
2006
- return len(self.keys())
2598
+ """ Return the number of files in this directory with matching extension. """
2599
+ return len(self.files())
2007
2600
 
2008
2601
  def __iter__(self):
2009
- """ Returns an iterator which allows traversing through all keys (files) below this directory """
2010
- return self.keys().__iter__()
2011
-
2012
- def __contains__(self, key):
2013
- """ Implements 'in' operator """
2014
- return self.exists(key)
2015
-
2016
- # -- object like interface --
2602
+ """ Returns an iterator which allows traversing through all files in this directory with matching extension. """
2603
+ return self.files().__iter__()
2017
2604
 
2018
- def __getattr__(self, key):
2019
- """
2020
- Allow using member notation to get data
2021
- This function throws an AttributeError if 'key' is not found.
2605
+ def __contains__(self, file):
2606
+ """ Tests whether ``file`` :meth:`cdxcore.subdir.SubDir.exists`. """
2607
+ return self.exists(file)
2608
+
2609
+ def items(self, *, ext : str = None, raise_on_error : bool = False) -> Iterable:
2022
2610
  """
2023
- if not self.exists(key):
2024
- raise AttributeError(key)
2025
- return self.read( key=key, raiseOnError=True )
2611
+ Dictionary-style iterable of filenames and their content.
2612
+
2613
+ Usage::
2614
+
2615
+ subdir = SubDir("!")
2616
+ for file, data in subdir.items():
2617
+ print( file, str(data)[:100] )
2026
2618
 
2027
- def __setattr__(self, key, value):
2619
+ Parameters
2620
+ ----------
2621
+ ext : str
2622
+ Extension or ``None`` for the directory's current extension. Use ``""``
2623
+ for all file extensions.
2624
+
2625
+ Returns
2626
+ -------
2627
+ Iterable
2628
+ An iterable generator
2629
+ """
2630
+ class ItemIterable(Iterable):
2631
+ def __init__(_):
2632
+ _._files = self.files(ext=ext)
2633
+ _._subdir = self
2634
+ def __len__(_):
2635
+ return len(_._files)
2636
+ def __iter__(_):
2637
+ for file in _._files:
2638
+ data = _._subdir.read(file, ext=ext, raise_on_error=raise_on_error)
2639
+ yield file, data
2640
+ return ItemIterable()
2641
+
2642
+ # convenient path ops
2643
+ # -------------------
2644
+
2645
+ def __add__(self, directory : str) -> str:
2028
2646
  """
2029
- Allow using member notation to write data
2030
- Note: keys starting with '_' are /not/ written to disk
2647
+ Returns the subdirectory ``directory`` of ``self``.
2031
2648
  """
2032
- if key[0] == '_':
2033
- self.__dict__[key] = value
2034
- else:
2035
- self.write(key,value)
2036
-
2037
- def __delattr__(self, key):
2038
- """ Silently delete a key with member notation. """
2039
- verify( key[:1] != "_", "Deleting protected or private members disabled. Fix __delattr__ to support this")
2040
- return self.delete( key=key, raiseOnError=False )
2649
+ return SubDir(directory,parent=self)
2041
2650
 
2042
2651
  # pickling
2043
2652
  # --------
@@ -2052,7 +2661,22 @@ class SubDir(object):
2052
2661
  self._ext = state['ext']
2053
2662
  self._fmt = state['fmt']
2054
2663
  self._crt = state['crt']
2664
+
2665
+ @staticmethod
2666
+ def as_format( format_name : str ) -> int:
2667
+ """
2668
+ Converts a named format into the respective format code.
2669
+
2670
+ Example::
2055
2671
 
2672
+ format = SubDir.as_format( config("format", "pickle", SubDir.FORMAT_NAMES, "File format") )
2673
+ """
2674
+ format_name = format_name.upper()
2675
+ if not format_name in SubDir.FORMAT_NAMES:
2676
+ raise LookupError(f"Unknown format name '{format_name}'. Must be one of: {fmt_list(SubDir.FORMAT_NAMES)}")
2677
+ return Format[format_name]
2678
+
2679
+
2056
2680
  # caching
2057
2681
  # -------
2058
2682
 
@@ -2066,129 +2690,249 @@ class SubDir(object):
2066
2690
  exclude_arg_types : list[type] = None,
2067
2691
  version_auto_class : bool = True):
2068
2692
  """
2069
- Wraps a callable or a class into a cachable function.
2070
- Caching is based on the following two simple principles:
2071
-
2072
- 1) Unique Call ID:
2073
- When a function is called with some parameters, the wrapper identifies a unique ID based
2074
- on the qualified name of the function and on its runtime functional parameters (ie those
2075
- which alter the outcome of the function).
2076
- When a function is called the first time with a given unique call ID, it will store
2077
- the result of the call to disk. If the function is called with the same call ID again,
2078
- the result is read from disk and returned.
2079
-
2080
- To compute unique call IDs' cdxbasics.util.namedUniqueHashExt() is used.
2081
- Please read implementation comments there:
2082
- Key default features:
2083
- * It hashes objects via their __dict__ or __slot__ members.
2084
- This can be overwritten for a class by implementing __unique_hash__; see cdxbasics.util.namedUniqueHashExt().
2085
- * Function members of objects or any members starting with '_' are not considered
2086
- unless this behaviour is changed using CacheController().
2087
- * Numpy and panda frames are hashed using their byte representation.
2088
- That is slow and not recommended. It is better to identify numpy/panda inputs
2089
- via their generating characteristic ID.
2090
-
2091
- 2) Version:
2092
- Each function has a version, which includes dependencies on other functions or classes.
2093
- If the version of a result on disk does not match the current version, it is deleted
2094
- and the function is called again. This way you can use your code to drive updates
2095
- to data generated with cached functions.
2096
- Behind the scenes this is implemented using cdxbasics.version.version() which means
2097
- that the version of a cached function can also depend on versions of non-cached functions
2098
- or other objects.
2099
-
2100
- Functions
2101
- ---------
2102
- Example of caching functions:
2103
-
2104
- Cache a simple function 'f':
2105
-
2106
- from cdxbasics.subdir import SubDir
2107
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2108
-
2109
- @cache.cache("0.1")
2110
- def f(x,y):
2111
- return x*y
2112
-
2113
- _ = f(1,2) # function gets computed and the result cached
2114
- _ = f(1,2) # restore result from cache
2115
- _ = f(2,2) # different parameters: compute and store result
2693
+ Advanced versioned caching for callables.
2694
+
2695
+ Versioned caching is based on the following two simple principles:
2696
+
2697
+ 1) **Unique Call IDs:**
2698
+
2699
+ When a function is called with some parameters, the wrapper identifies a unique ID based
2700
+ on the qualified name of the function and on its runtime functional parameters (ie those
2701
+ which alter the outcome of the function).
2702
+ When a function is called the first time with a given unique call ID, it will store
2703
+ the result of the call to disk. If the function is called with the same call ID again,
2704
+ the result is read from disk and returned.
2705
+
2706
+ To compute unique call IDs :class:`cdxcore.uniquehash.NamedUniqueHash` is used
2707
+ by default.
2708
+
2709
+ 2) **Code Version:**
2710
+
2711
+ Each function has a version, which includes dependencies on other functions or classes.
2712
+ If the version of data on disk does not match the current version, it is deleted
2713
+ and the generating function is called again. This way you can use your code to drive updates
2714
+ to data generated with cached functions.
2715
+
2716
+ Behind the scenes this is implemented using :dec:`cdxcore.version.version` which means
2717
+ that the version of a cached function can also depend on versions of non-cached functions
2718
+ or other objects.
2719
+
2720
+ Caching Functions
2721
+ ^^^^^^^^^^^^^^^^^
2722
+
2723
+ Caching a simple function ``f`` is straightforward:
2724
+
2725
+ .. code-block:: python
2726
+
2727
+ from cdxcore.subdir import SubDir
2728
+ cache = SubDir("!/.cache")
2729
+ cache.delete_all_content() # for illustration
2730
+
2731
+ @cache.cache("0.1")
2732
+ def f(x,y):
2733
+ return x*y
2734
+
2735
+ _ = f(1,2) # function gets computed and the result cached
2736
+ _ = f(1,2) # restore result from cache
2737
+ _ = f(2,2) # different parameters: compute and store result
2738
+
2739
+ Cache another function ``g`` which calls ``f``, and whose version therefore depends on ``f``'s version:
2740
+
2741
+ .. code-block:: python
2742
+
2743
+ @cache.cache("0.1", dependencies=[f])
2744
+ def g(x,y):
2745
+ return f(x,y)**2
2746
+
2747
+ **Debugging**
2748
+
2749
+ When using automated caching it
2750
+ is important to understand how changes in parameters and the version of a function
2751
+ affect caching. To this end, :dec:`cdxcore.subdir.SubDir.cache` supports
2752
+ a tracing mechanism via the use of a :class:`cdxcore.subdir.CacheController`:
2753
+
2754
+ .. code-block:: python
2755
+
2756
+ from cdxcore.subdir import SubDir, CacheController, Context
2757
+
2758
+ ctrl = CacheController( debug_verbose=Context("all") )
2759
+ cache = SubDir("!/.cache", cache_controller=ctrl )
2760
+ cache.delete_all_content() # <- delete previous cached files, for this example only
2761
+
2762
+ @cache.cache("0.1")
2763
+ def f(x,y):
2764
+ return x*y
2765
+
2766
+ _ = f(1,2) # function gets computed and the result cached
2767
+ _ = f(1,2) # restore result from cache
2768
+ _ = f(2,2) # different parameters: compute and store result
2769
+
2770
+ Returns:
2116
2771
 
2117
- Another function g which calls f, and whose version therefore on f's version:
2772
+ .. code-block:: python
2773
+
2774
+ 00: cache(f@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
2775
+ 00: cache(f@__main__): called 'f@__main__' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f@__main__ 668a6b111549e288.pck'.
2776
+ 00: cache(f@__main__): read 'f@__main__' version 'version 0.1' from cache 'C:/Users/hans/AppData/Local/Temp/.cache/f@__main__ 668a6b111549e288.pck'.
2777
+ 00: cache(f@__main__): called 'f@__main__' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f@__main__ b5609542d7da0b04.pck'.
2118
2778
 
2119
- from cdxbasics.subdir import SubDir
2120
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2779
+ **Non-Functional Parameters**
2780
+
2781
+ A function may have non-functional parameters which do not alter the function's outcome.
2782
+ An example are ``debug`` flags:
2783
+
2784
+ .. code-block:: python
2785
+
2786
+ from cdxcore.subdir import SubDir
2787
+ cache = SubDir("!/.cache")
2788
+
2789
+ @cache.cache("0.1", dependencies=[f], exclude_args='debug')
2790
+ def g(x,y,debug): # <--' 'debug' is a non-functional parameter
2791
+ if debug:
2792
+ print(f"h(x={x},y={y})")
2793
+ return f(x,y)**2
2794
+
2795
+ You can define certain types as non-functional for *all* functions wrapped
2796
+ by :meth:`cdxcore.subdir.SubDir.cache` when constructing
2797
+ the :class:`cdxcore.subdir.CacheController` parameter for :class:`cdxcore.subdir.SubDir`:
2798
+
2799
+ .. code-block:: python
2121
2800
 
2122
- @cache.cache("0.1", dependencies=[f])
2123
- def g(x,y):
2124
- return g(x,y)**2
2801
+ from cdxcore.subdir import SubDir
2802
+
2803
+ class Debugger:
2804
+ def output( cond, message ):
2805
+ print(message)
2806
+
2807
+ ctrl = CacheController(exclude_arg_types=[Debugger]) # <- exclude 'Debugger' parameters from hashing
2808
+ cache = SubDir("!/.cache", cache_controller=ctrl)
2125
2809
 
2126
- A function may have non-functional parameters which do not alter the function's outcome.
2127
- An example are 'debug' flags:
2128
-
2129
- from cdxbasics.subdir import SubDir
2130
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2810
+ @cache.cache("0.1", dependencies=[f], exclude_args='debug')
2811
+ def g(x,y,debugger : Debugger): # <-- 'debugger' is a non-functional parameter
2812
+ debugger.output(f"h(x={x},y={y})")
2813
+ return f(x,y)**2
2814
+
2815
+ **Unique IDs and File Naming**
2816
+
2817
+ The unique call ID of a decorated function is by default generated from its fully qualified name
2818
+ and a unique hash of its functional parameters.
2819
+
2820
+ Key default behaviours of :class:`cdxcore.uniquehash.NamedUniqueHash`:
2821
+
2822
+ * The ``NamedUniqueHash`` hashes objects via their ``__dict__`` or ``__slots__`` members.
2823
+ This can be overwritten for a class by implementing ``__unique_hash__``; see :class:`cdxcore.uniquehash.NamedUniqueHash`.
2824
+
2825
+ * Function members of objects or any members starting with '_' are not hashed
2826
+ unless this behaviour is changed using :class:`cdxcore.subdir.CacheController`.
2827
+
2828
+ * Numpy and panda frames are hashed using their byte representation.
2829
+ That is slow and not recommended. It is better to identify numpy/panda inputs
2830
+ via their generating characteristic ID.
2831
+
2832
+ Either way, hashes are not particularly human readable. It is often useful
2833
+ to have unique IDs and therefore filenames which carry some context information.
2834
+
2835
+ This can be achieved by using ``label``:
2131
2836
 
2132
- @cache.cache("0.1", dependencies=[f], exclude_args='debug')
2133
- def g(x,y,debug): # <-- debug is a non-functional parameter
2134
- if debug:
2135
- print(f"h(x={x},y={y})")
2136
- return g(x,y)**2
2137
-
2138
- You can systematically define certain types as non-functional for *all* functions wrapped
2139
- by this SubDir by specifying the respective parameter for the CacheController() in SubDir.__init__().
2140
-
2141
- The Unique Call ID of a functions is by default generated by its fully qualified name
2142
- and a unique hash of its functional parameters.
2143
- This can be made more readable by using id=
2144
-
2145
- from cdxbasics.subdir import SubDir
2146
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2147
-
2148
- @cache.cache("0.1", id="f({x},{y}") # <- using a string to be passed to str.format()
2149
- def f(x,y):
2150
- return x*y
2837
+ .. code-block:: python
2151
2838
 
2152
- You can also use functions:
2153
-
2154
- from cdxbasics.subdir import SubDir
2155
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2156
-
2157
- # Using a function 'id'. Note the **_ to catch uninteresting parameters, here 'debug'
2158
- @cache.cache("0.1", id=lambda x,y,**_: f"h({x},{y})", exclude_args='debug')
2159
- def h(x,y,debug=False):
2160
- if debug:
2161
- print(f"h(x={x},y={y})")
2162
- return x*y
2839
+ from cdxcore.subdir import SubDir, CacheController
2840
+ ctrl = CacheController( debug_verbose=Context("all") )
2841
+ cache = SubDir("!/.cache", cache_controller=ctrl )
2842
+ cache.delete_all_content() # for illustration
2843
+
2844
+ @cache.cache("0.1") # <- no ID
2845
+ def f1(x,y):
2846
+ return x*y
2847
+
2848
+ @cache.cache("0.1", label="f2({x},{y})") # <- label uses a string to be passed to str.format()
2849
+ def f2(x,y):
2850
+ return x*y
2851
+
2852
+ We can also use a function to generate a ``label``. In that case all parameters
2853
+ to the function including its ``name`` are passed to the function. In below example
2854
+ we eat any parameters we are not interested in with ``** _``:
2163
2855
 
2164
- Note that by default it is not assumed that the call Id returned by id is unique,
2165
- and a hash generated from all pertinent arguments will be generated.
2166
- That is why in the previous example we still need to exclude_args 'debug' here.
2856
+ .. code-block:: python
2167
2857
 
2168
- If the id you generate is guaranteed to be unique for all functional parameter values,
2169
- you can add unique=True. In this case the filename of the function
2858
+ @cache.cache("0.1", label=lambda x,y,**_: f"h({x},{y})", exclude_args='debug')
2859
+ def h(x,y,debug=False):
2860
+ if debug:
2861
+ print(f"h(x={x},y={y})")
2862
+ return x*y
2170
2863
 
2171
- from cdxbasics.subdir import SubDir
2172
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2173
-
2174
- # Using a function 'id' with 'unique' to generate a unique ID.
2175
- @cache.cache("0.1", id=lambda x,y,**_: f"h({x},{y})", unique=True)
2176
- def h(x,y,debug=False):
2177
- if debug:
2178
- print(f"h(x={x},y={y})")
2179
- return x*y
2864
+ We obtain:
2865
+
2866
+ .. code-block:: python
2867
+
2868
+ f1(1,1)
2869
+ f2(1,1)
2870
+ h(1,1)
2871
+
2872
+ 00: cache(f1@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
2873
+ 00: cache(f2@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
2874
+ 00: cache(h@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
2875
+ 00: cache(f1@__main__): called 'f1@__main__' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f1@__main__ ef197d80d6a0bbb0.pck'.
2876
+ 00: cache(f2@__main__): called 'f2(1,1)' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f2(1,1) bdc3cd99157c10f7.pck'.
2877
+ 00: cache(h@__main__): called 'h(1,1)' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/h(1,1) d3fdafc9182070f4.pck'.
2878
+
2879
+ Note that the file names ``f2(1,1) bdc3cd99157c10f7.pck``
2880
+ and ``h(1,1) d3fdafc9182070f4.pck`` for the ``f2`` and ``h`` function calls are now easier to read as
2881
+ they are comprised of the label
2882
+ of the function and a terminal hash key.
2883
+ The trailing hash is appended because we do not assume that the label returned by ``label`` is unique.
2884
+ Therefore, a hash generated from the ``label`` itself and
2885
+ all pertinent arguments will be appended to the filename.
2886
+
2887
+ If we know how to generate truly unique IDs which are always valid filenames, then we can use ``uid``
2888
+ instead of ``label``:
2889
+
2890
+ .. code-block:: python
2891
+
2892
+ @cache.cache("0.1", uid=lambda x,y,**_: f"h2({x},{y})", exclude_args='debug')
2893
+ def h2(x,y,debug=False):
2894
+ if debug:
2895
+ print(f"h(x={x},y={y})")
2896
+ return x*y
2897
+ h2(1,1)
2898
+
2899
+ yields::
2900
+
2901
+ 00: cache(h2@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
2902
+ 00: cache(h2@__main__): called 'h2(1,1)' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/h2(1,1).pck'.
2903
+
2904
+ In particular, the filename is now ``h2(1,1).pck`` without any hash.
2905
+ If ``uid`` is used the parameters of the function are not hashed. Like ``label``
2906
+ the parameter ``uid`` can also be a :func:`str.format` string or a callable.
2180
2907
 
2181
- Numpy/Panda
2182
- -----------
2908
+ **Controlling which Parameters to Hash**
2909
+
2910
+ To specify which parameters are pertinent for identifying a unique id, use:
2911
+
2912
+ * ``include_args``: list of function arguments to include. If ``None``, use all parameters as input in the next step
2913
+
2914
+ * ``exclude_args``: list of function arguments to exclude, if not ``None``.
2915
+
2916
+ * ``exclude_arg_types``: a list of types to exclude.
2917
+ This is helpful if control flow is managed with dedicated data types.
2918
+ An example of such a type is :class:`cdxcore.verbose.Context` which is used to print hierarchical output messages.
2919
+ Types can be globally excluded using a :class:`cdxcore.subdir.CacheController`
2920
+ when calling
2921
+ :class:`cdxcore.subdir.SubDir`.
2922
+
2923
+ **Numpy/Pandas**
2924
+
2183
2925
  Numpy/Panda data should not be hashed for identifying unique call IDs.
2184
2926
  Instead, use the defining characteristics for generating the data frames.
2185
2927
 
2186
2928
  For example:
2187
-
2188
- from cdxbasics.subdir import SubDir
2189
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2190
2929
 
2191
- from cdxbasics.prettydict import pdct
2930
+ .. code-block:: python
2931
+
2932
+ from cdxcore.pretty import PrettyObject
2933
+ from cdxcore.subdir import SubDir
2934
+ cache = SubDir("!/.cache")
2935
+ cache.delete_all_content() # for illustration
2192
2936
 
2193
2937
  @cache.cache("0.1")
2194
2938
  def load_src( src_def ):
@@ -2201,22 +2945,24 @@ class SubDir(object):
2201
2945
  stats = ... using data
2202
2946
  return stats
2203
2947
 
2204
- src_def = pdct()
2948
+ src_def = PrettyObject()
2205
2949
  src_def.start = "2010-01-01"
2206
2950
  src_def.end = "2025-01-01"
2207
2951
  src_def.x = 0.1
2208
2952
 
2209
- stats_def = pdct()
2953
+ stats_def = PrettyObject()
2210
2954
  stats_def.lambda = 0.1
2211
2955
  stats_def.window = 100
2212
2956
 
2213
2957
  data = load_src( src_def )
2214
2958
  stats = statistics( stats_def, src_def, data )
2215
2959
 
2216
- While instructive, this case is not optimal: we do not really need to load 'data'
2217
- if we can reconstruct 'stats' from 'data' (unless we need 'data' further on).
2960
+ While instructive, this case is not optimal: we do not really need to load ``data``
2961
+ if we can reconstruct ``stats`` from ``data`` (unless we need ``data`` further on).
2218
2962
 
2219
- Consider therefore
2963
+ Consider therefore:
2964
+
2965
+ .. code-block:: python
2220
2966
 
2221
2967
  @cache.cache("0.1")
2222
2968
  def load_src( src_def ):
@@ -2232,14 +2978,18 @@ class SubDir(object):
2232
2978
 
2233
2979
  stats = statistics_only( stats_def, src_def )
2234
2980
 
2235
- Member functions
2236
- ----------------
2981
+ Caching Member Functions
2982
+ ^^^^^^^^^^^^^^^^^^^^^^^^
2983
+
2237
2984
  You can cache member functions like any other function.
2238
- Note that version information are by default inherited, i.e. member functions will be dependent on the version of their
2239
- defining class, and class versions will be dependent on their base classes' versions.
2985
+ Note that :dec:`cdxcore.version.version` information are by default inherited, i.e. member functions will be dependent on the version of their
2986
+ defining class, and class versions will be dependent on their base classes' versions:
2240
2987
 
2241
- from cdxbasics.subdir import SubDir, version
2242
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2988
+ .. code-block:: python
2989
+
2990
+ from cdxcore.subdir import SubDir, version
2991
+ cache = SubDir("!/.cache")
2992
+ cache.delete_all_content() # for illustration
2243
2993
 
2244
2994
  @version("0.1")
2245
2995
  class A(object):
@@ -2259,18 +3009,26 @@ class SubDir(object):
2259
3009
  _ = b.f(y=1) # same unique call ID as previous call -> restore result from disk
2260
3010
 
2261
3011
  **WARNING**
2262
- The hashing function used -- cdxbasics.util.uniqueHashExt() -- does by default *not* process members of objects or dictionaries
2263
- which start with a "_". This behaviour can be changed using CacheController().
2264
- For reasonably complex objects it is recommended to implement:
2265
- __unique_hash__( self, length : int, parse_functions : bool, parse_underscore : str )
2266
- (it is also possible to simply set this value to a string constant).
3012
+ :class:`cdxcore.uniquehash.UniqueHash` does *not* by default process members of objects or dictionaries
3013
+ which start with a "_". This behaviour can be changed using :class:`cdxcore.subdir.CacheController`.
3014
+ For reasonably complex objects it is recommended to implement for your objects
3015
+ a custom hashing function::
3016
+
3017
+ __unique_hash__( self, uniqueHash : UniqueHash, debug_trace : DebugTrace )
3018
+
3019
+ This function is described at :class:`cdxcore.uniquehash.UniqueHash`.
3020
+
3021
+ Caching Bound Member Functions
3022
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2267
3023
 
2268
- Bound Member Functions
2269
- ----------------------
2270
- Note that above is functionally different to decorating a bound member function:
3024
+ Caching bound member functions is technically quite different to caching a function of a class in general,
3025
+ but also supported:
2271
3026
 
2272
- from cdxbasics.subdir import SubDir, version
2273
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
3027
+ .. code-block:: python
3028
+
3029
+ from cdxcore.subdir import SubDir, version
3030
+ cache = SubDir("!/.cache", cache_controller=CacheController(debug_verbose=Context("all")))
3031
+ cache.delete_all_content() # for illustration
2274
3032
 
2275
3033
  class A(object):
2276
3034
  def __init__(self,x):
@@ -2282,133 +3040,177 @@ class SubDir(object):
2282
3040
  f = cache.cache("0.1", id=lambda self, y : f"a.f({y})")(a.f) # <- decorate bound 'f'.
2283
3041
  r = f(y=2)
2284
3042
 
2285
- In this case the function 'f' is bound to 'a'. The object is added as 'self' to the function
2286
- parameter list even though the bound function parameter list does not include 'self'.
2287
- This, together with the comments on hashing objects above, ensures that (hashed) changes to 'a' will
3043
+ In this case the function ``f`` is bound to ``a``. The object is added as ``self`` to the function
3044
+ parameter list even though the bound function parameter list does not include ``self``.
3045
+ This, together with the comments on hashing objects above, ensures that (hashed) changes to ``a`` will
2288
3046
  be reflected in the unique call ID for the member function.
2289
3047
 
2290
- Classes
2291
- -------
2292
- Classes can also be cached.
2293
- This is done in two steps: first, the class itself is decorated to provide version information at its own level.
2294
- Secondly, decorate __init__ which also helps to define the unique call id. You do not need to specify a version
2295
- for __init__ as its version usually coincides with the version of the class.
2296
-
2297
- Simple example:
2298
-
2299
- cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
2300
-
2301
- @cache.cache("0.1")
2302
- class A(object):
2303
-
2304
- @cache.cache(exclude_args=['debug'])
2305
- def __init__(self, x, debug):
2306
- if debug:
2307
- print("__init__",x)
2308
- self.x = x
3048
+ Caching Classes
3049
+ ^^^^^^^^^^^^^^^
2309
3050
 
2310
- __init__ does not actually return a value; for this reason the actual function decorated will be __new__.
2311
- Attempting to cache decorate __new__ will lead to an exception.
3051
+ Classes can also be cached. In this case the creation of a class is cached, i.e. a call to
3052
+ the class constructor restores the respective object from disk.
2312
3053
 
2313
- A nuance for __init__ vs ordinary member function is that 'self' is non-functional.
2314
- It is therefore automatically excluded from computing a unique call ID.
2315
- Specifically, 'self' is not part of the arguments passed to 'id':
3054
+ This is done in two steps:
2316
3055
 
2317
- @cache.cache("0.1")
2318
- class A(object):
3056
+ 1) first, the class itself is decorated using
3057
+ :dec:`cdxcore.subdir.SubDir.cache`
3058
+ to provide version information at class level. Only version information are provided here.
3059
+
3060
+ 2) Secondly, decorate ``__init__``. You do not need to specify a version
3061
+ for ``__init__`` as its version usually coincides with the version of the class. At ``__init__``
3062
+ you define how unique IDs are generated from the parameters passed to object construction.
3063
+
3064
+ Simple example:
3065
+
3066
+ .. code-block:: python
3067
+
3068
+ from cdxcore.subdir import SubDir
3069
+ cache = SubDir("!/.cache")
3070
+ cache.delete_all_content() # for illustration
3071
+
3072
+ @cache.cache("0.1")
3073
+ class A(object):
3074
+
3075
+ @cache.cache(exclude_args=['debug'])
3076
+ def __init__(self, x, debug):
3077
+ if debug:
3078
+ print("__init__",x)
3079
+ self.x = x
2319
3080
 
2320
- @cache.cache("0.1", id=lambda x, debug: f"A.__init__(x={x})") # <-- 'self' is not passed to the lambda function; no need to add **_
2321
- def __init__(self, x, debug):
2322
- if debug:
2323
- print("__init__",x)
2324
- self.x = x
3081
+ a = A(1) # caches 'a'
3082
+ b = A(1) # reads the cached object into 'b'
2325
3083
 
2326
- Decorating classes with __slots__ does not yet work.
2327
-
2328
- Non-functional parameters
2329
- -------------------------
2330
- Often functions have parameters which do not alter the output of the function but control i/o or other aspects of the overall environment.
2331
- An example is a function parameter 'debug':
2332
-
2333
- def f(x,y,debug=False):
2334
- z = x*y
2335
- if not debug:
2336
- print(f"x={x}, y={y}, z={z}")
2337
- return z
2338
-
2339
- To specify which parameters are pertinent for identiying a unique id, use:
2340
-
2341
- a) include_args: list of functions arguments to include. If None, use all as input in the next step
2342
- b) exclude_args: list of funciton arguments to exclude, if not None.
2343
- c) exclude_arg_types: a list of types to exclude. This is helpful if control flow is managed with dedicated data types.
2344
- An example of such a type is cdxbasics.verbose.Context which is used to print hierarchical output messages.
2345
- Types can be globally excluded using the CacheController.
3084
+ **Technical Comments**
3085
+
3086
+ The function ``__init__`` does not actually return a value; for this reason
3087
+ behind the scenes it is actually ``__new__`` which is being decorated.
3088
+ Attempting to cache-decorate ``__new__`` manually will lead to an exception.
3089
+
3090
+ A nuance for ``__init__`` vs ordinary member function is that the
3091
+ ``self`` parameter is non-functional
3092
+ (in the sense that it is an empty object when ``__init__`` is called).
3093
+ ``self`` is therefore automatically excluded from computing a unique call ID.
3094
+ That also means ``self`` is not part of the arguments passed to ``uid``:
3095
+
3096
+ .. code-block:: python
3097
+
3098
+ @cache.cache("0.1")
3099
+ class A(object):
2346
3100
 
3101
+ @cache.cache("0.1", uid=lambda x, debug: f"A.__init__(x={x})") # <-- 'self' is not passed to the lambda function; no need to add **_
3102
+ def __init__(self, x, debug):
3103
+ if debug:
3104
+ print("__init__",x)
3105
+ self.x = x
3106
+
3107
+ Decorating classes with ``__slots__`` does not yet work.
3108
+
2347
3109
  See also
2348
- --------
2349
- For project-wide use it is usually inconvenient to control caching at the level of a 'directory'.
2350
- See VersionedCacheRoot() is a thin wrapper around a SubDir with a CacheController.
3110
+ ^^^^^^^^
3111
+
3112
+ For project-wide use it is usually inconvenient to control caching at the level of a
3113
+ project-wide cache root directory.
3114
+ See :class:`cdxcore.subdir.VersionedCacheRoot` for a thin convenience wrapper around a :class:`cdxcore.subdir.SubDir`
3115
+ with a :class:`cdxcore.subdir.CacheController`.
2351
3116
 
2352
3117
  Parameters
2353
3118
  ----------
2354
3119
  version : str, optional
2355
3120
  Version of the function.
2356
- * If None then F must be decorated with cdxbasics.version.version().
2357
- * If set, the function F is first decorated with cdxbasics.version.version().
2358
- dependencies : list, optional
2359
- List of version dependencies
2360
-
2361
- id : str, Callable
2362
- Create a call label for the function call and its parameters.
2363
- See above for a description.
2364
- * A plain string without {} formatting: this is the fully qualified id
2365
- * A string with {} formatting: id.str( name=name, **parameters ) will be used to generate the fully qualified id
2366
- * A Callable, in which case id( name=name, **parameters ) will be used to generate the fully qualified id
2367
-
2368
- unique : bool
2369
- Whether the 'id' generated by 'id' is unique for this function call with its parameters.
2370
- If True, then the function will attempt to use 'id' as filename as long as it has no invalid characters and is short
2371
- enough (see 'max_filename_length').
2372
- If False, the function will append to the 'id' a unique hash of the qualified function name and all pertinent parameters
2373
-
2374
- name : str
2375
- The name of the function, or None for using the fully qualified function name.
2376
3121
 
3122
+ * If ``None`` then ``F`` must be decorated with :dec:`cdxcore.version.version`.
3123
+ * If set, the function ``F`` is first decorated with :dec:`cdxcore.version.version`.
3124
+
3125
+ dependencies : list[type], optional
3126
+ A list of version dependencies, either by reference or by name.
3127
+ See :dec:`cdxcore.version.version` for details on name lookup if strings are used.
3128
+
3129
+ label : str | Callable
3130
+ Specify a human-readable label for the function call given its parameters.
3131
+ This label is used to generate the cache file name, and is also printed when tracing
3132
+ hashing operations. Labels are not assumed to be unique, hence a unique hash of
3133
+ the label and the parameters to this function will be appended to generate
3134
+ the actual cache file name.
3135
+
3136
+ Use ``uid`` instead if ``label`` represents valid unique filenames.
3137
+
3138
+
3139
+ **Usage:**
3140
+
3141
+ * If ``label`` is a plain string without ``{}`` formatting: use this string as-is.
3142
+
3143
+ * If ``label`` is a string with ``{}`` formatting, then ``label.format( name=name, **parameters )``
3144
+ will be used to generate the actual label.
3145
+
3146
+ * If ``label`` is a ``Callable`` then ``label( name=name, **parameters )`` will be called
3147
+ to generate the actual label.
3148
+
3149
+ See above for examples.
3150
+
3151
+ ``label`` cannot be used alongside ``uid``.
3152
+
3153
+ uid : str | Callable
3154
+ Alternative to ``label`` which is assumed to generate a unique cache file name. It has the same
3155
+ semantics as ``label``. When used, parameters to the decorated function are not hashed.
3156
+
3157
+ ``uid`` cannot be used alongside ``label``.
3158
+
3159
+ name : str, optional
3160
+ Name of this function, which is used on its own if neither ``label`` nor ``uid`` is used.
3161
+ If either of them is used, ``name`` is passed as a parameter to either the callable or the
3162
+ formatting operator.
3163
+
3164
+ If ``name`` is not specified it defaults to ``__qualname__`` expanded
3165
+ by the module name the function is defined in.
3166
+
2377
3167
  include_args : list[str]
2378
- List of arguments to include in generating a unqiue id, or None for all.
3168
+ List of arguments to include in generating a unique ID, or ``None`` for all.
2379
3169
 
2380
3170
  exclude_args : list[str]:
2381
- List of argumernts to exclude
3171
+ List of arguments to exclude from generating a unique ID.
2382
3172
 
2383
3173
  exclude_arg_types : list[type]
2384
- List of types to exclude.
3174
+ List of parameter types to exclude from generating a unique ID.
2385
3175
 
2386
3176
  version_auto_class : bool
2387
-
2388
-
3177
+ Whether to automatically add version dependencies on base classes or, for member functions, on containing
3178
+ classes. This is the ``auto_class`` parameter for :dec:`cdxcore.version.version`.
2389
3179
 
2390
3180
  Returns
2391
3181
  -------
2392
- A callable to execute F if need be.
2393
- This callable has a member 'cache_info' which can be used to access information on caching activity.
2394
-
2395
- Information available at any time after decoration:
2396
- F.cache_info.name : qualified name of the function
2397
- F.cache_info.signature : signature of the function
3182
+ Decorated F: Callable
3183
+
3184
+ A decorator ``cache(F)`` whose ``__call__`` implements the cached call to ``F``.
2398
3185
 
2399
- Additonal information available during a call to a decorated function F, and thereafter:
2400
- F.cache_info.version : unique version string reflecting all dependencies.
2401
- F.cache_info.uid : unique call ID.
2402
- F.cache_info.label : last id generated, or None (if id was a string and unique was True)
2403
- F.cache_info.arguments : arguments parsed to create a unique call ID, or None (if id was a string and unique was True)
3186
+ This callable has a member ``cache_info``
3187
+ of type :class:`cdxcore.subdir.CacheInfo`
3188
+ which can be used to access information on caching activity.
3189
+
3190
+ * Information available at any time after decoration:
3191
+
3192
+ * ``F.cache_info.name`` : qualified name of the function
3193
+ * ``F.cache_info.signature`` : signature of the function
3194
+
3195
+ * Additional information available during a call to a decorated function F, and thereafter:
3196
+
3197
+ * ``F.cache_info.version`` : unique version string reflecting all dependencies.
3198
+ * ``F.cache_info.filename`` : unique filename used for caching logic during the last function call.
3199
+ * ``F.cache_info.label`` : last label generated, or ``None``.
3200
+ * ``F.cache_info.arguments`` : arguments parsed to create a unique call ID, or ``None``.
2404
3201
 
2405
- Additonal information available after a call to F:
2406
- F.cache_info.last_cached : whether the last function call returned a cached object
3202
+ * Additonal information available after a call to ``F``:
3203
+
3204
+ * ``F.cache_info.last_cached`` : whether the last function call returned a cached object.
2407
3205
 
2408
- The function F has additional function parameters
2409
- override_cache_mode : allows to override caching mode temporarily, in particular "off"
2410
- track_cached_files : pass a CacheTracker object to keep track of all files used (loaded from or saved to).
2411
- This can be used to delete intermediary files when a large operation was completed.
3206
+ The decorated ``F()`` has additional function parameters, namely:
3207
+
3208
+ * ``override_cache_mode`` : allows to override caching mode temporarily, in particular you can set it to ``"off"``.
3209
+ * ``track_cached_files`` : allows passing a :class:`cdxcore.subdir.CacheTracker`
3210
+ object to keep track of all
3211
+ files used (loaded from or saved to).
3212
+ The function :meth:`cdxcore.subdir.CacheTracker.delete_cache_files` can be used
3213
+ to delete all files involved in caching.
2412
3214
  """
2413
3215
  return CacheCallable(subdir = self,
2414
3216
  version = version,
@@ -2428,14 +3230,153 @@ class SubDir(object):
2428
3230
  version_auto_class : bool = True
2429
3231
  ):
2430
3232
  """
2431
- Short cut for SubDir.cache() for classes
2432
- See SubDir.cache() for documentation.
3233
+ Short-cut for :dec:`cdxcore.subdir.SubDir.cache` applied to classes
3234
+ with a reduced number of available parameters.
3235
+
3236
+ Example::
3237
+
3238
+ cache = SubDir("!/.cache")
3239
+
3240
+ @cache.cache_class("0.1")
3241
+ class A(object):
3242
+
3243
+ @cache.cache(exclude_args=['debug'])
3244
+ def __init__(self, x, debug):
3245
+ if debug:
3246
+ print("__init__",x)
3247
+ self.x = x
3248
+
2433
3249
  """
2434
3250
  return self.cache( name=name,
2435
- version=version,
2436
- dependencies=dependencies,
2437
- version_auto_class=version_auto_class)
2438
-
3251
+ version=version,
3252
+ dependencies=dependencies,
3253
+ version_auto_class=version_auto_class)
3254
+
3255
+ # ========================================================================
3256
+ # Caching, convenience
3257
+ # ========================================================================
3258
+
3259
+ def VersionedCacheRoot( directory : str, *,
3260
+ ext : str = None,
3261
+ fmt : Format = None,
3262
+ create_directory : bool = False,
3263
+ **controller_kwargs
3264
+ ):
3265
+ """
3266
+ Create a root directory for versioned caching on disk
3267
+ using :dec:`cdxcore.subdir.SubDir.cache`.
3268
+
3269
+ **Usage:**
3270
+
3271
+ In a central file, define a root directory for all caching activity::
3272
+
3273
+ from cdxcore.subdir import VersionedCacheRoot
3274
+ vroot = VersionedCacheRoot("!/cache")
3275
+
3276
+ Create sub-directories as suitable, for example::
3277
+
3278
+ vtest = vroot("test")
3279
+
3280
+ Use these for caching::
3281
+
3282
+ @vtest.cache("1.0")
3283
+ def f1( x=1, y=2 ):
3284
+ print(x,y)
3285
+
3286
+ @vtest.cache("1.0", dps=[f1])
3287
+ def f2( x=1, y=2, z=3 ):
3288
+ f1( x,y )
3289
+ print(z)
3290
+
3291
+ Parameters
3292
+ ----------
3293
+ directory : str
3294
+ Name of the root directory for caching.
3295
+
3296
+ Using SubDir the following short-cuts are supported:
3297
+
3298
+ * ``"!/dir"`` creates ``dir`` in the temporary directory.
3299
+ * ``"~/dir"`` creates ``dir`` in the home directory.
3300
+ * ``"./dir"`` creates ``dir`` relative to the current directory.
3301
+
3302
+ ext : str
3303
+ Extension, which will automatically be appended to file names.
3304
+ The default value depends on ``fmt``; for ``Format.PICKLE`` it is "pck".
3305
+
3306
+ fmt : :class:`cdxcore.subdir.Format`
3307
+ File format; if ``ext`` is not specified, the format drives the extension, too.
3308
+ Default is ``Format.PICKLE``.
3309
+
3310
+ create_directory : bool
3311
+ Whether to create the directory upon creation. Default is ``False``.
3312
+
3313
+ controller_kwargs: dict
3314
+ Parameters passed to :class:`cdxcore.subdir.CacheController`.
3315
+
3316
+ Common parameters used:
3317
+
3318
+ * ``exclude_arg_types``: list of types or names of types to exclude when auto-generating function
3319
+ signatures from function arguments.
3320
+ An example is :class:`cdxcore.verbose.Context` which is used to print progress messages.
3321
+
3322
+ * ``max_filename_length``: maximum filename length.
3323
+
3324
+ * ``hash_length``: length used for hashes, see :class:`cdxcore.uniquehash.UniqueHash`.
3325
+
3326
+ Returns
3327
+ -------
3328
+ Root : SubDir
3329
+ A root directory suitable for caching.
3330
+ """
3331
+ controller = CacheController(**controller_kwargs) if len(controller_kwargs) > 0 else None
3332
+ return SubDir( directory=directory, ext=ext, fmt=fmt, create_directory=create_directory, controller=controller )
3333
+
3334
+ version = version_decorator
3335
+
3336
+ class CacheTracker(object):
3337
+ """
3338
+ Utility class to track caching and be able to delete all dependent objects.
3339
+ """
3340
+ def __init__(self):
3341
+ """ track cache files """
3342
+ self._files = []
3343
+ def __iadd__(self, new_file):
3344
+ """ Add a new file to the tracker """
3345
+ self._files.append( new_file )
3346
+ def delete_cache_files(self):
3347
+ """ Delete all tracked files """
3348
+ for file in self._files:
3349
+ if os.path.exists(file):
3350
+ os.remove(file)
3351
+ self._files = []
3352
+ def __str__(self) -> str:#NOQA
3353
+ return f"Tracked: {self._files}"
3354
+ def __repr__(self) -> str:#NOQA
3355
+ return f"Tracked: {self._files}"
3356
+
3357
+ class CacheInfo(object):
3358
+ """
3359
+ Information on functions decorated with :dec:`cdxcore.subdir.SubDir.cache`.
3360
+
3361
+ Functions decorated with :dec:`cdxcore.subdir.SubDir.cache`
3362
+ will have a member ``cache_info`` of this type
3363
+ """
3364
+ def __init__(self, name, F, keep_last_arguments):
3365
+ """
3366
+ :meta private:
3367
+ """
3368
+ self.name = name #: Decoded name of the function.
3369
+
3370
+ self.signature = inspect.signature(F) #: :func:`inspect.signature` of the function.
3371
+
3372
+ self.filename = None #: Unique filename of the last function call.
3373
+ self.label = None #: Label of the last function call.
3374
+ self.version = None #: Last version used.
3375
+
3376
+ self.last_cached = None #: Whether the last function call restored data from disk.
3377
+
3378
+ if keep_last_arguments:
3379
+ self.arguments = None #: Last arguments used. This member is only present if ``keep_last_arguments`` was set to ``True`` for the relevant :class:`cdxcore.subdir.CacheController`.
2439
3380
 
2440
3381
  def _ensure_has_version( F,
2441
3382
  version : str = None,
@@ -2486,8 +3427,9 @@ def _qualified_name( F, name ):
2486
3427
 
2487
3428
  class CacheCallable(object):
2488
3429
  """
2489
- Utility class for SubDir.cache_callable.
2490
- See documentation for that function.
3430
+ Wrapper for a cached function.
3431
+
3432
+ This is the wrapper returned by :dec:`cdxcore.subdir.SubDir.cache`.
2491
3433
  """
2492
3434
 
2493
3435
  def __init__(self,
@@ -2503,8 +3445,9 @@ class CacheCallable(object):
2503
3445
  version_auto_class : bool = True,
2504
3446
  name_of_name_arg : str = "name"):
2505
3447
  """
2506
- Utility class for SubDir.cache_callable.
2507
- See documentation for that function.
3448
+ Utility class for :dec:`cdxcore.subdir.SubDir.cache`.
3449
+
3450
+ *Do not use directly.*
2508
3451
  """
2509
3452
  if not label is None and not uid is None:
2510
3453
  error("Cannot specify both 'label' and 'uid'.")
@@ -2523,35 +3466,41 @@ class CacheCallable(object):
2523
3466
 
2524
3467
  @property
2525
3468
  def uid_or_label(self) -> Callable:
3469
+ """ ID or label """
2526
3470
  return self._uid if self._label is None else self._label
2527
3471
  @property
2528
3472
  def unique(self) -> bool:
3473
+ """ Whether the ID is unique """
2529
3474
  return not self._uid is None
2530
-
2531
3475
  @property
2532
- def cacheController(self) -> CacheController:
2533
- """ Returns the cache controller """
2534
- return self._subdir.cacheController
3476
+ def cache_controller(self) -> CacheController:
3477
+ """ Returns the :class:`cdxcore.subdir.CacheController` """
3478
+ return self._subdir.cache_controller
2535
3479
  @property
2536
- def cache_mode(self) -> Context:
2537
- return self.cacheController.cache_mode
3480
+ def cache_mode(self) -> CacheMode:
3481
+ """ Returns the :class:`cdxcore.subdir.CacheMode` of the underlying :class:`cdxcore.subdir.CacheController` """
3482
+ return self.cache_controller.cache_mode
2538
3483
  @property
2539
3484
  def debug_verbose(self) -> Context:
2540
- return self.cacheController.debug_verbose
3485
+ """ Returns the debug :class:`cdxcore.verbose.Context` used to print caching information, or ``None`` """
3486
+ return self.cache_controller.debug_verbose
2541
3487
  @property
2542
- def uniqueNamedFileName(self) -> Callable:
2543
- return self.cacheController.uniqueNamedFileName
3488
+ def labelledFileName(self) -> Callable:
3489
+ """ Returns ``labelledFileName()`` of the underlying :class:`cdxcore.subdir.CacheController` """
3490
+ return self.cache_controller.labelledFileName
2544
3491
  @property
2545
- def uniqueLabelledFileName(self) -> Callable:
2546
- return self.cacheController.uniqueLabelledFileName
3492
+ def uniqueFileName(self) -> Callable:
3493
+ """ Returns ``uniqueFileName()`` of the underlying :class:`cdxcore.subdir.CacheController` """
3494
+ return self.cache_controller.uniqueFileName
2547
3495
  @property
2548
3496
  def global_exclude_arg_types(self) -> list[type]:
2549
- return self.cacheController.exclude_arg_types
3497
+ """ Returns ``exclude_arg_types`` of the underlying :class:`cdxcore.subdir.CacheController` """
3498
+ return self.cache_controller.exclude_arg_types
2550
3499
 
2551
3500
  def __call__(self, F : Callable):
2552
3501
  """
2553
- Decorate 'F' as cachable callable. Can also decorate classes via ClassCallable()
2554
- See SubDir.cache() for documentation.
3502
+ Decorate ``F`` as cachable callable.
3503
+ See :dec:`cdxcore.subdir.SubDir.cache` for documentation.
2555
3504
  """
2556
3505
  if inspect.isclass(F):
2557
3506
  if not self._label is None: raise ValueError("'{F.__qualname__}': when decorating a class specify 'label' for __init__, not the class")
@@ -2566,11 +3515,13 @@ class CacheCallable(object):
2566
3515
  def _wrap_class(self, C : type):
2567
3516
  """
2568
3517
  Wrap class
3518
+
2569
3519
  This wrapper:
2570
- 1) Assigns a cdxbasics.version.version() for the class (if not yet present)
2571
- 2) Extracts from __init__ the wrapper to decorate __new__
3520
+
3521
+ * Assigns a :dec:`cdxcore.version.version` for the class (if not yet present).
3522
+ * Extracts from ``__init__`` the wrapper to decorate ``__new__``.
2572
3523
  """
2573
- debug_verbose = self.cacheController.debug_verbose
3524
+ debug_verbose = self.cache_controller.debug_verbose
2574
3525
 
2575
3526
  assert not inspect.isclass(C), ("Not a class", C)
2576
3527
 
@@ -2609,8 +3560,7 @@ class CacheCallable(object):
2609
3560
  """
2610
3561
  Decorate callable 'F'.
2611
3562
  """
2612
-
2613
- debug_verbose = self.cacheController.debug_verbose
3563
+ debug_verbose = self.cache_controller.debug_verbose
2614
3564
  assert not inspect.isclass(F), ("Internal error")
2615
3565
 
2616
3566
  # check validity
@@ -2712,21 +3662,24 @@ class CacheCallable(object):
2712
3662
  # determine unique id_ for this function call
2713
3663
  # -------------------------------------------
2714
3664
 
2715
- label = None
2716
- uid = None
2717
3665
  uid_or_label = self.uid_or_label
3666
+ filename = None
2718
3667
  if isinstance(uid_or_label, str) and self.unique:
2719
- # if 'id' does not contain formatting codes, and the result is 'unique' then do not bother collecting
3668
+ # if 'id' does not contain formatting codes,
3669
+ # and the result is 'unique' then do not bother collecting
2720
3670
  # function arguments
2721
3671
  try:
2722
- uid = uid_or_label.format() # throws a KeyError if 'id' contains formatting information
3672
+ filename = uid_or_label.format() # throws a KeyError if 'id' contains formatting information
2723
3673
  except KeyError:
2724
3674
  pass
2725
3675
 
2726
- if not uid is None:
3676
+ if not filename is None:
2727
3677
  # generate name with the unique string provided by the user
2728
- label = uid
2729
- uid = self.uniqueLabelledFileName( self.id )
3678
+ if not is_filename(filename):
3679
+ raise ValueError(f"The unique filename '{filename}' computed for '{name}' contains invalid characters for filename. When using `uid` make sure that "+\
3680
+ "the returned ID is a valid filename (and unique)")
3681
+ label = filename
3682
+ filename = self.uniqueFileName( filename )
2730
3683
  arguments = None
2731
3684
 
2732
3685
  else:
@@ -2767,9 +3720,9 @@ class CacheCallable(object):
2767
3720
  if arg in arguments:
2768
3721
  del arguments[arg]
2769
3722
 
2770
- # apply logics
3723
+ # did the user provide a label or unique ID?
2771
3724
  if uid_or_label is None:
2772
- label = name
3725
+ uid_or_label = name
2773
3726
 
2774
3727
  else:
2775
3728
  if self._name_of_name_arg in arguments:
@@ -2789,24 +3742,30 @@ class CacheCallable(object):
2789
3742
  # call format or function
2790
3743
  if isinstance( uid_or_label, str ):
2791
3744
  try:
2792
- label = str.format( uid_or_label, **arguments )
3745
+ uid_or_label = str.format( uid_or_label, **arguments )
2793
3746
  except KeyError as e:
2794
3747
  raise KeyError(e, f"Error while generating id for '{name}' using format string '{uid_or_label}': {e}. Available arguments: {list(arguments)}")
2795
3748
 
2796
3749
  else:
2797
3750
  which = 'uid' if not self._uid is None else 'label'
2798
3751
  try:
2799
- label = uid_or_label(**arguments)
3752
+ uid_or_label = uid_or_label(**arguments)
2800
3753
  except TypeError as e:
2801
3754
  raise TypeError(e, f"Error while generating '{which}' for '{name}' using a function: {e}. Available arguments: {list(arguments)}")
2802
3755
  except Exception as e:
2803
3756
  raise type(e)(f"Error while generating '{which}' for '{name}': attempt to call '{which}' of type {type(uid_or_label)} failed: {e}")
2804
- assert isinstance(label, str), ("Error:", which,"callable must return a string. Found",type(label))
3757
+ assert isinstance(uid_or_label, str), ("Error:", which, "callable must return a string. Found",type(uid_or_label))
2805
3758
 
2806
3759
  if self.unique:
2807
- uid = self.uniqueLabelledFileName( label )
3760
+ if not is_filename(uid_or_label):
3761
+ raise ValueError(f"The unique filename '{uid_or_label}' computed for '{name}' contains invalid characters for filename. When using `uid` make sure that "+\
3762
+ "the returned filename is indeed a valid filename (and unique)")
3763
+
3764
+ label = uid_or_label
3765
+ filename = self.uniqueFileName( uid_or_label )
2808
3766
  else:
2809
- uid = self.uniqueNamedFileName( label, **arguments )
3767
+ label = uid_or_label
3768
+ filename = self.labelledFileName( uid_or_label, **arguments )
2810
3769
 
2811
3770
  # determine version, cache mode
2812
3771
  # ------------------
@@ -2818,11 +3777,11 @@ class CacheCallable(object):
2818
3777
  # store process information
2819
3778
  # -------------------------
2820
3779
 
2821
- execute.cache_info.label = str(label) if not label is None else None
2822
- execute.cache_info.uid = uid
2823
- execute.cache_info.version = version_
3780
+ execute.cache_info.label = str(label) if not label is None else None
3781
+ execute.cache_info.filename = filename
3782
+ execute.cache_info.version = version_
2824
3783
 
2825
- if self.cacheController.keep_last_arguments:
3784
+ if self.cache_controller.keep_last_arguments:
2826
3785
  info_arguments = OrderedDict()
2827
3786
  for argname, argvalue in arguments.items():
2828
3787
  info_arguments[argname] = str(argvalue)[:100]
@@ -2833,26 +3792,26 @@ class CacheCallable(object):
2833
3792
  # ---------------
2834
3793
 
2835
3794
  if cache_mode.delete:
2836
- self._subdir.delete( uid )
3795
+ self._subdir.delete( filename )
2837
3796
  elif cache_mode.read:
2838
3797
  class Tag:
2839
3798
  pass
2840
3799
  tag = Tag()
2841
3800
  if not is_new:
2842
- r = self._subdir.read( uid, tag, version=version_ )
3801
+ r = self._subdir.read( filename, tag, version=version_ )
2843
3802
  else:
2844
3803
  try:
2845
3804
  execute.__new_during_read = True
2846
- r = self._subdir.read( uid, tag, version=version_ )
3805
+ r = self._subdir.read( filename, tag, version=version_ )
2847
3806
  finally:
2848
3807
  execute.__new_during_read = False
2849
3808
 
2850
3809
  if not r is tag:
2851
3810
  if not track_cached_files is None:
2852
- track_cached_files += self._fullFileName(uid)
3811
+ track_cached_files += self._fullFileName(filename)
2853
3812
  execute.cache_info.last_cached = True
2854
3813
  if not debug_verbose is None:
2855
- debug_verbose.write(f"cache({name}): read '{label}' version 'version {version_}' from cache '{self._subdir.fullFileName(uid)}'.")
3814
+ debug_verbose.write(f"cache({name}): read '{label}' version 'version {version_}' from cache '{self._subdir.full_file_name(filename)}'.")
2856
3815
  if is_new:
2857
3816
  assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__", F.__qualname__, label)
2858
3817
  r.__magic_cache_call_init__ = False # since we called __new__, __init__ will be called next
@@ -2871,9 +3830,9 @@ class CacheCallable(object):
2871
3830
  assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__")
2872
3831
 
2873
3832
  if cache_mode.write:
2874
- self._subdir.write(uid,r,version=version_)
3833
+ self._subdir.write(filename,r,version=version_)
2875
3834
  if not track_cached_files is None:
2876
- track_cached_files += self._subdir.fullFileName(uid)
3835
+ track_cached_files += self._subdir.full_file_name(filename)
2877
3836
  execute.cache_info.last_cached = False
2878
3837
 
2879
3838
  if is_new:
@@ -2883,81 +3842,21 @@ class CacheCallable(object):
2883
3842
 
2884
3843
  if not debug_verbose is None:
2885
3844
  if cache_mode.write:
2886
- debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' and wrote result into '{self._subdir.fullFileName(uid)}'.")
3845
+ debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' and wrote result into '{self._subdir.full_file_name(filename)}'.")
2887
3846
  else:
2888
- debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' but did *not* write into '{self._subdir.fullFileName(uid)}'.")
3847
+ debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' but did *not* write into '{self._subdir.full_file_name(filename)}'.")
2889
3848
  return r
2890
3849
 
2891
3850
  update_wrapper( wrapper=execute, wrapped=F )
2892
- execute.cache_info = CacheInfo()
2893
-
2894
- execute.cache_info.name = name # decoded name of the function
2895
- execute.cache_info.signature = inspect.signature(F) # signature of the function
2896
-
2897
- execute.cache_info.uid = None # last function call ID
2898
- execute.cache_info.label = None # last unique file name cached to
2899
- execute.cache_info.version = None # last version used
2900
-
2901
- execute.cache_info.last_cached = None # last function call restored from disk?
2902
-
2903
- if self.cacheController.keep_last_arguments:
2904
- execute.cache_info.arguments = None # last function call arguments dictionary of strings
3851
+ execute.cache_info = CacheInfo(name, F, self.cache_controller.keep_last_arguments)
2905
3852
 
2906
3853
  if is_new:
2907
3854
  execute.__new_during_read = False
2908
3855
 
2909
3856
  if not debug_verbose is None:
2910
3857
  debug_verbose.write(f"cache({name}): {'function' if not is_new else 'class constructor function'} registered for caching into '{self._subdir.path}'.")
2911
- self.cacheController.versioned[name] = execute
3858
+ self.cache_controller.versioned[name] = execute
2912
3859
  return execute
2913
3860
 
2914
- def VersionedCacheRoot( directory : str, *,
2915
- ext : str = None,
2916
- fmt : Format = None,
2917
- createDirectory : bool = None,
2918
- **controller_kwargs
2919
- ):
2920
- """
2921
- Create a root directory for versioning caching on disk
2922
-
2923
- Usage:
2924
- In a central file, define a root directory
2925
- vroot = VersionedCacheRoot("!/cache")
2926
3861
 
2927
- and a sub-directory
2928
- vtest = vroot("test")
2929
-
2930
- @vtest.cache("1.0")
2931
- def f1( x=1, y=2 ):
2932
- print(x,y)
2933
-
2934
- @vtest.cache("1.0", dps=[f1])
2935
- def f2( x=1, y=2, z=3 ):
2936
- f1( x,y )
2937
- print(z)
2938
-
2939
- Parameters
2940
- ----------
2941
- directory : name of the directory. Using SubDir the following short cuts are supported:
2942
- "!/dir" creates 'dir' in the temporary directory
2943
- "~/dir" creates 'dir' in the home directory
2944
- "./dir" created 'dir' relative to the current directory
2945
- ext : extension, which will automatically be appended to file names (see SubDir). Default depends on format. For Format.PICKLE it is 'pck'
2946
- fmt : format, see SubDir.Format. Default is Format.PICKLE
2947
- createDirectory : whether to create the directory upon creation. Default is no.
2948
- controller_kwargs: parameters passed to VersionController, for example:
2949
- exclude_arg_types : list of types or names of types to exclude when auto-generating function signatures from function arguments.
2950
- A standard example from cdxbasics is "Context" as it is used to print progress messages.
2951
- max_filename_length : maximum filename length
2952
- hash_length: length used for hashes, see cdxbasics.util.uniqueHash()
2953
-
2954
- Returns
2955
- -------
2956
- A root cache directory
2957
- """
2958
- controller = CacheController(**controller_kwargs) if len(controller_kwargs) > 0 else None
2959
- return SubDir( directory=directory, ext=ext, fmt=fmt, createDirectory=createDirectory, controller=controller )
2960
3862
 
2961
- version = version_decorator
2962
-
2963
-