cdxcore 0.1.6__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cdxcore might be problematic. Click here for more details.
- cdxcore/__init__.py +1 -9
- cdxcore/config.py +1188 -521
- cdxcore/crman.py +95 -25
- cdxcore/err.py +371 -0
- cdxcore/pretty.py +468 -0
- cdxcore/pretty.py_bak.py +750 -0
- cdxcore/subdir.py +2238 -1339
- cdxcore/uniquehash.py +515 -363
- cdxcore/util.py +358 -417
- cdxcore/verbose.py +683 -248
- cdxcore/version.py +399 -140
- cdxcore-0.1.10.dist-info/METADATA +27 -0
- cdxcore-0.1.10.dist-info/RECORD +35 -0
- {cdxcore-0.1.6.dist-info → cdxcore-0.1.10.dist-info}/top_level.txt +2 -1
- docs/source/conf.py +123 -0
- tests/test_config.py +500 -0
- tests/test_crman.py +54 -0
- tests/test_err.py +86 -0
- tests/test_pretty.py +404 -0
- tests/test_subdir.py +289 -0
- tests/test_uniquehash.py +159 -144
- tests/test_util.py +122 -83
- tests/test_verbose.py +119 -0
- tests/test_version.py +153 -0
- up/git_message.py +2 -2
- cdxcore/logger.py +0 -319
- cdxcore/prettydict.py +0 -388
- cdxcore/prettyobject.py +0 -64
- cdxcore-0.1.6.dist-info/METADATA +0 -1418
- cdxcore-0.1.6.dist-info/RECORD +0 -30
- conda/conda_exists.py +0 -10
- conda/conda_modify_yaml.py +0 -42
- tests/_cdxbasics.py +0 -1086
- {cdxcore-0.1.6.dist-info → cdxcore-0.1.10.dist-info}/WHEEL +0 -0
- {cdxcore-0.1.6.dist-info → cdxcore-0.1.10.dist-info}/licenses/LICENSE +0 -0
- {cdxcore → tmp}/deferred.py +0 -0
- {cdxcore → tmp}/dynaplot.py +0 -0
- {cdxcore → tmp}/filelock.py +0 -0
- {cdxcore → tmp}/np.py +0 -0
- {cdxcore → tmp}/npio.py +0 -0
- {cdxcore → tmp}/sharedarray.py +0 -0
cdxcore/subdir.py
CHANGED
|
@@ -1,61 +1,387 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
2
|
+
Overview
|
|
3
|
+
--------
|
|
4
|
+
|
|
5
|
+
This module contains utilities for file i/o, directory management and
|
|
6
|
+
streamlined versioned caching.
|
|
7
|
+
|
|
8
|
+
The key idea is to provide transparent, concise :mod:`pickle` access to the file system
|
|
9
|
+
via the :class:`cdxcore.subdir.SubDir` class.
|
|
10
|
+
|
|
11
|
+
**Key design features:**
|
|
12
|
+
|
|
13
|
+
* Simple path construction via ``()`` operator. By default directories which do not exist yet
|
|
14
|
+
are only created upon writing a first file.
|
|
15
|
+
|
|
16
|
+
* Files managed by :class:`cdxcore.subdir.SubDir` all have the same extension.
|
|
17
|
+
|
|
18
|
+
* Files support fast versioning: the version of a file can be read without having to read the
|
|
19
|
+
entire file.
|
|
20
|
+
|
|
21
|
+
* :dec:`cdxcore.subdir.SubDir.cache` implements a convenient versioned caching framework.
|
|
22
|
+
|
|
23
|
+
Directories
|
|
24
|
+
^^^^^^^^^^^
|
|
25
|
+
|
|
26
|
+
The core of the framework is the :class:`cdxcore.subdir.SubDir` class which represents a directory
|
|
27
|
+
with files of a given extension.
|
|
28
|
+
|
|
29
|
+
Simply write::
|
|
30
|
+
|
|
31
|
+
from cdxcore.subdir import SubDir
|
|
32
|
+
subdir = SubDir("my_directory") # relative to current working directory
|
|
33
|
+
subdir = SubDir("./my_directory") # relative to current working directory
|
|
34
|
+
subdir = SubDir("~/my_directory") # relative to home directory
|
|
35
|
+
subdir = SubDir("!/my_directory") # relative to default temp directory
|
|
36
|
+
|
|
37
|
+
Note that ``my_directory`` will not be created if it does not exist yet. It will be created the first
|
|
38
|
+
time we write a file.
|
|
39
|
+
|
|
40
|
+
You can specify a parent for relative path names::
|
|
41
|
+
|
|
42
|
+
from cdxcore.subdir import SubDir
|
|
43
|
+
subdir = SubDir("my_directory", "~") # relative to home directory
|
|
44
|
+
subdir = SubDir("my_directory", "!") # relative to default temp directory
|
|
45
|
+
subdir = SubDir("my_directory", ".") # relative to current directory
|
|
46
|
+
subdir2 = SubDir("my_directory", subdir) # subdir2 is relative to `subdir`
|
|
47
|
+
|
|
48
|
+
Change the extension to "bin"::
|
|
49
|
+
|
|
50
|
+
from cdxcore.subdir import SubDir
|
|
51
|
+
subdir = SubDir("~/my_directory;*.bin")
|
|
52
|
+
subdir = SubDir("~/my_directory", ext="bin")
|
|
53
|
+
subdir = SubDir("my_directory", "~", ext="bin")
|
|
54
|
+
|
|
55
|
+
You can turn off extension management by setting the extension to ""::
|
|
56
|
+
|
|
57
|
+
from cdxcore.subdir import SubDir
|
|
58
|
+
subdir = SubDir("~/my_directory", ext="")
|
|
59
|
+
|
|
60
|
+
You can also use :meth:`cdxcore.subdir.SubDir.__call__` to generate sub directories::
|
|
61
|
+
|
|
62
|
+
from cdxcore.subdir import SubDir
|
|
63
|
+
parent = SubDir("~/parent")
|
|
64
|
+
subdir = parent("subdir")
|
|
65
|
+
|
|
66
|
+
Be aware that when the operator :meth:`cdxcore.subdir.SubDir.__call__`
|
|
67
|
+
is called with two keyword arguments, then it reads files.
|
|
68
|
+
|
|
69
|
+
You can obtain a list of all sub directories in a directory by using :meth:`cdxcore.subdir.SubDir.sub_dirs`.
|
|
70
|
+
The list of files with the corresponding extension is accessible via :meth:`cdxcore.subdir.SubDir.files`.
|
|
71
|
+
|
|
72
|
+
File Format
|
|
73
|
+
^^^^^^^^^^^
|
|
74
|
+
|
|
75
|
+
:class:`cdxcore.subdir.SubDir` supports file i/o with a number of different file formats
|
|
76
|
+
via :class:`cdxcore.subdir.Format`.
|
|
77
|
+
|
|
78
|
+
* "PICKLE": standard pickling with default extension is "pck".
|
|
79
|
+
|
|
80
|
+
* "JSON_PICKLE": uses the :mod:`jsonpickle` package; default extension "jpck".
|
|
81
|
+
The advantage of this format over "PICKLE" is that it is somewhat human-readable.
|
|
82
|
+
However, ``jsonpickle`` uses compressed formats for complex objects such as :mod:`numpy`
|
|
83
|
+
arrays, hence readability is somewhat limited. Using "JSON_PICKLE"
|
|
84
|
+
comes at cost of slower i/o speed.
|
|
85
|
+
|
|
86
|
+
* "JSON_PLAIN": calls :func:`cdxcore.util.plain` to generate human readable files
|
|
87
|
+
which cannot be loaded back from disk.
|
|
88
|
+
In this mode ``SubDir`` converts objects into plain Python objects before using :mod:`json`
|
|
89
|
+
to write them to disk.
|
|
90
|
+
That means that deserialized data does not have the correct object structure
|
|
91
|
+
to be able to restore files written in "JSON_PLAIN".
|
|
92
|
+
However, such files are much easier to read.
|
|
93
|
+
|
|
94
|
+
* "BLOSC" uses `blosc <https://github.com/blosc/python-blosc>`__
|
|
95
|
+
to read/write compressed binary data. The blosc compression algorithm is very fast,
|
|
96
|
+
hence using this mode will not usually lead to notably slower performance than using
|
|
97
|
+
"PICKLE" but will generate smaller files, depending on your data structure.
|
|
98
|
+
|
|
99
|
+
The default extension for "BLOSC" is "zbsc".
|
|
100
|
+
|
|
101
|
+
* "GZIP": uses :mod:`gzip` to
|
|
102
|
+
read/write compressed binary data. The default extension is "pgz".
|
|
103
|
+
|
|
104
|
+
Summary of properties:
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
108
|
+
| Format | Restores objects | Human readable | Speed | Compression | Extension |
|
|
109
|
+
+==============+==================+================+=======+=============+===========+
|
|
110
|
+
| PICKLE | yes | no | high | no | .pck |
|
|
111
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
112
|
+
| JSON_PLAIN | no | yes | low | no | .json |
|
|
113
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
114
|
+
| JSON_PICKLE | yes | limited | low | no | .jpck |
|
|
115
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
116
|
+
| BLOSC | yes | no | high | yes | .zbsc |
|
|
117
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
118
|
+
| GZIP | yes | no | high | yes | .pgz |
|
|
119
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
You may specify the file format when instantiating :class:`cdxcore.subdir.SubDir`::
|
|
123
|
+
|
|
124
|
+
from cdxcore.subdir import SubDir
|
|
125
|
+
subdir = SubDir("~/my_directory", fmt=SubDir.PICKLE)
|
|
126
|
+
subdir = SubDir("~/my_directory", fmt=SubDir.JSON_PICKLE)
|
|
127
|
+
...
|
|
128
|
+
|
|
129
|
+
If ``ext`` is not specified the extension will default to
|
|
130
|
+
the respective default extension of the format requested.
|
|
131
|
+
|
|
132
|
+
Reading Files
|
|
133
|
+
^^^^^^^^^^^^^
|
|
134
|
+
|
|
135
|
+
To read the data contained in a ``file`` from our subdirectory
|
|
136
|
+
with its reference extension use :meth:`cdxcore.subdir.SubDir.read`::
|
|
137
|
+
|
|
138
|
+
from cdxcore.subdir import SubDir
|
|
139
|
+
subdir = SubDir("!/test")
|
|
140
|
+
|
|
141
|
+
data = subdir.read("file") # returns the default `None` if file is not found
|
|
142
|
+
data = subdir.read("file", default=[]) # returns the default [] if file is not found
|
|
143
|
+
|
|
144
|
+
This function will return the ``default`` (which in turn defaults to ``None``)
|
|
145
|
+
if ``file.ext`` does not exist.
|
|
146
|
+
You can opt for :meth:`cdxcore.subdir.SubDir.read` to raise an error instead of returning a default
|
|
147
|
+
by using ``raise_on_error=True``::
|
|
148
|
+
|
|
149
|
+
data = subdir.read("file", raise_on_error=True) # raises 'KeyError' if not found
|
|
150
|
+
|
|
151
|
+
When calling ``read()`` you may specify an alternative extension::
|
|
152
|
+
|
|
153
|
+
data = subdir.read("file", ext="bin") # change extension to "bin"
|
|
154
|
+
data = subdir.read("file.bin", ext="") # no automatic extension
|
|
155
|
+
|
|
156
|
+
Specifying a different format for :meth:`cdxcore.subdir.SubDir.read` only changes
|
|
157
|
+
the extension automatically if you have not overwritten it before:
|
|
158
|
+
|
|
159
|
+
.. code-block:: python
|
|
160
|
+
|
|
161
|
+
subdir = SubDir("!/test") # default format PICKLE with extension pck
|
|
162
|
+
data = subdir.read("file", fmt=SubDir.JSON_PICKLE ) # uses "jpck" extension
|
|
163
|
+
|
|
164
|
+
subdir = SubDir("!/test", ext="bin") # user-specified extension
|
|
165
|
+
data = subdir.read("file", fmt=SubDir.JSON_PICKLE ) # keeps using "bin"
|
|
166
|
+
|
|
167
|
+
You can also use the :meth:`cdxcore.subdir.SubDir.__call__` to read files, in which case you must specify a default value
|
|
168
|
+
(if you don't, then the operator will return a sub directory)::
|
|
169
|
+
|
|
170
|
+
data = subdir("file", None) # returns None if file is not found
|
|
171
|
+
|
|
172
|
+
You can also use item notation to access files.
|
|
173
|
+
In this case, though, an error will be thrown if the file does not exist::
|
|
174
|
+
|
|
175
|
+
data = subdir['file'] # raises KeyError if file is not found
|
|
176
|
+
|
|
177
|
+
You can read a range of files in one function call::
|
|
178
|
+
|
|
179
|
+
data = subdir.read( ["file1", "file2"] ) # returns list
|
|
180
|
+
|
|
181
|
+
Finally, you can also iterate through all existing files using iterators::
|
|
182
|
+
|
|
183
|
+
# manual loading
|
|
184
|
+
for file in subdir:
|
|
185
|
+
data = subdir.read(file)
|
|
186
|
+
...
|
|
187
|
+
|
|
188
|
+
# automatic loading, with "None" as a default
|
|
189
|
+
for file, data in subdir.items():
|
|
190
|
+
...
|
|
191
|
+
|
|
192
|
+
To obtain a list of all files in our directory which have the correct extension, use :meth:`cdxcore.subdir.SubDir.files`.
|
|
193
|
+
|
|
194
|
+
Writing Files
|
|
195
|
+
^^^^^^^^^^^^^
|
|
6
196
|
|
|
197
|
+
Writing files mirrors reading them::
|
|
7
198
|
|
|
8
|
-
import
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
199
|
+
from cdxcore.subdir import SubDir
|
|
200
|
+
subdir = SubDir("!/test")
|
|
201
|
+
|
|
202
|
+
subdir.write("file", data)
|
|
203
|
+
subdir['file'] = data
|
|
204
|
+
|
|
205
|
+
You may specify a different extension::
|
|
206
|
+
|
|
207
|
+
subdir.write("file", data, ext="bin")
|
|
208
|
+
|
|
209
|
+
You can also specify a file :class:`cdxcore.subdir.Format`.
|
|
210
|
+
The extension will be changed automatically if you have not set it manually::
|
|
211
|
+
|
|
212
|
+
subdir = SubDir("!/test")
|
|
213
|
+
subdir.write("file", data, fmt=SubDir.JSON_PICKLE ) # will write to "file.jpck"
|
|
214
|
+
|
|
215
|
+
To write several files at once, write::
|
|
216
|
+
|
|
217
|
+
subdir.write(["file1", "file2"], [data1, data2])
|
|
218
|
+
|
|
219
|
+
Note that when writing to a file, :meth:`cdxcore.subdir.SubDir.write`
|
|
220
|
+
will first write to a temporary file, and then rename this file into the target file name.
|
|
221
|
+
The temporary file name is generated by applying :func:`cdxcore.uniquehash.unique_hash48`
|
|
222
|
+
to the
|
|
223
|
+
target file name,
|
|
224
|
+
current time, process and thread ID, as well as the machine's UUID.
|
|
225
|
+
This is done to reduce collisions between processes/machines accessing the same files,
|
|
226
|
+
potentially across a network.
|
|
227
|
+
It does not remove collision risk entirely, though.
|
|
228
|
+
|
|
229
|
+
Filenames
|
|
230
|
+
^^^^^^^^^
|
|
231
|
+
|
|
232
|
+
:class:`cdxcore.subdir.SubDir` transparently handles directory access and extensions.
|
|
233
|
+
That means a user usually only uses ``file`` names which do not contain either.
|
|
234
|
+
To obtain the full qualified filename given a "file" use :meth:`cdxcore.subdir.SubDir.full_file_name`.
|
|
235
|
+
|
|
236
|
+
Reading and Writing Versioned Files
|
|
237
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
238
|
+
|
|
239
|
+
:class:`cdxcore.subdir.SubDir` supports versioned files.
|
|
240
|
+
If versions are used, then they *must* be used for both reading and writing.
|
|
241
|
+
:dec:`cdxcore.version.version` provides a standard decorator framework for defining
|
|
242
|
+
versions for classes and functions including the version dependencies.
|
|
243
|
+
|
|
244
|
+
If a ``version`` is provided to :func:`cdxcore.subdir.SubDir.write`
|
|
245
|
+
then ``SubDir`` will write the version in a block ahead of the main content of the file.
|
|
246
|
+
In case of the PICKLE format, this is a byte string. In case of JSON_PLAIN and JSON_PICKLE this is a line of
|
|
247
|
+
text starting with ``#`` ahead of the file. (Note that this violates
|
|
248
|
+
the JSON file format.)
|
|
249
|
+
|
|
250
|
+
Writing a short version block ahead of the main data allows :func:`cdxcore.subdir.SubDir.read`
|
|
251
|
+
to read this version information back quickly without needing to read the entire file.
|
|
252
|
+
``read()`` attempts to do so if it is called with a ``version`` parameter.
|
|
253
|
+
In this case it will compare the read version with the provided version,
|
|
254
|
+
and only return the main content of the file if versions match.
|
|
255
|
+
|
|
256
|
+
Use :func:`cdxcore.subdir.SubDir.is_version` to check whether a given file has a specific version.
|
|
257
|
+
Like ``read()`` this function only reads the data required to obtain the version information and will
|
|
258
|
+
be much faster than reading the whole file.
|
|
259
|
+
|
|
260
|
+
*Important:* note that if a file was written with a version, it has to be read again with a test version.
|
|
261
|
+
You can specify ``version="*"`` for :func:`cdxcore.subdir.SubDir.read` to match any version.
|
|
262
|
+
|
|
263
|
+
**Examples:**
|
|
264
|
+
|
|
265
|
+
Writing a versioned file::
|
|
266
|
+
|
|
267
|
+
from cdxcore.subdir import SubDir
|
|
268
|
+
sub_dir = SubDir("!/test_version")
|
|
269
|
+
sub_dir.write("test", [1,2,3], version="0.0.1" )
|
|
270
|
+
|
|
271
|
+
To read ``[1,2,3]`` from "test" we need to use the correct version::
|
|
272
|
+
|
|
273
|
+
_ = sub_dir.read("test", version="0.0.1")
|
|
274
|
+
|
|
275
|
+
The following will not read "test" as the versions do not match::
|
|
276
|
+
|
|
277
|
+
_ = sub_dir.read("test", version="0.0.2")
|
|
278
|
+
|
|
279
|
+
By default :func:`cdxcore.subdir.SubDir.read`
|
|
280
|
+
will not fail if a version mismatch is encountered; rather it will
|
|
281
|
+
attempt to delete the file and then return the ``default`` value.
|
|
282
|
+
|
|
283
|
+
This can be turned off
|
|
284
|
+
with the keyword ``delete_wrong_version`` set to ``False``.
|
|
285
|
+
|
|
286
|
+
You can ignore the version used to write a file by using `*` as version::
|
|
287
|
+
|
|
288
|
+
_ = sub_dir.read("test", version="*")
|
|
289
|
+
|
|
290
|
+
Note that reading files which have been written with a ``version`` without
|
|
291
|
+
``version`` keyword will fail because ``SubDir`` will only append additional version information
|
|
292
|
+
to the file if required.
|
|
293
|
+
|
|
294
|
+
Test existence of Files
|
|
295
|
+
^^^^^^^^^^^^^^^^^^^^^^^
|
|
296
|
+
|
|
297
|
+
To test existence of 'file' in a directory, use one of::
|
|
298
|
+
|
|
299
|
+
subdir.exist('file')
|
|
300
|
+
'file' in subdir
|
|
301
|
+
|
|
302
|
+
Deleting files
|
|
303
|
+
^^^^^^^^^^^^^^
|
|
304
|
+
|
|
305
|
+
To delete a 'file', use any of the following::
|
|
306
|
+
|
|
307
|
+
subdir.delete("file")
|
|
308
|
+
del subdir['file']
|
|
309
|
+
|
|
310
|
+
All of these are *silent*, and will not throw errors if "file" does not exist.
|
|
311
|
+
In order to throw an error use::
|
|
312
|
+
|
|
313
|
+
subdir.delete('file', raise_on_error=True)
|
|
314
|
+
|
|
315
|
+
A few member functions assist in deleting a number of files:
|
|
316
|
+
|
|
317
|
+
* :func:`cdxcore.subdir.SubDir.delete_all_files`: delete all files in the directory with matching extension. Do not delete sub directories, or files with extensions different to our own.
|
|
318
|
+
* :func:`cdxcore.subdir.SubDir.delete_all_content`: delete all files with our extension, including in all sub-directories. If a sub-directory is left empty
|
|
319
|
+
upon ``delete_all_content`` delete it, too.
|
|
320
|
+
* :func:`cdxcore.subdir.SubDir.delete_everything`: deletes *everything*, not just files with matching extensions.
|
|
321
|
+
|
|
322
|
+
Caching
|
|
323
|
+
^^^^^^^
|
|
324
|
+
|
|
325
|
+
A :class:`cdxcore.subdir.SubDir` object offers an advanced context for caching calls to :class:`collections.abc.Callable`
|
|
326
|
+
objects with :dec:`cdxcore.subdir.SubDir.cache`.
|
|
327
|
+
|
|
328
|
+
This involves keying the cache by the function name and its current parameters using :class:`cdxcore.uniquehash.UniqueHash`,
|
|
329
|
+
and monitoring the function's version using :dec:`cdxcore.version.version`. The caching behaviour itself can be controlled by
|
|
330
|
+
specifying the desired :class:`cdxcore.subdir.CacheMode`.
|
|
331
|
+
|
|
332
|
+
Import
|
|
333
|
+
------
|
|
334
|
+
.. code-block:: python
|
|
335
|
+
|
|
336
|
+
import cdxcore.subdir as subdir
|
|
337
|
+
|
|
338
|
+
Documentation
|
|
339
|
+
-------------
|
|
340
|
+
"""
|
|
341
|
+
|
|
342
|
+
import os as os
|
|
343
|
+
import uuid as uuid
|
|
344
|
+
import threading as threading
|
|
345
|
+
import pickle as pickle
|
|
346
|
+
import tempfile as tempfile
|
|
347
|
+
import shutil as shutil
|
|
348
|
+
import datetime as datetime
|
|
349
|
+
import inspect as inspect
|
|
350
|
+
import platform as platform
|
|
17
351
|
from collections import OrderedDict
|
|
18
|
-
from collections.abc import Collection, Mapping, Callable
|
|
352
|
+
from collections.abc import Collection, Mapping, Callable, Iterable
|
|
19
353
|
from enum import Enum
|
|
20
|
-
import json as json
|
|
21
|
-
import platform as platform
|
|
22
354
|
from functools import update_wrapper
|
|
23
|
-
import warnings as warnings
|
|
24
355
|
|
|
25
|
-
import
|
|
26
|
-
import jsonpickle as jsonpickle
|
|
27
|
-
import jsonpickle.ext.numpy as jsonpickle_numpy
|
|
28
|
-
import zlib as zlib
|
|
356
|
+
import json as json
|
|
29
357
|
import gzip as gzip
|
|
30
358
|
import blosc as blosc
|
|
31
|
-
|
|
32
|
-
from .
|
|
359
|
+
import sys as sys
|
|
360
|
+
from .err import verify, error, warn, fmt as txtfmt
|
|
361
|
+
from .pretty import PrettyObject
|
|
33
362
|
from .verbose import Context
|
|
34
|
-
from .version import Version, version as version_decorator
|
|
35
|
-
from .util import fmt_list, fmt_filename, DEF_FILE_NAME_MAP,
|
|
36
|
-
from .uniquehash import
|
|
37
|
-
|
|
38
|
-
def error( text, *args, exception = RuntimeError, **kwargs ):
|
|
39
|
-
raise exception( txtfmt(text, *args, **kwargs) )
|
|
40
|
-
def verify( cond, text, *args, exception = RuntimeError, **kwargs ):
|
|
41
|
-
if not cond:
|
|
42
|
-
error( text, *args, **kwargs, exception=exception )
|
|
43
|
-
def warn( text, *args, warning=warnings.RuntimeWarning, stack_level=1, **kwargs ):
|
|
44
|
-
warnings.warn( txtfmt(text, *args, **kwargs), warning, stack_level=stack_level )
|
|
363
|
+
from .version import Version, version as version_decorator, VersionError
|
|
364
|
+
from .util import fmt_list, fmt_filename, DEF_FILE_NAME_MAP, plain, is_filename
|
|
365
|
+
from .uniquehash import unique_hash48, UniqueLabel, NamedUniqueHash
|
|
366
|
+
|
|
45
367
|
|
|
46
368
|
"""
|
|
369
|
+
:meta private:
|
|
47
370
|
compression
|
|
48
371
|
"""
|
|
49
|
-
|
|
372
|
+
|
|
373
|
+
def _import_jsonpickle():
|
|
374
|
+
""" For some dodgy reason importing `jsonpickle` normally causes my tests to fail with a recursion error """
|
|
375
|
+
jsonpickle = sys.modules.get('jsonpickle', None)
|
|
376
|
+
if jsonpickle is None:
|
|
377
|
+
import jsonpickle as jsonpickle
|
|
378
|
+
import jsonpickle.ext.numpy as jsonpickle_numpy
|
|
379
|
+
jsonpickle_numpy.register_handlers()
|
|
380
|
+
return jsonpickle
|
|
381
|
+
|
|
50
382
|
BLOSC_MAX_BLOCK = 2147483631
|
|
51
383
|
BLOSC_MAX_USE = 1147400000 # ... blosc really cannot handle large files
|
|
52
|
-
|
|
53
|
-
"""
|
|
54
|
-
Hashing
|
|
55
|
-
"""
|
|
56
|
-
uniqueFileName48 = uniqueHash48
|
|
57
|
-
uniqueNamedFileName48_16 = namedUniqueHashExt(max_length=48,id_length=16,filename_by=DEF_FILE_NAME_MAP)
|
|
58
|
-
uniqueLabelledFileName48_16 = uniqueLabelExt(max_length=48,id_length=16,filename_by=DEF_FILE_NAME_MAP)
|
|
384
|
+
#
|
|
59
385
|
|
|
60
386
|
def _remove_trailing( path ):
|
|
61
387
|
if len(path) > 0:
|
|
@@ -63,13 +389,34 @@ def _remove_trailing( path ):
|
|
|
63
389
|
return _remove_trailing(path[:-1])
|
|
64
390
|
return path
|
|
65
391
|
|
|
392
|
+
|
|
393
|
+
# ========================================================================
|
|
394
|
+
# Basics
|
|
395
|
+
# ========================================================================
|
|
396
|
+
|
|
66
397
|
class Format(Enum):
|
|
67
|
-
"""
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
398
|
+
"""
|
|
399
|
+
File formats for :class:`cdxcore.subdir.SubDir`.
|
|
400
|
+
|
|
401
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
402
|
+
| Format | Restores objects | Human readable | Speed | Compression | Extension |
|
|
403
|
+
+==============+==================+================+=======+=============+===========+
|
|
404
|
+
| PICKLE | yes | no | high | no | .pck |
|
|
405
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
406
|
+
| JSON_PLAIN | no | yes | low | no | .json |
|
|
407
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
408
|
+
| JSON_PICKLE | yes | limited | low | no | .jpck |
|
|
409
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
410
|
+
| BLOSC | yes | no | high | yes | .zbsc |
|
|
411
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
412
|
+
| GZIP | yes | no | high | yes | .pgz |
|
|
413
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
414
|
+
"""
|
|
415
|
+
PICKLE = 0 #: Standard binary :mod:`pickle` format.
|
|
416
|
+
JSON_PICKLE = 1 #: :mod:`jsonpickle` format.
|
|
417
|
+
JSON_PLAIN = 2 #: ``json`` format.
|
|
418
|
+
BLOSC = 3 #: :mod:`blosc` binary compressed format.
|
|
419
|
+
GZIP = 4 #: :mod:`gzip` binary compressed format.
|
|
73
420
|
|
|
74
421
|
PICKLE = Format.PICKLE
|
|
75
422
|
JSON_PICKLE = Format.JSON_PICKLE
|
|
@@ -77,31 +424,79 @@ JSON_PLAIN = Format.JSON_PLAIN
|
|
|
77
424
|
BLOSC = Format.BLOSC
|
|
78
425
|
GZIP = Format.GZIP
|
|
79
426
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
427
|
+
class VersionPresentError(RuntimeError):
|
|
428
|
+
"""
|
|
429
|
+
Exception raised in case a file was read which had a version, but no test version
|
|
430
|
+
was provided.
|
|
431
|
+
"""
|
|
432
|
+
pass
|
|
433
|
+
|
|
434
|
+
# ========================================================================
|
|
435
|
+
# Caching utilities
|
|
436
|
+
# ========================================================================
|
|
89
437
|
|
|
90
438
|
class CacheMode(object):
|
|
91
439
|
"""
|
|
92
|
-
|
|
93
|
-
|
|
440
|
+
A class which encodes standard behaviour of a caching strategy.
|
|
441
|
+
|
|
442
|
+
**Summary mechanics:**
|
|
443
|
+
|
|
444
|
+
+-----------------------------------------+-------+-------+-------+---------+--------+----------+
|
|
445
|
+
| Action | on | gen | off | update | clear | readonly |
|
|
446
|
+
+=========================================+=======+=======+=======+=========+========+==========+
|
|
447
|
+
| load cache from disk if exists | x | x | | | | x |
|
|
448
|
+
+-----------------------------------------+-------+-------+-------+---------+--------+----------+
|
|
449
|
+
| write updates to disk | x | x | | x | | |
|
|
450
|
+
+-----------------------------------------+-------+-------+-------+---------+--------+----------+
|
|
451
|
+
| delete existing object | | | | | x | |
|
|
452
|
+
+-----------------------------------------+-------+-------+-------+---------+--------+----------+
|
|
453
|
+
| delete existing object if incompatible | x | | | x | x | |
|
|
454
|
+
+-----------------------------------------+-------+-------+-------+---------+--------+----------+
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
**Standard Caching Semantics**
|
|
458
|
+
|
|
459
|
+
Assuming we wish to cache results from calling a function ``f`` in a file named ``filename``
|
|
460
|
+
in a directory ``directory``, then this is the ``CacheMode`` waterfall:
|
|
94
461
|
|
|
95
|
-
|
|
96
|
-
load cache from disk if exists x x - - - x
|
|
97
|
-
write updates to disk x x - x - -
|
|
98
|
-
delete existing object - - - - x -
|
|
99
|
-
delete existing object if incompatible x - - x x -
|
|
462
|
+
.. code-block:: python
|
|
100
463
|
|
|
101
|
-
|
|
464
|
+
def cache_f( filename : str, directory : SubDir, version : str, cache_mode : CacheMode ):
|
|
465
|
+
if cache_mode.delete:
|
|
466
|
+
directory.delete(filename)
|
|
467
|
+
if cache_mode.read:
|
|
468
|
+
r = directory.read(filename,
|
|
469
|
+
default=None,
|
|
470
|
+
version=version,
|
|
471
|
+
raise_on_error=False,
|
|
472
|
+
delete_wrong_version=cache_mode.del_incomp
|
|
473
|
+
)
|
|
474
|
+
if not r is None:
|
|
475
|
+
return r
|
|
476
|
+
|
|
477
|
+
r = f(...) # compute result
|
|
478
|
+
|
|
479
|
+
if cache_mode.write:
|
|
480
|
+
directory.write(filename,
|
|
481
|
+
r,
|
|
482
|
+
version=version,
|
|
483
|
+
raise_on_error=False
|
|
484
|
+
)
|
|
485
|
+
|
|
486
|
+
return r
|
|
487
|
+
|
|
488
|
+
See :func:`cdxcore.subdir.SubDir.cache` for a comprehensive
|
|
489
|
+
implementation.
|
|
490
|
+
|
|
491
|
+
Parameters
|
|
492
|
+
----------
|
|
493
|
+
mode : str, optional
|
|
494
|
+
Which mode to use: ``"on"``, ``"gen"``, ``"off"``, ``"update"``, ``"clear"`` or ``"readonly"``.
|
|
495
|
+
|
|
496
|
+
The default is ``None`` in which case ``"on"`` is used.
|
|
102
497
|
"""
|
|
103
498
|
|
|
104
|
-
ON = "on"
|
|
499
|
+
ON = "on"
|
|
105
500
|
GEN = "gen"
|
|
106
501
|
OFF = "off"
|
|
107
502
|
UPDATE = "update"
|
|
@@ -109,22 +504,31 @@ class CacheMode(object):
|
|
|
109
504
|
READONLY = "readonly"
|
|
110
505
|
|
|
111
506
|
MODES = [ ON, GEN, OFF, UPDATE, CLEAR, READONLY ]
|
|
507
|
+
"""
|
|
508
|
+
List of available modes in text form.
|
|
509
|
+
This list can be used as ``cast`` parameter when calling :func:`cdxcore.config.Config.__call__`::
|
|
510
|
+
|
|
511
|
+
from cdxcore.config import Config
|
|
512
|
+
from cdxcore.subdir import CacheMode
|
|
513
|
+
|
|
514
|
+
def get_cache_mode( config : Config ) -> CacheMode:
|
|
515
|
+
return CacheMode( config("cache_mode", "on", CacheMode.MODES, CacheMode.HELP) )
|
|
516
|
+
"""
|
|
517
|
+
|
|
112
518
|
HELP = "'on' for standard caching; 'gen' for caching but keep existing incompatible files; 'off' to turn off; 'update' to overwrite any existing cache; 'clear' to clear existing caches; 'readonly' to read existing caches but not write new ones"
|
|
113
|
-
|
|
519
|
+
"""
|
|
520
|
+
Standard ``config`` help text, to be used with :func:`cdxcore.config.Config.__call__` as follows::
|
|
521
|
+
|
|
522
|
+
from cdxcore.config import Config
|
|
523
|
+
from cdxcore.subdir import CacheMode
|
|
524
|
+
|
|
525
|
+
def get_cache_mode( config : Config ) -> CacheMode:
|
|
526
|
+
return CacheMode( config("cache_mode", "on", CacheMode.MODES, CacheMode.HELP) )
|
|
527
|
+
"""
|
|
528
|
+
|
|
114
529
|
def __init__(self, mode : str = None ):
|
|
115
530
|
"""
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
on gen off update clear readonly
|
|
119
|
-
load upon start from disk if exists x x - - - x
|
|
120
|
-
write updates to disk x x - x - -
|
|
121
|
-
delete existing object upon start - - - - x -
|
|
122
|
-
delete existing object if incompatible x - - x x -
|
|
123
|
-
|
|
124
|
-
Parameters
|
|
125
|
-
----------
|
|
126
|
-
mode : str
|
|
127
|
-
Which mode to use.
|
|
531
|
+
:meta private:
|
|
128
532
|
"""
|
|
129
533
|
if isinstance( mode, CacheMode ):
|
|
130
534
|
return# id copy constuctor
|
|
@@ -145,22 +549,22 @@ class CacheMode(object):
|
|
|
145
549
|
|
|
146
550
|
@property
|
|
147
551
|
def read(self) -> bool:
|
|
148
|
-
""" Whether to load any existing data
|
|
552
|
+
""" Whether to load any existing cached data. """
|
|
149
553
|
return self._read
|
|
150
554
|
|
|
151
555
|
@property
|
|
152
556
|
def write(self) -> bool:
|
|
153
|
-
""" Whether to
|
|
557
|
+
""" Whether to cache newly computed data to disk. """
|
|
154
558
|
return self._write
|
|
155
559
|
|
|
156
560
|
@property
|
|
157
561
|
def delete(self) -> bool:
|
|
158
|
-
""" Whether to delete existing data """
|
|
562
|
+
""" Whether to delete existing data. """
|
|
159
563
|
return self._delete
|
|
160
564
|
|
|
161
565
|
@property
|
|
162
566
|
def del_incomp(self) -> bool:
|
|
163
|
-
""" Whether to delete existing data if it is not compatible """
|
|
567
|
+
""" Whether to delete existing data if it is not compatible or has the wrong version. """
|
|
164
568
|
return self._del_in
|
|
165
569
|
|
|
166
570
|
def __str__(self) -> str:# NOQA
|
|
@@ -175,311 +579,327 @@ class CacheMode(object):
|
|
|
175
579
|
|
|
176
580
|
@property
|
|
177
581
|
def is_off(self) -> bool:
|
|
178
|
-
""" Whether this cache mode is OFF """
|
|
582
|
+
""" Whether this cache mode is OFF. """
|
|
179
583
|
return self.mode == self.OFF
|
|
180
584
|
|
|
181
585
|
@property
|
|
182
586
|
def is_on(self) -> bool:
|
|
183
|
-
""" Whether this cache mode is ON """
|
|
587
|
+
""" Whether this cache mode is ON. """
|
|
184
588
|
return self.mode == self.ON
|
|
185
589
|
|
|
186
590
|
@property
|
|
187
591
|
def is_gen(self) -> bool:
|
|
188
|
-
""" Whether this cache mode is GEN """
|
|
592
|
+
""" Whether this cache mode is GEN. """
|
|
189
593
|
return self.mode == self.GEN
|
|
190
594
|
|
|
191
595
|
@property
|
|
192
596
|
def is_update(self) -> bool:
|
|
193
|
-
""" Whether this cache mode is UPDATE """
|
|
597
|
+
""" Whether this cache mode is UPDATE. """
|
|
194
598
|
return self.mode == self.UPDATE
|
|
195
599
|
|
|
196
600
|
@property
|
|
197
601
|
def is_clear(self) -> bool:
|
|
198
|
-
""" Whether this cache mode is CLEAR """
|
|
602
|
+
""" Whether this cache mode is CLEAR. """
|
|
199
603
|
return self.mode == self.CLEAR
|
|
200
604
|
|
|
201
605
|
@property
|
|
202
606
|
def is_readonly(self) -> bool:
|
|
203
|
-
""" Whether this cache mode is READONLY """
|
|
607
|
+
""" Whether this cache mode is READONLY. """
|
|
204
608
|
return self.mode == self.READONLY
|
|
205
609
|
|
|
206
610
|
class CacheController( object ):
|
|
207
|
-
"""
|
|
208
|
-
Central control for
|
|
209
|
-
|
|
611
|
+
r"""
|
|
612
|
+
Central control parameters for caching.
|
|
613
|
+
|
|
614
|
+
When a parameter object of this type
|
|
615
|
+
is assigned to a :class:`cdxcore.subdir.SubDir`,
|
|
616
|
+
then it is passed on when sub-directories are
|
|
617
|
+
created. This way all ``SubDir`` have the same
|
|
618
|
+
caching behaviour.
|
|
619
|
+
|
|
620
|
+
See :class:`cdxcore.subdir.CacheController` for
|
|
621
|
+
a list of control parameters.
|
|
622
|
+
|
|
623
|
+
Parameters
|
|
624
|
+
----------
|
|
625
|
+
exclude_arg_types : list[type], optional
|
|
626
|
+
List of types to exclude from producing unique ids from function arguments.
|
|
627
|
+
|
|
628
|
+
Defaults to ``[Context]``.
|
|
629
|
+
|
|
630
|
+
cache_mode : CacheMode, optional
|
|
631
|
+
Top level cache control.
|
|
632
|
+
Set to "OFF" to turn off all caching.
|
|
633
|
+
Default is "ON".
|
|
634
|
+
|
|
635
|
+
max_filename_length : int, optional
|
|
636
|
+
Maximum filename length. If a unique id exceeds this length, a hash of length
|
|
637
|
+
``hash_length`` will be integrated into the file name.
|
|
638
|
+
See :class:`cdxcore.uniquehash.NamedUniqueHash`.
|
|
639
|
+
Default is ``48``.
|
|
640
|
+
|
|
641
|
+
hash_length : int, optional
|
|
642
|
+
Length of the hash used to make sure each filename is unique
|
|
643
|
+
See :class:`cdxcore.uniquehash.NamedUniqueHash`.
|
|
644
|
+
Default is ``8``.
|
|
645
|
+
|
|
646
|
+
debug_verbose : :class:`cdxcore.verbose.Context`, optional
|
|
647
|
+
If not ``None`` print caching process messages to this object.
|
|
648
|
+
|
|
649
|
+
Default is ``None``.
|
|
650
|
+
|
|
651
|
+
keep_last_arguments : bool, optional
|
|
652
|
+
Keep a dictionary of all parameters as string representations after each function call.
|
|
653
|
+
If the function ``F`` was decorated using :meth:`cdxcore.subdir.SubDir.cache`,
|
|
654
|
+
you can access this information via ``F.cache_info.last_arguments``.
|
|
655
|
+
|
|
656
|
+
Note that strings are limited to 100 characters per argument to avoid memory
|
|
657
|
+
overload when large objects are passed.
|
|
658
|
+
|
|
659
|
+
Default is ``False``.
|
|
210
660
|
"""
|
|
211
661
|
|
|
212
662
|
def __init__(self, *,
|
|
213
663
|
exclude_arg_types : list[type] = [Context],
|
|
214
664
|
cache_mode : CacheMode = CacheMode.ON,
|
|
215
665
|
max_filename_length: int = 48,
|
|
216
|
-
hash_length : int =
|
|
666
|
+
hash_length : int = 8,
|
|
217
667
|
debug_verbose : Context = None,
|
|
218
668
|
keep_last_arguments: bool = False
|
|
219
669
|
):
|
|
220
670
|
"""
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
exclude_arg_types :
|
|
226
|
-
List of types to exclude from producing unique ids from function arguments. Defaults to [SubDir, Context]
|
|
227
|
-
cache_mode :
|
|
228
|
-
Top level cache control. Set to "OFF" to turn off all caching. Default is "ON"
|
|
229
|
-
max_filename_length :
|
|
230
|
-
Maximum filename length. If unique id's exceed the file name a hash of length 'hash_length' will be intergated into the file name.
|
|
231
|
-
See cdxbasics.util.namedUniqueHashExt and cdxbasics.util.uniqueLabelExt
|
|
232
|
-
hash_length :
|
|
233
|
-
Length of the hash used to make sure each filename is unique
|
|
234
|
-
See cdxbasics.util.namedUniqueHashExt and cdxbasics.util.uniqueLabelExt
|
|
235
|
-
debug_verbose :
|
|
236
|
-
If non-None print caching process messages to this object.
|
|
237
|
-
keep_last_arguments :
|
|
238
|
-
keep a dictionary of all parameters as string representations after each function call.
|
|
239
|
-
If the function F was decorated using SubDir.cache(), you can access this information via
|
|
240
|
-
F.cache_info.last_arguments
|
|
241
|
-
Note that strings are limited to 100 characters per argument to avoid memory
|
|
242
|
-
overload when large objects are passed.
|
|
243
|
-
"""
|
|
244
|
-
max_filename_length = int(max_filename_length)
|
|
245
|
-
hash_length = int(hash_length)
|
|
671
|
+
:meta private:
|
|
672
|
+
"""
|
|
673
|
+
max_filename_length = int(max_filename_length)
|
|
674
|
+
hash_length = int(hash_length)
|
|
246
675
|
assert max_filename_length>0, ("'max_filename_length' must be positive")
|
|
247
676
|
assert hash_length>0 and hash_length<=max_filename_length, ("'hash_length' must be positive and at most 'max_filename_length'")
|
|
248
677
|
assert max_filename_length>=hash_length, ("'hash_length' must not exceed 'max_filename_length")
|
|
249
678
|
self.cache_mode = CacheMode(cache_mode if not cache_mode is None else CacheMode.ON)
|
|
250
|
-
self.debug_verbose = debug_verbose
|
|
679
|
+
self.debug_verbose = Context(debug_verbose) if isinstance(debug_verbose, (int,str)) else debug_verbose
|
|
251
680
|
self.exclude_arg_types = set(exclude_arg_types) if not exclude_arg_types is None else None
|
|
252
|
-
self.versioned =
|
|
253
|
-
self.
|
|
254
|
-
self.
|
|
681
|
+
self.versioned = PrettyObject() # list
|
|
682
|
+
self.labelledFileName = NamedUniqueHash(max_length=max_filename_length,id_length=hash_length,filename_by=DEF_FILE_NAME_MAP)
|
|
683
|
+
self.uniqueFileName = UniqueLabel(max_length=max_filename_length,id_length=hash_length,filename_by=None)
|
|
255
684
|
self.keep_last_arguments = keep_last_arguments
|
|
256
685
|
|
|
257
686
|
default_cacheController = CacheController()
|
|
687
|
+
#
|
|
258
688
|
|
|
689
|
+
# ========================================================================
|
|
690
|
+
# SubDir
|
|
691
|
+
# ========================================================================
|
|
259
692
|
|
|
260
|
-
class
|
|
261
|
-
"""
|
|
262
|
-
|
|
693
|
+
class SubDir(object):
|
|
694
|
+
r"""
|
|
695
|
+
``SubDir`` implements a transparent i/o
|
|
696
|
+
interface for storing data in files.
|
|
263
697
|
|
|
264
|
-
|
|
265
|
-
def __init__(self):
|
|
266
|
-
""" track cache files """
|
|
267
|
-
self._files = []
|
|
268
|
-
def __iadd__(self, new_file):
|
|
269
|
-
""" Add a new file to the tracker """
|
|
270
|
-
self._files.append( new_file )
|
|
271
|
-
def delete_cache_files(self):
|
|
272
|
-
""" Delete all tracked files """
|
|
273
|
-
for file in self._files:
|
|
274
|
-
if os.path.exists(file):
|
|
275
|
-
os.remove(file)
|
|
276
|
-
self._files = []
|
|
277
|
-
def __str__(self) -> str:#NOQA
|
|
278
|
-
return f"Tracked: {self._files}"
|
|
279
|
-
def __repr__(self) -> str:#NOQA
|
|
280
|
-
return f"Tracked: {self._files}"
|
|
281
|
-
|
|
282
|
-
class InitCacheInfo(object):
|
|
283
|
-
pass
|
|
698
|
+
**Directories**
|
|
284
699
|
|
|
285
|
-
|
|
286
|
-
|
|
700
|
+
Instantiate a ``SubDir`` with a directory name. There are some
|
|
701
|
+
pre-defined relative system paths the name can refer to::
|
|
287
702
|
|
|
288
|
-
|
|
289
|
-
#
|
|
703
|
+
from cdxcore.subdir import SubDir
|
|
704
|
+
parent = SubDir("!/subdir") # relative to system temp directory
|
|
705
|
+
parent = SubDir("~/subdir") # relative to user home directory
|
|
706
|
+
parent = SubDir("./subdir") # relative to current working directory (explicit)
|
|
707
|
+
parent = SubDir("subdir") # relative to current working directory (implicit)
|
|
708
|
+
parent = SubDir("/tmp/subdir") # absolute path (linux)
|
|
709
|
+
parent = SubDir("C:/temp/subdir") # absolute path (windows)
|
|
710
|
+
parent = SubDir("") # current working directory
|
|
711
|
+
|
|
712
|
+
Sub-directories can be generated in a number of ways::
|
|
290
713
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
The generic pattern is:
|
|
714
|
+
subDir = parent('subdir') # using __call__
|
|
715
|
+
subDir = SubDir('subdir', parent) # explicit constructor
|
|
716
|
+
subDir = SubDir('subdir', parent="!/") # explicit constructor with parent being a string
|
|
295
717
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
718
|
+
Files managed by ``SubDir`` will usually have the same extension.
|
|
719
|
+
This extension can be specified with ``ext``, or as part of the directory string::
|
|
720
|
+
|
|
721
|
+
subDir = SubDir("~/subdir", ext="bin") # set extension to 'bin'
|
|
722
|
+
subDir = SubDir("~/subdir;*.bin") # set extension to 'bin'
|
|
723
|
+
|
|
724
|
+
Leaving the extension as default ``None`` allows ``SubDir`` to automatically use
|
|
725
|
+
the extension associated with any specified format.
|
|
301
726
|
|
|
302
|
-
|
|
303
|
-
assume f() will want to store some data:
|
|
727
|
+
**Copy Constructor**
|
|
304
728
|
|
|
305
|
-
|
|
729
|
+
The copy constructor is shallow.
|
|
306
730
|
|
|
307
|
-
|
|
308
|
-
or
|
|
309
|
-
subDir = SubDir('subdir', parentDir)
|
|
310
|
-
:
|
|
311
|
-
:
|
|
312
|
-
Write data:
|
|
731
|
+
**File I/O**
|
|
313
732
|
|
|
314
|
-
|
|
315
|
-
subDir.item2 = item2 <-- member style
|
|
316
|
-
subDir.write('item3',item3) <-- explicit
|
|
733
|
+
Write data with :meth:`cdxcore.subdir.SubDir.write`::
|
|
317
734
|
|
|
318
|
-
|
|
735
|
+
subDir.write('item3',item3) # explicit
|
|
736
|
+
subDir['item1'] = item1 # dictionary style
|
|
319
737
|
|
|
320
|
-
|
|
738
|
+
Note that :meth:`cdxcore.subdir.SubDir.write` can write to multiple files at the same time.
|
|
321
739
|
|
|
322
|
-
|
|
740
|
+
Read data with :meth:`cdxcore.subdir.SubDir.read`::
|
|
323
741
|
|
|
324
|
-
|
|
742
|
+
item = subDir('item', 'i1') # returns 'i1' if not found.
|
|
743
|
+
item = subDir.read('item') # returns None if not found
|
|
744
|
+
item = subDir.read('item','i2') # returns 'i2' if not found
|
|
745
|
+
item = subDir['item'] # raises a KeyError if not found
|
|
325
746
|
|
|
326
|
-
|
|
327
|
-
item = subdir.read('item') <-- returns None if not found
|
|
328
|
-
item = subdir.read('item','i2') <-- returns 'i2' if not found
|
|
329
|
-
item = subDir['item'] <-- throws a KeyError if not found
|
|
330
|
-
item = subDir.item <-- throws an AttributeError if not found
|
|
747
|
+
Treat files in a directory like dictionaries::
|
|
331
748
|
|
|
332
|
-
|
|
749
|
+
for file in subDir:
|
|
750
|
+
data = subDir[file]
|
|
751
|
+
f(file, data)
|
|
333
752
|
|
|
334
|
-
|
|
753
|
+
for file, data in subDir.items():
|
|
754
|
+
f(file, data)
|
|
335
755
|
|
|
336
|
-
|
|
756
|
+
Delete items::
|
|
337
757
|
|
|
338
|
-
|
|
339
|
-
|
|
758
|
+
del subDir['item'] # silently fails if 'item' does not exist
|
|
759
|
+
subDir.delete('item') # silently fails if 'item' does not exist
|
|
760
|
+
subDir.delete('item', True) # raises a KeyError if 'item' does not exist
|
|
340
761
|
|
|
341
|
-
|
|
762
|
+
Cleaning up::
|
|
342
763
|
|
|
343
|
-
|
|
344
|
-
del subDir.item <-- silently fails if 'item' does not exist
|
|
345
|
-
subDir.delete('item') <-- silently fails if 'item' does not exist
|
|
346
|
-
subDir.delete('item', True) <-- throw a KeyError if 'item' does not exit
|
|
764
|
+
parent.delete_all_content() # silently deletes all files with matching extensions, and sub directories.
|
|
347
765
|
|
|
348
|
-
|
|
766
|
+
**File Format**
|
|
349
767
|
|
|
350
|
-
|
|
768
|
+
``SubDir`` supports a number of file formats via :class:`cdxcore.subdir.Format`.
|
|
769
|
+
Those can be controlled with the ``fmt`` keyword in various functions not least
|
|
770
|
+
:class:`cdxcore.subdir.SubDir`::
|
|
351
771
|
|
|
352
|
-
|
|
353
|
-
The most straightfoward way is to specify the format of the directory itself:
|
|
772
|
+
subdir = SubDir("!/.test", fmt=SubDir.JSON_PICKLE)
|
|
354
773
|
|
|
355
|
-
|
|
774
|
+
See :class:`cdxcore.subdir.Format` for supported formats.
|
|
775
|
+
|
|
776
|
+
Parameters
|
|
777
|
+
----------
|
|
778
|
+
name : str
|
|
779
|
+
Name of the directory.
|
|
780
|
+
|
|
781
|
+
The name may start with any of the following special characters:
|
|
782
|
+
|
|
783
|
+
* ``'.'`` for current directory
|
|
784
|
+
* ``'~'`` for home directory
|
|
785
|
+
* ``'!'`` for system default temp directory
|
|
786
|
+
|
|
787
|
+
The directory name may also contain a formatting string for defining ``ext`` on the fly:
|
|
788
|
+
for example use ``"!/test;*.bin"`` to specify a directory ``"test"`` in the user's
|
|
789
|
+
temp directory with extension ``"bin"``.
|
|
790
|
+
|
|
791
|
+
The directory name can be set to ``None`` in which case it is always empty
|
|
792
|
+
and attempts to write to it fail with :class:`EOFError`.
|
|
793
|
+
|
|
794
|
+
parent : str | SubDir, optional
|
|
795
|
+
Parent directory.
|
|
796
|
+
|
|
797
|
+
If ``parent`` is a :class:`cdxcore.subdir.SubDir` then its parameters are used
|
|
798
|
+
as default values here.
|
|
356
799
|
|
|
357
|
-
|
|
800
|
+
Default is ``None``.
|
|
801
|
+
|
|
802
|
+
ext : str, optional
|
|
803
|
+
Extension for files managed by this ``SubDir``. All files will share the same extension.
|
|
358
804
|
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
using, essentially, GZIP.
|
|
805
|
+
If set to ``""`` no extension is assigned to this directory. That means, for example, that
|
|
806
|
+
:meth:`cdxcore.subdir.SubDir.files` returns all files contained in the directory, not
|
|
807
|
+
just files with a specific extension.
|
|
808
|
+
|
|
809
|
+
If ``None``, use an extension depending on ``fmt``:
|
|
810
|
+
|
|
811
|
+
* 'pck' for the default PICKLE format.
|
|
812
|
+
* 'json' for JSON_PLAIN.
|
|
813
|
+
* 'jpck' for JSON_PICKLE.
|
|
814
|
+
* 'zbsc' for BLOSC.
|
|
815
|
+
* 'pgz' for GZIP.
|
|
816
|
+
|
|
817
|
+
Default is ``None``.
|
|
818
|
+
|
|
819
|
+
fmt : :class:`cdxcore.subdir.Format`, optional
|
|
375
820
|
|
|
376
|
-
|
|
821
|
+
One of the :class:`cdxcore.subdir.Format` codes.
|
|
822
|
+
If ``ext`` is left as ``None`` then setting a format will also set the corresponding ``ext``.
|
|
823
|
+
|
|
824
|
+
Default is ``Format.PICKLE``.
|
|
377
825
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
826
|
+
create_directory : bool | None, optional
|
|
827
|
+
|
|
828
|
+
Whether to create the directory upon creation of the ``SubDir`` object; otherwise it will be created upon first
|
|
829
|
+
:meth:`cdxcore.subdir.SubDir.write`.
|
|
830
|
+
|
|
831
|
+
Set to ``None`` to use the setting of the parent directory, or ``False`` if no parent
|
|
832
|
+
is specified.
|
|
833
|
+
|
|
834
|
+
Default is ``None``.
|
|
384
835
|
|
|
385
|
-
|
|
836
|
+
delete_everything : bool, optional
|
|
837
|
+
|
|
838
|
+
Delete all contents in the newly defined sub directory upon creation.
|
|
386
839
|
|
|
387
|
-
|
|
840
|
+
Default is ``False``.
|
|
841
|
+
|
|
842
|
+
cache_controller : :class:`cdxcore.subdir.CacheController`, optional
|
|
843
|
+
|
|
844
|
+
An object which fine-tunes the behaviour of :meth:`cdxcore.subdir.SubDir.cache`.
|
|
845
|
+
See that function's documentation for further details. Default is ``None``.
|
|
388
846
|
"""
|
|
389
847
|
|
|
390
848
|
class __RETURN_SUB_DIRECTORY(object):
|
|
391
849
|
pass
|
|
850
|
+
""" :meta private: """
|
|
392
851
|
|
|
393
|
-
Format = Format
|
|
394
|
-
|
|
852
|
+
Format = Format # :meta private
|
|
853
|
+
""" :meta private: """
|
|
854
|
+
|
|
855
|
+
PICKLE = Format.PICKLE
|
|
856
|
+
""" :meta private: """
|
|
857
|
+
|
|
395
858
|
JSON_PICKLE = Format.JSON_PICKLE
|
|
859
|
+
""" :meta private: """
|
|
860
|
+
|
|
396
861
|
JSON_PLAIN = Format.JSON_PLAIN
|
|
862
|
+
""" :meta private: """
|
|
863
|
+
|
|
397
864
|
BLOSC = Format.BLOSC
|
|
865
|
+
""" :meta private: """
|
|
866
|
+
|
|
398
867
|
GZIP = Format.GZIP
|
|
399
|
-
|
|
400
|
-
|
|
868
|
+
""" :meta private: """
|
|
869
|
+
|
|
401
870
|
RETURN_SUB_DIRECTORY = __RETURN_SUB_DIRECTORY
|
|
871
|
+
""" :meta private: """
|
|
872
|
+
|
|
402
873
|
DEFAULT_FORMAT = Format.PICKLE
|
|
403
|
-
|
|
874
|
+
""" Default :class:`cdxcore.subdir.Format`: ``Format.PICKLE`` """
|
|
875
|
+
|
|
404
876
|
EXT_FMT_AUTO = "*"
|
|
877
|
+
""" :meta private: """
|
|
405
878
|
|
|
406
879
|
MAX_VERSION_BINARY_LEN = 128
|
|
407
|
-
|
|
880
|
+
""" :meta private: """
|
|
881
|
+
|
|
408
882
|
VER_NORMAL = 0
|
|
883
|
+
""" :meta private: """
|
|
409
884
|
VER_CHECK = 1
|
|
885
|
+
""" :meta private: """
|
|
410
886
|
VER_RETURN = 2
|
|
887
|
+
""" :meta private: """
|
|
888
|
+
|
|
411
889
|
|
|
412
890
|
def __init__(self, name : str,
|
|
413
|
-
parent = None, *,
|
|
891
|
+
parent : str|type = None, *,
|
|
414
892
|
ext : str = None,
|
|
415
893
|
fmt : Format = None,
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
894
|
+
create_directory : bool = None,
|
|
895
|
+
delete_everything : bool = False,
|
|
896
|
+
cache_controller : CacheController = None
|
|
419
897
|
):
|
|
420
898
|
"""
|
|
421
|
-
Instantiates a sub directory which contains
|
|
422
|
-
By default the directory is created.
|
|
423
|
-
|
|
424
|
-
Absolute directories
|
|
425
|
-
sd = SubDir("!/subdir") - relative to system temp directory
|
|
426
|
-
sd = SubDir("~/subdir") - relative to user home directory
|
|
427
|
-
sd = SubDir("./subdir") - relative to current working directory (explicit)
|
|
428
|
-
sd = SubDir("subdir") - relative to current working directory (implicit)
|
|
429
|
-
sd = SubDir("/tmp/subdir") - absolute path (linux)
|
|
430
|
-
sd = SubDir("C:/temp/subdir") - absolute path (windows)
|
|
431
|
-
Short-cut
|
|
432
|
-
sd = SubDir("") - current working directory
|
|
433
|
-
|
|
434
|
-
It is often desired that the user specifies a sub-directory name under some common parent directory.
|
|
435
|
-
You can create sub directories if you provide a 'parent' directory:
|
|
436
|
-
sd2 = SubDir("subdir2", parent=sd) - relative to other sub directory
|
|
437
|
-
sd2 = sd("subdir2") - using call operator
|
|
438
|
-
Works with strings, too:
|
|
439
|
-
sd2 = SubDir("subdir2", parent="~/my_config") - relative to ~/my_config
|
|
440
|
-
|
|
441
|
-
All files managed by SubDir will have the same extension.
|
|
442
|
-
The extension can be specified with 'ext', or as part of the directory string:
|
|
443
|
-
sd = SubDir("~/subdir;*.bin") - set extension to 'bin'
|
|
444
|
-
|
|
445
|
-
COPY CONSTRUCTION
|
|
446
|
-
This function also allows copy construction and constrution from a repr() string.
|
|
447
|
-
|
|
448
|
-
HANDLING KEYS
|
|
449
|
-
SubDirs allows reading data using the item and attribute notation, i.e. we may use
|
|
450
|
-
sd = SubDir("~/subdir")
|
|
451
|
-
x = sd.x
|
|
452
|
-
y = sd['y']
|
|
453
|
-
If the respective keys are not found, exceptions are thrown.
|
|
454
|
-
|
|
455
|
-
NONE OBJECTS
|
|
456
|
-
It is possible to set the directory name to 'None'. In this case the directory will behave as if:
|
|
457
|
-
No files exist
|
|
458
|
-
Writing fails with a EOFError.
|
|
899
|
+
Instantiates a sub directory which contains files with a common extension.
|
|
459
900
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
name - Name of the directory.
|
|
463
|
-
'.' for current directory
|
|
464
|
-
'~' for home directory
|
|
465
|
-
'!' for system default temp directory
|
|
466
|
-
May contain a formatting string for defining 'ext' on the fly:
|
|
467
|
-
Use "!/test;*.bin" to specify 'test' in the system temp directory as root directory with extension 'bin'
|
|
468
|
-
Can be set to None, see above.
|
|
469
|
-
parent - Parent directory. If provided, will also set defaults for 'ext' and 'raiseOnError'
|
|
470
|
-
ext - standard file extenson for data files. All files will share the same extension.
|
|
471
|
-
If None, use the parent extension, or if that is not specified use an extension depending on 'fmt':
|
|
472
|
-
'pck' for the default PICKLE format
|
|
473
|
-
'json' for JSON_PLAIN
|
|
474
|
-
'jpck' for JSON_PICKLE
|
|
475
|
-
Set to "" to turn off managing extensions.
|
|
476
|
-
fmt - format, current pickle or json
|
|
477
|
-
eraseEverything - delete all contents in the newly defined subdir
|
|
478
|
-
createDirectory - whether to create the directory.
|
|
479
|
-
Otherwise it will be created upon first write().
|
|
480
|
-
Set to None to use the setting of the parent directory
|
|
481
|
-
"""
|
|
482
|
-
createDirectory = bool(createDirectory) if not createDirectory is None else None
|
|
901
|
+
"""
|
|
902
|
+
create_directory = bool(create_directory) if not create_directory is None else None
|
|
483
903
|
|
|
484
904
|
# copy constructor support
|
|
485
905
|
if isinstance(name, SubDir):
|
|
@@ -487,9 +907,9 @@ class SubDir(object):
|
|
|
487
907
|
self._path = name._path
|
|
488
908
|
self._ext = name._ext if ext is None else ext
|
|
489
909
|
self._fmt = name._fmt if fmt is None else fmt
|
|
490
|
-
self._crt = name._crt if
|
|
491
|
-
self._cctrl = name._cctrl if
|
|
492
|
-
if
|
|
910
|
+
self._crt = name._crt if create_directory is None else create_directory
|
|
911
|
+
self._cctrl = name._cctrl if cache_controller is None else cache_controller
|
|
912
|
+
if delete_everything: raise ValueError( "Cannot use 'delete_everything' when cloning a directory")
|
|
493
913
|
return
|
|
494
914
|
|
|
495
915
|
# reconstruction from a dictionary
|
|
@@ -498,14 +918,14 @@ class SubDir(object):
|
|
|
498
918
|
self._path = name['_path']
|
|
499
919
|
self._ext = name['_ext'] if ext is None else ext
|
|
500
920
|
self._fmt = name['_fmt'] if fmt is None else fmt
|
|
501
|
-
self._crt = name['_crt'] if
|
|
502
|
-
self._cctrl = name['_cctrl'] if
|
|
503
|
-
if
|
|
921
|
+
self._crt = name['_crt'] if create_directory is None else create_directory
|
|
922
|
+
self._cctrl = name['_cctrl'] if cache_controller is None else cache_controller
|
|
923
|
+
if delete_everything: raise ValueError( "Cannot use 'delete_everything' when cloning a directory")
|
|
504
924
|
return
|
|
505
925
|
|
|
506
926
|
# parent
|
|
507
927
|
if isinstance(parent, str):
|
|
508
|
-
parent = SubDir( parent, ext=ext, fmt=fmt,
|
|
928
|
+
parent = SubDir( parent, ext=ext, fmt=fmt, create_directory=create_directory, cache_controller=cache_controller )
|
|
509
929
|
if not parent is None and not isinstance(parent, SubDir):
|
|
510
930
|
raise ValueError( "'parent' must be SubDir, str, or None. Found object of type '{type(parent)}'")
|
|
511
931
|
|
|
@@ -543,15 +963,15 @@ class SubDir(object):
|
|
|
543
963
|
else:
|
|
544
964
|
self._ext = SubDir._extract_ext(ext)
|
|
545
965
|
|
|
546
|
-
#
|
|
547
|
-
if
|
|
548
|
-
self._crt =
|
|
966
|
+
# create_directory
|
|
967
|
+
if create_directory is None:
|
|
968
|
+
self._crt = False if parent is None else parent._crt
|
|
549
969
|
else:
|
|
550
|
-
self._crt = bool(
|
|
970
|
+
self._crt = bool(create_directory)
|
|
551
971
|
|
|
552
972
|
# cache controller
|
|
553
|
-
assert type(
|
|
554
|
-
self._cctrl =
|
|
973
|
+
assert cache_controller is None or type(cache_controller).__name__ == CacheController.__name__, ("'cache_controller' should be of type 'CacheController'", type(cache_controller))
|
|
974
|
+
self._cctrl = cache_controller
|
|
555
975
|
|
|
556
976
|
# name
|
|
557
977
|
if name is None:
|
|
@@ -566,12 +986,12 @@ class SubDir(object):
|
|
|
566
986
|
if len(name) > 1 and name[1] != '/':
|
|
567
987
|
raise ValueError( txtfmt("If 'name' starts with '%s', then the second character must be '/' (or '\\' on windows). Found 'name' set to '%s'", name[:1], _name ))
|
|
568
988
|
if name[0] == '!':
|
|
569
|
-
name = SubDir.
|
|
989
|
+
name = SubDir.temp_dir()[:-1] + name[1:]
|
|
570
990
|
elif name[0] == ".":
|
|
571
|
-
name = SubDir.
|
|
991
|
+
name = SubDir.working_dir()[:-1] + name[1:]
|
|
572
992
|
else:
|
|
573
993
|
assert name[0] == "~", ("Internal error", name[0] )
|
|
574
|
-
name = SubDir.
|
|
994
|
+
name = SubDir.user_dir()[:-1] + name[1:]
|
|
575
995
|
elif name == "..":
|
|
576
996
|
error("Cannot use name '..'")
|
|
577
997
|
elif not parent is None:
|
|
@@ -587,33 +1007,37 @@ class SubDir(object):
|
|
|
587
1007
|
self._path = os.path.abspath(name) + '/'
|
|
588
1008
|
self._path = self._path.replace('\\','/')
|
|
589
1009
|
|
|
590
|
-
if
|
|
591
|
-
self.
|
|
1010
|
+
if delete_everything:
|
|
1011
|
+
self.delete_everything(keep_directory=self._crt)
|
|
592
1012
|
if self._crt:
|
|
593
|
-
self.
|
|
1013
|
+
self.create_directory()
|
|
594
1014
|
|
|
595
1015
|
@staticmethod
|
|
596
|
-
def
|
|
1016
|
+
def expand_std_root( name ):
|
|
597
1017
|
"""
|
|
598
|
-
Expands
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
1018
|
+
Expands ``name`` by a standardized root directory if provided:
|
|
1019
|
+
|
|
1020
|
+
The first character of ``name`` can be either of:
|
|
1021
|
+
|
|
1022
|
+
* ``"!"`` returns :meth:`cdxcore.subdir.SubDir.temp_dir()`.
|
|
1023
|
+
* ``"."`` returns :meth:`cdxcore.subdir.SubDir.working_dir()`.
|
|
1024
|
+
* ``"~"`` returns :meth:`cdxcore.subdir.SubDir.user_dir()`.
|
|
1025
|
+
|
|
1026
|
+
If neither of these matches the first character, ``name``
|
|
1027
|
+
is returned as is.
|
|
603
1028
|
"""
|
|
604
1029
|
if len(name) < 2 or name[0] not in ['.','!','~'] or name[1] not in ["\\","/"]:
|
|
605
1030
|
return name
|
|
606
1031
|
if name[0] == '!':
|
|
607
|
-
return SubDir.
|
|
1032
|
+
return SubDir.temp_dir() + name[2:]
|
|
608
1033
|
elif name[0] == ".":
|
|
609
|
-
return SubDir.
|
|
1034
|
+
return SubDir.working_dir() + name[2:]
|
|
610
1035
|
else:
|
|
611
|
-
return SubDir.
|
|
1036
|
+
return SubDir.user_dir() + name[2:]
|
|
612
1037
|
|
|
613
|
-
def
|
|
1038
|
+
def create_directory( self ):
|
|
614
1039
|
"""
|
|
615
|
-
Creates the directory if it doesn't exist yet.
|
|
616
|
-
Does not do anything if is_none.
|
|
1040
|
+
Creates the current directory if it doesn't exist yet.
|
|
617
1041
|
"""
|
|
618
1042
|
# create directory/clean up
|
|
619
1043
|
if self._path is None:
|
|
@@ -628,8 +1052,8 @@ class SubDir(object):
|
|
|
628
1052
|
if not os.path.isdir(self._path[:-1]):
|
|
629
1053
|
raise NotADirectoryError(txtfmt( "Cannot use sub directory %s: object exists but is not a directory", self._path[:-1] ))
|
|
630
1054
|
|
|
631
|
-
def
|
|
632
|
-
"""
|
|
1055
|
+
def path_exists(self) -> bool:
|
|
1056
|
+
""" Whether the current directory exists """
|
|
633
1057
|
return os.path.exists( self._path[:-1] ) if not self._path is None else False
|
|
634
1058
|
|
|
635
1059
|
# -- a few basic properties --
|
|
@@ -659,60 +1083,79 @@ class SubDir(object):
|
|
|
659
1083
|
|
|
660
1084
|
@property
|
|
661
1085
|
def is_none(self) -> bool:
|
|
662
|
-
""" Whether this object is
|
|
1086
|
+
""" Whether this object is ``None`` or not. For such ``SubDir`` object no files exists, and writing any file will fail. """
|
|
663
1087
|
return self._path is None
|
|
664
1088
|
|
|
665
1089
|
@property
|
|
666
1090
|
def path(self) -> str:
|
|
667
1091
|
"""
|
|
668
|
-
Return current path, including trailing '/'
|
|
669
|
-
|
|
1092
|
+
Return current path, including trailing ``'/'``.
|
|
1093
|
+
|
|
1094
|
+
Note that the path may not exist yet. If existence is required, consider using
|
|
1095
|
+
:meth:`cdxcore.subdir.SubDir.existing_path`.
|
|
670
1096
|
"""
|
|
671
1097
|
return self._path
|
|
672
1098
|
|
|
673
1099
|
@property
|
|
674
1100
|
def existing_path(self) -> str:
|
|
675
1101
|
"""
|
|
676
|
-
Return current path, including training '/'
|
|
677
|
-
|
|
1102
|
+
Return current path, including trailing ``'/'``.
|
|
1103
|
+
|
|
1104
|
+
``existing_path`` ensures that the directory structure exists (or raises an exception).
|
|
1105
|
+
Use :meth:`cdxcore.subdir.SubDir.path` if creation on the fly is not desired.
|
|
678
1106
|
"""
|
|
679
|
-
self.
|
|
1107
|
+
self.create_directory()
|
|
680
1108
|
return self.path
|
|
681
1109
|
|
|
682
1110
|
@property
|
|
683
1111
|
def fmt(self) -> Format:
|
|
684
|
-
""" Returns current
|
|
1112
|
+
""" Returns current :class:`cdxcore.subdir.Format`. """
|
|
685
1113
|
return self._fmt
|
|
686
1114
|
|
|
687
1115
|
@property
|
|
688
1116
|
def ext(self) -> str:
|
|
689
1117
|
"""
|
|
690
|
-
Returns the common extension of the files in this directory, including leading '.'
|
|
691
|
-
Resolves
|
|
1118
|
+
Returns the common extension of the files in this directory, including leading ``'.'``.
|
|
1119
|
+
Resolves ``"*"`` into the extension associated with the current :class:`cdxcore.subdir.Format`.
|
|
692
1120
|
"""
|
|
693
1121
|
return self._ext if self._ext != self.EXT_FMT_AUTO else self._auto_ext(self._fmt)
|
|
694
1122
|
|
|
695
|
-
def
|
|
1123
|
+
def auto_ext( self, ext_or_fmt : str|Format = None ) -> str:
|
|
696
1124
|
"""
|
|
697
|
-
Computes the effective extension based on inputs
|
|
698
|
-
|
|
699
|
-
|
|
1125
|
+
Computes the effective extension based on the input ``ext_or_fmt``,
|
|
1126
|
+
and the current settings for ``self``.
|
|
1127
|
+
|
|
1128
|
+
If ``ext_or_fmt`` is set to ``"*"`` then the extension associated to
|
|
1129
|
+
the format of ``self`` is returned.
|
|
1130
|
+
|
|
1131
|
+
Parameters
|
|
1132
|
+
----------
|
|
1133
|
+
ext_or_fmt : str or :class:`cdxcore.subdir.Format`
|
|
1134
|
+
An extension or a format.
|
|
700
1135
|
|
|
701
|
-
Returns
|
|
1136
|
+
Returns
|
|
1137
|
+
-------
|
|
1138
|
+
ext : str
|
|
1139
|
+
The extension with leading ``'.'``.
|
|
702
1140
|
"""
|
|
703
|
-
if isinstance(
|
|
704
|
-
return self._auto_ext(
|
|
1141
|
+
if isinstance(ext_or_fmt, Format):
|
|
1142
|
+
return self._auto_ext(ext_or_fmt)
|
|
705
1143
|
else:
|
|
706
|
-
ext = self._ext if
|
|
1144
|
+
ext = self._ext if ext_or_fmt is None else SubDir._extract_ext(ext_or_fmt)
|
|
707
1145
|
return ext if ext != self.EXT_FMT_AUTO else self._auto_ext(self._fmt)
|
|
708
1146
|
|
|
709
|
-
def
|
|
1147
|
+
def auto_ext_fmt( self, *, ext : str = None, fmt : Format = None ) -> tuple[str]:
|
|
710
1148
|
"""
|
|
711
|
-
Computes the effective extension and format based on inputs
|
|
712
|
-
|
|
713
|
-
|
|
1149
|
+
Computes the effective extension and format based on inputs ``ext`` and ``fmt``,
|
|
1150
|
+
each of which defaults to the respective values of ``self``.
|
|
1151
|
+
|
|
1152
|
+
Resolves an ``ext`` of ``"*"`` into the extension associated with ``fmt``.
|
|
714
1153
|
|
|
715
|
-
Returns
|
|
1154
|
+
Returns
|
|
1155
|
+
-------
|
|
1156
|
+
(ext, fmt) : tuple
|
|
1157
|
+
Here ``ext`` contains the leading ``'.'`` and ``fmt`` is
|
|
1158
|
+
of type :class:`cdxcore.subdir.Format`.
|
|
716
1159
|
"""
|
|
717
1160
|
if isinstance(ext, Format):
|
|
718
1161
|
verify( fmt is None or fmt == ext, "If 'ext' is a Format, then 'fmt' must match 'ext' or be None. Found '%s' and '%s', respectively.", ext, fmt, exception=ValueError )
|
|
@@ -724,8 +1167,8 @@ class SubDir(object):
|
|
|
724
1167
|
return ext, fmt
|
|
725
1168
|
|
|
726
1169
|
@property
|
|
727
|
-
def
|
|
728
|
-
""" Returns an assigned CacheController
|
|
1170
|
+
def cache_controller(self):
|
|
1171
|
+
""" Returns an assigned :class:`cdxcore.subdir.CacheController`, or ``None`` """
|
|
729
1172
|
return self._cctrl if not self._cctrl is None else default_cacheController
|
|
730
1173
|
|
|
731
1174
|
# -- static helpers --
|
|
@@ -747,7 +1190,10 @@ class SubDir(object):
|
|
|
747
1190
|
|
|
748
1191
|
@staticmethod
|
|
749
1192
|
def _version_to_bytes( version : str ) -> bytearray:
|
|
750
|
-
"""
|
|
1193
|
+
"""
|
|
1194
|
+
Convert string version to byte string of at most size
|
|
1195
|
+
:data:`cdxcore.subdir.SubDir.MAX_VERSION_BINARY_LEN` + 1
|
|
1196
|
+
"""
|
|
751
1197
|
if version is None:
|
|
752
1198
|
return None
|
|
753
1199
|
version_ = bytearray(version,'utf-8')
|
|
@@ -790,69 +1236,67 @@ class SubDir(object):
|
|
|
790
1236
|
|
|
791
1237
|
# -- public utilities --
|
|
792
1238
|
|
|
793
|
-
def
|
|
1239
|
+
def full_file_name(self, file : str, *, ext : str = None) -> str:
|
|
794
1240
|
"""
|
|
795
1241
|
Returns fully qualified file name.
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
If 'self' is None, then this function returns None
|
|
799
|
-
If key is None then this function returns None
|
|
1242
|
+
|
|
1243
|
+
The function tests that ``file`` does not contain directory information.
|
|
800
1244
|
|
|
801
1245
|
Parameters
|
|
802
1246
|
----------
|
|
803
|
-
|
|
804
|
-
Core file name
|
|
1247
|
+
file : str
|
|
1248
|
+
Core file name without path or extension.
|
|
805
1249
|
ext : str
|
|
806
|
-
If not None
|
|
1250
|
+
If not ``None``, use this extension rather than :attr:`cdxcore.subdir.SubDir.ext`.
|
|
807
1251
|
|
|
808
1252
|
Returns
|
|
809
1253
|
-------
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
1254
|
+
Filename : str
|
|
1255
|
+
Fully qualified system file name.
|
|
1256
|
+
If ``self`` is ``None``, then this function returns ``None``; if ``file`` is ``None`` then this function also returns ``None``.
|
|
813
1257
|
"""
|
|
814
|
-
if self._path is None or
|
|
1258
|
+
if self._path is None or file is None:
|
|
815
1259
|
return None
|
|
816
|
-
|
|
817
|
-
verify( len(
|
|
1260
|
+
file = str(file)
|
|
1261
|
+
verify( len(file) > 0, "'file' cannot be empty")
|
|
818
1262
|
|
|
819
|
-
sub, _ = os.path.split(
|
|
820
|
-
verify( len(sub) == 0, "Key '%s' contains directory information",
|
|
1263
|
+
sub, _ = os.path.split(file)
|
|
1264
|
+
verify( len(sub) == 0, "Key '%s' contains directory information", file)
|
|
821
1265
|
|
|
822
|
-
verify(
|
|
823
|
-
verify(
|
|
1266
|
+
verify( file[0] != "!", "Key '%s' cannot start with '!' (this symbol indicates the temp directory)", file, exception=ValueError )
|
|
1267
|
+
verify( file[0] != "~", "Key '%s' cannot start with '~' (this symbol indicates the user's directory)", file, exception=ValueError )
|
|
824
1268
|
|
|
825
|
-
ext = self.
|
|
826
|
-
if len(ext) > 0 and
|
|
827
|
-
return self._path +
|
|
828
|
-
return self._path +
|
|
829
|
-
|
|
1269
|
+
ext = self.auto_ext( ext )
|
|
1270
|
+
if len(ext) > 0 and file[-len(ext):] != ext:
|
|
1271
|
+
return self._path + file + ext
|
|
1272
|
+
return self._path + file
|
|
1273
|
+
full_file_name = full_file_name # backwards compatibility
|
|
830
1274
|
|
|
831
1275
|
@staticmethod
|
|
832
|
-
def
|
|
1276
|
+
def temp_dir() -> str:
|
|
833
1277
|
"""
|
|
834
|
-
Return system temp directory. Short
|
|
835
|
-
Result contains trailing '/'
|
|
1278
|
+
Return system temp directory. Short-cut to :func:`tempfile.gettempdir`.
|
|
1279
|
+
Result contains trailing ``'/'``.
|
|
836
1280
|
"""
|
|
837
1281
|
d = tempfile.gettempdir()
|
|
838
1282
|
assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-1", d)
|
|
839
1283
|
return d + "/"
|
|
840
1284
|
|
|
841
1285
|
@staticmethod
|
|
842
|
-
def
|
|
1286
|
+
def working_dir() -> str:
|
|
843
1287
|
"""
|
|
844
|
-
Return current working directory. Short
|
|
845
|
-
Result contains trailing '/'
|
|
1288
|
+
Return current working directory. Short-cut for :func:`os.getcwd`.
|
|
1289
|
+
Result contains trailing ``'/'``.
|
|
846
1290
|
"""
|
|
847
1291
|
d = os.getcwd()
|
|
848
1292
|
assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-2", d)
|
|
849
1293
|
return d + "/"
|
|
850
1294
|
|
|
851
1295
|
@staticmethod
|
|
852
|
-
def
|
|
1296
|
+
def user_dir() -> str:
|
|
853
1297
|
"""
|
|
854
|
-
Return current working directory. Short
|
|
855
|
-
Result contains trailing '/'
|
|
1298
|
+
Return the current user's home directory. Short-cut for :func:`os.path.expanduser` with parameter ``'~'``.
|
|
1299
|
+
Result contains trailing ``'/'``.
|
|
856
1300
|
"""
|
|
857
1301
|
d = os.path.expanduser('~')
|
|
858
1302
|
assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-3", d)
|
|
@@ -860,95 +1304,101 @@ class SubDir(object):
|
|
|
860
1304
|
|
|
861
1305
|
# -- read --
|
|
862
1306
|
|
|
863
|
-
def _read_reader( self, reader,
|
|
1307
|
+
def _read_reader( self, reader, file : str, default, raise_on_error : bool, *, ext : str = None ):
|
|
864
1308
|
"""
|
|
865
1309
|
Utility function for _read() and read_string()
|
|
866
1310
|
|
|
867
1311
|
Parameters
|
|
868
1312
|
----------
|
|
869
|
-
reader(
|
|
1313
|
+
reader( file, full_file_name, default )
|
|
870
1314
|
A function which is called to read the file once the correct directory is identified
|
|
871
|
-
|
|
872
|
-
|
|
1315
|
+
file : file (for error messages, might include '/')
|
|
1316
|
+
full_file_name : full file name
|
|
873
1317
|
default value
|
|
874
|
-
|
|
875
|
-
str: fully qualified
|
|
1318
|
+
file : str or list
|
|
1319
|
+
str: fully qualified file
|
|
876
1320
|
list: list of fully qualified names
|
|
877
1321
|
default :
|
|
878
1322
|
default value. None is a valid default value
|
|
879
1323
|
list : list of defaults for a list of keys
|
|
880
|
-
|
|
1324
|
+
raise_on_error : bool
|
|
881
1325
|
If True, and the file does not exist, throw exception
|
|
882
1326
|
ext :
|
|
883
1327
|
Extension or None for current extension.
|
|
884
1328
|
list : list of extensions for a list of keys
|
|
885
1329
|
"""
|
|
886
1330
|
# vector version
|
|
887
|
-
if not isinstance(
|
|
888
|
-
if not isinstance(
|
|
889
|
-
l = len(
|
|
1331
|
+
if not isinstance(file,str):
|
|
1332
|
+
if not isinstance(file, Collection): raise ValueError(txtfmt( "'file' must be a string, or an interable object. Found type %s", type(file)))
|
|
1333
|
+
l = len(file)
|
|
890
1334
|
if default is None or isinstance(default,str) or not isinstance(default, Collection):
|
|
891
1335
|
default = [ default ] * l
|
|
892
1336
|
else:
|
|
893
|
-
if len(default) != l: raise ValueError(txtfmt("'default' must have same lengths as '
|
|
1337
|
+
if len(default) != l: raise ValueError(txtfmt("'default' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(default), l ))
|
|
894
1338
|
if ext is None or isinstance(ext, str) or not isinstance(ext, Collection):
|
|
895
1339
|
ext = [ ext ] * l
|
|
896
1340
|
else:
|
|
897
|
-
if len(ext) != l: raise ValueError(txtfmt("'ext' must have same lengths as '
|
|
898
|
-
return [ self._read_reader(reader=reader,
|
|
1341
|
+
if len(ext) != l: raise ValueError(txtfmt("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l ))
|
|
1342
|
+
return [ self._read_reader(reader=reader,file=k,default=d,raise_on_error=raise_on_error,ext=e) for k, d, e in zip(file,default,ext) ]
|
|
899
1343
|
|
|
900
1344
|
# deleted directory?
|
|
901
1345
|
if self._path is None:
|
|
902
|
-
verify( not
|
|
1346
|
+
verify( not raise_on_error, "Trying to read '%s' from an empty directory object", file, exception=NotADirectoryError)
|
|
903
1347
|
return default
|
|
904
1348
|
|
|
905
|
-
# single
|
|
906
|
-
if len(
|
|
907
|
-
sub, key_ = os.path.split(
|
|
1349
|
+
# single file
|
|
1350
|
+
if len(file) == 0: raise ValueError(txtfmt("'file' missing (the filename)" ))
|
|
1351
|
+
sub, key_ = os.path.split(file)
|
|
908
1352
|
if len(sub) > 0:
|
|
909
|
-
return self(sub)._read_reader(reader=reader,
|
|
910
|
-
if len(key_) == 0: ValueError(txtfmt("'
|
|
1353
|
+
return self(sub)._read_reader(reader=reader,file=key_,default=default,raise_on_error=raise_on_error,ext=ext)
|
|
1354
|
+
if len(key_) == 0: ValueError(txtfmt("'file' %s indicates a directory, not a file", file))
|
|
911
1355
|
|
|
912
1356
|
# don't try if directory doesn't exist
|
|
913
|
-
|
|
914
|
-
if not self.
|
|
915
|
-
if
|
|
916
|
-
raise KeyError(
|
|
1357
|
+
full_file_name = self.full_file_name(file,ext=ext)
|
|
1358
|
+
if not self.path_exists():
|
|
1359
|
+
if raise_on_error:
|
|
1360
|
+
raise KeyError(file, full_file_name)
|
|
917
1361
|
return default
|
|
918
1362
|
|
|
919
1363
|
# does file exist?
|
|
920
|
-
if not os.path.exists(
|
|
921
|
-
if
|
|
922
|
-
raise KeyError(
|
|
1364
|
+
if not os.path.exists(full_file_name):
|
|
1365
|
+
if raise_on_error:
|
|
1366
|
+
raise KeyError(file,full_file_name)
|
|
923
1367
|
return default
|
|
924
|
-
if not os.path.isfile(
|
|
925
|
-
raise IOError(txtfmt( "Cannot read %s: object exists, but is not a file (full path %s)",
|
|
1368
|
+
if not os.path.isfile(full_file_name):
|
|
1369
|
+
raise IOError(txtfmt( "Cannot read '%s': object exists, but is not a file (full path %s)", file, full_file_name ))
|
|
926
1370
|
|
|
927
1371
|
# read content
|
|
928
1372
|
# delete existing files upon read error
|
|
929
1373
|
try:
|
|
930
|
-
return reader(
|
|
1374
|
+
return reader( file, full_file_name, default )
|
|
931
1375
|
except EOFError as e:
|
|
932
1376
|
try:
|
|
933
|
-
os.remove(
|
|
934
|
-
warn("Cannot read %s; file deleted (full path %s).\nError: %s",
|
|
1377
|
+
os.remove(full_file_name)
|
|
1378
|
+
warn("Cannot read '%s'; file deleted (full path '%s').\nError: %s",file,full_file_name, str(e))
|
|
935
1379
|
except Exception as e:
|
|
936
|
-
warn("Cannot read %s; attempt to delete file failed (full path %s): %s",
|
|
1380
|
+
warn("Cannot read '%s'; subsequent attempt to delete file failed (full path '%s''): %s",file,full_file_name,str(e))
|
|
937
1381
|
except FileNotFoundError as e:
|
|
938
|
-
if
|
|
939
|
-
raise KeyError(
|
|
1382
|
+
if raise_on_error:
|
|
1383
|
+
raise KeyError(file, full_file_name, str(e)) from e
|
|
1384
|
+
except VersionError as e:
|
|
1385
|
+
if raise_on_error:
|
|
1386
|
+
raise e
|
|
1387
|
+
except VersionPresentError as e:
|
|
1388
|
+
if raise_on_error:
|
|
1389
|
+
raise e
|
|
940
1390
|
except Exception as e:
|
|
941
|
-
if
|
|
942
|
-
raise KeyError(
|
|
1391
|
+
if raise_on_error:
|
|
1392
|
+
raise KeyError(file, full_file_name, str(e)) from e
|
|
943
1393
|
except (ImportError, BaseException) as e:
|
|
944
|
-
e.add_note(
|
|
945
|
-
e.add_note(
|
|
1394
|
+
e.add_note( file )
|
|
1395
|
+
e.add_note( full_file_name )
|
|
946
1396
|
raise e
|
|
947
1397
|
return default
|
|
948
1398
|
|
|
949
|
-
def _read( self,
|
|
1399
|
+
def _read( self, file : str,
|
|
950
1400
|
default = None,
|
|
951
|
-
|
|
1401
|
+
raise_on_error : bool = False,
|
|
952
1402
|
*,
|
|
953
1403
|
version : str = None,
|
|
954
1404
|
ext : str = None,
|
|
@@ -957,18 +1407,34 @@ class SubDir(object):
|
|
|
957
1407
|
handle_version : int = 0
|
|
958
1408
|
):
|
|
959
1409
|
""" See read() """
|
|
960
|
-
ext, fmt = self.
|
|
1410
|
+
ext, fmt = self.auto_ext_fmt(ext=ext, fmt=fmt)
|
|
961
1411
|
version = str(version) if not version is None else None
|
|
962
1412
|
version = version if handle_version != SubDir.VER_RETURN else ""
|
|
963
1413
|
assert not fmt == self.EXT_FMT_AUTO, ("'fmt' is '*' ...?")
|
|
964
1414
|
|
|
965
1415
|
if version is None and fmt in [Format.BLOSC, Format.GZIP]:
|
|
966
|
-
|
|
1416
|
+
# blosc and gzip have unexpected side effects
|
|
1417
|
+
# a version is attempted to be read but is not present
|
|
1418
|
+
# (e.g. blosc causes a MemoryError)
|
|
1419
|
+
version = ""
|
|
967
1420
|
|
|
968
|
-
def reader(
|
|
1421
|
+
def reader( file, full_file_name, default ):
|
|
969
1422
|
test_version = "(unknown)"
|
|
970
|
-
|
|
971
|
-
|
|
1423
|
+
|
|
1424
|
+
def handle_pickle_error(e):
|
|
1425
|
+
err = "invalid load key, '\\x03'."
|
|
1426
|
+
if not version is None or e.args[0] != err:
|
|
1427
|
+
print("####", e.args)
|
|
1428
|
+
raise e
|
|
1429
|
+
raise VersionPresentError(
|
|
1430
|
+
f"Error reading '{full_file_name}': encountered an unpickling error '{err}' "+\
|
|
1431
|
+
f"while attempting to read file using {str(fmt)}. "+\
|
|
1432
|
+
"This is likely caused by attempting to read a file which was written with "+\
|
|
1433
|
+
"version information without providing a test version during read(). If the version is of the file "+\
|
|
1434
|
+
"is not important, use `version=\"*\"'", e) from e
|
|
1435
|
+
if fmt == Format.PICKLE:
|
|
1436
|
+
# we do not read any version information if not requested
|
|
1437
|
+
with open(full_file_name,"rb") as f:
|
|
972
1438
|
# handle version as byte string
|
|
973
1439
|
ok = True
|
|
974
1440
|
if not version is None:
|
|
@@ -981,37 +1447,55 @@ class SubDir(object):
|
|
|
981
1447
|
if ok:
|
|
982
1448
|
if handle_version == SubDir.VER_CHECK:
|
|
983
1449
|
return True
|
|
984
|
-
|
|
1450
|
+
try:
|
|
985
1451
|
data = pickle.load(f)
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
1452
|
+
except pickle.UnpicklingError as e:
|
|
1453
|
+
handle_pickle_error(e)
|
|
1454
|
+
return data
|
|
1455
|
+
|
|
1456
|
+
elif fmt == Format.BLOSC:
|
|
1457
|
+
# we do not write
|
|
1458
|
+
# any version information if not requested
|
|
1459
|
+
with open(full_file_name,"rb") as f:
|
|
1460
|
+
# handle version as byte string
|
|
1461
|
+
ok = True
|
|
1462
|
+
if not version is None: # it's never None
|
|
1463
|
+
test_len = int( f.read( 1 )[0] )
|
|
1464
|
+
test_version = f.read(test_len)
|
|
1465
|
+
test_version = test_version.decode("utf-8")
|
|
1466
|
+
if handle_version == SubDir.VER_RETURN:
|
|
1467
|
+
return test_version
|
|
1468
|
+
ok = (version == "*" or test_version == version)
|
|
1469
|
+
if ok:
|
|
1470
|
+
if handle_version == SubDir.VER_CHECK:
|
|
1471
|
+
return True
|
|
1472
|
+
nnbb = f.read(2)
|
|
1473
|
+
num_blocks = int.from_bytes( nnbb, 'big', signed=False )
|
|
1474
|
+
data = bytearray()
|
|
1475
|
+
for i in range(num_blocks):
|
|
1476
|
+
blockl = int.from_bytes( f.read(6), 'big', signed=False )
|
|
1477
|
+
if blockl>0:
|
|
1478
|
+
bdata = blosc.decompress( f.read(blockl) )
|
|
1479
|
+
data += bdata
|
|
1480
|
+
del bdata
|
|
1481
|
+
try:
|
|
998
1482
|
data = pickle.loads(data)
|
|
999
|
-
|
|
1000
|
-
|
|
1483
|
+
except pickle.UnpicklingError as e:
|
|
1484
|
+
handle_pickle_error(e)
|
|
1001
1485
|
return data
|
|
1002
1486
|
|
|
1003
1487
|
elif fmt == Format.GZIP:
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
with gzip.open(fullFileName,"rb") as f:
|
|
1488
|
+
# always read version information
|
|
1489
|
+
with gzip.open(full_file_name,"rb") as f:
|
|
1007
1490
|
# handle version as byte string
|
|
1008
|
-
ok
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1491
|
+
ok = True
|
|
1492
|
+
if not version is None: # it's never None
|
|
1493
|
+
test_len = int( f.read( 1 )[0] )
|
|
1494
|
+
test_version = f.read(test_len)
|
|
1495
|
+
test_version = test_version.decode("utf-8")
|
|
1496
|
+
if handle_version == SubDir.VER_RETURN:
|
|
1497
|
+
return test_version
|
|
1498
|
+
ok = (version == "*" or test_version == version)
|
|
1015
1499
|
if ok:
|
|
1016
1500
|
if handle_version == SubDir.VER_CHECK:
|
|
1017
1501
|
return True
|
|
@@ -1019,13 +1503,16 @@ class SubDir(object):
|
|
|
1019
1503
|
return data
|
|
1020
1504
|
|
|
1021
1505
|
elif fmt in [Format.JSON_PLAIN, Format.JSON_PICKLE]:
|
|
1022
|
-
|
|
1506
|
+
# only read version information if requested
|
|
1507
|
+
with open(full_file_name,"rt",encoding="utf-8") as f:
|
|
1023
1508
|
# handle versioning
|
|
1024
1509
|
ok = True
|
|
1025
1510
|
if not version is None:
|
|
1026
1511
|
test_version = f.readline()
|
|
1027
1512
|
if test_version[:2] != "# ":
|
|
1028
|
-
raise
|
|
1513
|
+
raise VersionError("Error reading '{full_file_name}' using {fmt}: file does not appear to contain a version (it should start with '# ')",
|
|
1514
|
+
version_found="",
|
|
1515
|
+
version_expected=version)
|
|
1029
1516
|
test_version = test_version[2:]
|
|
1030
1517
|
if test_version[-1:] == "\n":
|
|
1031
1518
|
test_version = test_version[:-1]
|
|
@@ -1037,8 +1524,7 @@ class SubDir(object):
|
|
|
1037
1524
|
return ok
|
|
1038
1525
|
# read
|
|
1039
1526
|
if fmt == Format.JSON_PICKLE:
|
|
1040
|
-
|
|
1041
|
-
raise ModuleNotFoundError("jsonpickle", "'jsonpickle' not found'")
|
|
1527
|
+
jsonpickle = _import_jsonpickle()
|
|
1042
1528
|
return jsonpickle.decode( f.read() )
|
|
1043
1529
|
else:
|
|
1044
1530
|
assert fmt == Format.JSON_PLAIN, ("Internal error: unknown Format", fmt)
|
|
@@ -1048,25 +1534,33 @@ class SubDir(object):
|
|
|
1048
1534
|
|
|
1049
1535
|
# arrive here if version is wrong
|
|
1050
1536
|
# delete a wrong version
|
|
1537
|
+
|
|
1538
|
+
if version == "":
|
|
1539
|
+
raise VersionPresentError(f"Error reading '{full_file_name}' using {fmt}: the file has version '{test_version}', but was attempted to be read without "+\
|
|
1540
|
+
"a test version. If you intended to accept any version, use 'version=\"*\"' instead.")
|
|
1541
|
+
|
|
1051
1542
|
deleted = ""
|
|
1052
1543
|
if delete_wrong_version:
|
|
1053
1544
|
try:
|
|
1054
|
-
os.remove(
|
|
1545
|
+
os.remove(full_file_name)
|
|
1055
1546
|
e = None
|
|
1056
1547
|
except Exception as e_:
|
|
1057
1548
|
e = str(e_)
|
|
1058
1549
|
if handle_version == SubDir.VER_CHECK:
|
|
1059
1550
|
return False
|
|
1060
|
-
if not
|
|
1551
|
+
if not raise_on_error:
|
|
1061
1552
|
return default
|
|
1062
1553
|
deleted = " (file was deleted)" if e is None else " (attempt to delete file failed: %s)" % e
|
|
1063
|
-
raise
|
|
1554
|
+
raise VersionError( f"Error reading '{full_file_name}' using {fmt}: found version '{test_version}' not '{version}'{deleted}",
|
|
1555
|
+
version_found=test_version,
|
|
1556
|
+
version_expected=version
|
|
1557
|
+
)
|
|
1064
1558
|
|
|
1065
|
-
return self._read_reader( reader=reader,
|
|
1559
|
+
return self._read_reader( reader=reader, file=file, default=default, raise_on_error=raise_on_error, ext=ext )
|
|
1066
1560
|
|
|
1067
|
-
def read( self,
|
|
1561
|
+
def read( self, file : str,
|
|
1068
1562
|
default = None,
|
|
1069
|
-
|
|
1563
|
+
raise_on_error : bool = False,
|
|
1070
1564
|
*,
|
|
1071
1565
|
version : str = None,
|
|
1072
1566
|
delete_wrong_version : bool = True,
|
|
@@ -1074,296 +1568,323 @@ class SubDir(object):
|
|
|
1074
1568
|
fmt : Format = None
|
|
1075
1569
|
):
|
|
1076
1570
|
"""
|
|
1077
|
-
Read
|
|
1078
|
-
-- Supports 'key' containing directories
|
|
1079
|
-
-- Supports 'key' (and default, ext) being iterable.
|
|
1080
|
-
In this case any any iterable 'default' except strings are considered accordingly.
|
|
1081
|
-
In order to have a unit default which is an iterable, you will have to wrap it in another iterable, e.g.
|
|
1082
|
-
E.g.:
|
|
1083
|
-
keys = ['file1', 'file2']
|
|
1084
|
-
|
|
1085
|
-
sd.read( keys )
|
|
1086
|
-
--> works, both are using default None
|
|
1571
|
+
Read data from a file if the file exists, or return ``default``.
|
|
1087
1572
|
|
|
1088
|
-
|
|
1089
|
-
|
|
1573
|
+
* Supports ``file`` containing directory information.
|
|
1574
|
+
* Supports ``file`` (and ``default`` as well as ``ext``) being iterable.
|
|
1575
|
+
Examples::
|
|
1576
|
+
|
|
1577
|
+
from cdxcore.subdir import SubDir
|
|
1578
|
+
files = ['file1', 'file2']
|
|
1579
|
+
sd = SubDir("!/test")
|
|
1090
1580
|
|
|
1091
|
-
|
|
1092
|
-
|
|
1581
|
+
sd.read( files ) # both files are using default None
|
|
1582
|
+
sd.read( files, 1 ) # both files are using default '1'
|
|
1583
|
+
sd.read( files, [1,2] ) # files use defaults 1 and 2, respectively
|
|
1093
1584
|
|
|
1094
|
-
|
|
1095
|
-
--> produces error as len(keys) != len(default)
|
|
1585
|
+
sd.read( files, [1] ) # produces error as len(files) != len([1])
|
|
1096
1586
|
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
sd.read( keys, ['1','2'] )
|
|
1103
|
-
in case the intention was using '1' and '2', respectively.
|
|
1104
|
-
|
|
1105
|
-
Returns the read object, or a list of objects if 'key' was iterable.
|
|
1106
|
-
If the current directory is 'None', then behaviour is as if the file did not exist.
|
|
1587
|
+
Strings are iterable but are treated as a single value.
|
|
1588
|
+
Therefore::
|
|
1589
|
+
|
|
1590
|
+
sd.read( files, '12' ) # the default value '12' is used for both files
|
|
1591
|
+
sd.read( files, ['1','2'] ) # use defaults '1' and '2', respectively
|
|
1107
1592
|
|
|
1108
1593
|
Parameters
|
|
1109
1594
|
----------
|
|
1110
|
-
|
|
1111
|
-
A
|
|
1595
|
+
file : str
|
|
1596
|
+
A file name or a list thereof. ``file`` may contain subdirectories.
|
|
1597
|
+
|
|
1112
1598
|
default :
|
|
1113
|
-
Default value, or default values if
|
|
1114
|
-
|
|
1599
|
+
Default value, or default values if ``file`` is a list.
|
|
1600
|
+
|
|
1601
|
+
raise_on_error : bool
|
|
1115
1602
|
Whether to raise an exception if reading an existing file failed.
|
|
1116
1603
|
By default this function fails silently and returns the default.
|
|
1604
|
+
|
|
1117
1605
|
version : str
|
|
1118
|
-
If not None
|
|
1606
|
+
If not ``None``, specifies the version of the current code base.
|
|
1607
|
+
|
|
1119
1608
|
In this case, this version will be compared to the version of the file being read.
|
|
1120
|
-
If they do not match, read fails (either by returning default or throwing
|
|
1121
|
-
|
|
1609
|
+
If they do not match, read fails (either by returning default or throwing a :class:`cdxcore.version.VersionError` exception).
|
|
1610
|
+
|
|
1611
|
+
You can specify version ``"*"`` to accept any version.
|
|
1612
|
+
Note that this is distinct
|
|
1613
|
+
from using ``None``, which stipulates that the file should not
|
|
1614
|
+
have version information.
|
|
1615
|
+
|
|
1122
1616
|
delete_wrong_version : bool
|
|
1123
|
-
If True
|
|
1617
|
+
If ``True``, and if a wrong version was found, delete the file.
|
|
1618
|
+
|
|
1124
1619
|
ext : str
|
|
1125
|
-
Extension overwrite, or a list thereof if
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1620
|
+
Extension overwrite, or a list thereof if ``file`` is a list.
|
|
1621
|
+
|
|
1622
|
+
Use:
|
|
1623
|
+
|
|
1624
|
+
* ``None`` to use directory's default.
|
|
1625
|
+
* ``'*'`` to use the extension implied by ``fmt``.
|
|
1626
|
+
* ``""`` to turn off extension management.
|
|
1627
|
+
|
|
1628
|
+
fmt : :class:`cdxcore.subdir.Format`
|
|
1629
|
+
File :class:`cdxcore.subdir.Format` or ``None`` to use the directory's default.
|
|
1630
|
+
|
|
1631
|
+
Note:
|
|
1632
|
+
|
|
1633
|
+
* ``fmt`` cannot be a list even if ``file`` is.
|
|
1634
|
+
* Unless ``ext`` or the SubDir's extension is ``'*'``, changing the format does not automatically change the extension.
|
|
1134
1635
|
|
|
1135
1636
|
Returns
|
|
1136
1637
|
-------
|
|
1137
|
-
|
|
1138
|
-
|
|
1638
|
+
Content
|
|
1639
|
+
For a single ``file`` returns the content of the file if successfully read, or ``default`` otherwise.
|
|
1640
|
+
If ``file`` is a list: list of contents.
|
|
1641
|
+
|
|
1642
|
+
Raises
|
|
1643
|
+
------
|
|
1644
|
+
:class:`cdxcore.version.VersionError`:
|
|
1645
|
+
If the file's version did not match the ``version`` provided.
|
|
1646
|
+
|
|
1139
1647
|
"""
|
|
1140
|
-
return self._read(
|
|
1648
|
+
return self._read( file=file,
|
|
1141
1649
|
default=default,
|
|
1142
|
-
|
|
1650
|
+
raise_on_error=raise_on_error,
|
|
1143
1651
|
version=version,
|
|
1144
1652
|
ext=ext,
|
|
1145
1653
|
fmt=fmt,
|
|
1146
1654
|
delete_wrong_version=delete_wrong_version,
|
|
1147
1655
|
handle_version=SubDir.VER_NORMAL )
|
|
1148
1656
|
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
def is_version( self, key : str, version : str = None, raiseOnError : bool = False, *, ext : str = None, fmt : Format = None, delete_wrong_version : bool = True ):
|
|
1657
|
+
def is_version( self, file : str, version : str = None, raise_on_error : bool = False, *, ext : str = None, fmt : Format = None, delete_wrong_version : bool = True ):
|
|
1152
1658
|
"""
|
|
1153
|
-
|
|
1659
|
+
Tests the version of a file.
|
|
1154
1660
|
|
|
1155
1661
|
Parameters
|
|
1156
1662
|
----------
|
|
1157
|
-
|
|
1158
|
-
A
|
|
1663
|
+
file : str
|
|
1664
|
+
A filename, or a list thereof.
|
|
1665
|
+
|
|
1159
1666
|
version : str
|
|
1160
|
-
Specifies the version
|
|
1161
|
-
|
|
1667
|
+
Specifies the version to compare the file's version with.
|
|
1668
|
+
|
|
1669
|
+
You can use ``"*"`` to match any version.
|
|
1162
1670
|
|
|
1163
|
-
|
|
1671
|
+
raise_on_error : bool
|
|
1164
1672
|
Whether to raise an exception if accessing an existing file failed (e.g. if it is a directory).
|
|
1165
1673
|
By default this function fails silently and returns ``False``.
|
|
1674
|
+
|
|
1166
1675
|
delete_wrong_version : bool
|
|
1167
|
-
If True, and if a wrong version was found, delete
|
|
1676
|
+
If True, and if a wrong version was found, delete ``file``.
|
|
1677
|
+
|
|
1168
1678
|
ext : str
|
|
1169
|
-
Extension overwrite, or a list thereof if
|
|
1679
|
+
Extension overwrite, or a list thereof if ``file`` is a list.
|
|
1680
|
+
|
|
1170
1681
|
Set to:
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1682
|
+
|
|
1683
|
+
* ``None`` to use directory's default.
|
|
1684
|
+
* ``"*"`` to use the extension implied by ``fmt``.
|
|
1685
|
+
* ``""`` for no extension.
|
|
1686
|
+
|
|
1687
|
+
fmt : :class:`cdxcore.subdir.Format`
|
|
1688
|
+
File format or ``None`` to use the directory's default.
|
|
1689
|
+
Note that ``fmt`` cannot be a list even if ``file`` is.
|
|
1178
1690
|
|
|
1179
1691
|
Returns
|
|
1180
1692
|
-------
|
|
1181
|
-
|
|
1693
|
+
Status : bool
|
|
1694
|
+
Returns ``True`` only if the file exists, has version information, and its version matches ``version`` (``"*"`` matches any version).
|
|
1182
1695
|
"""
|
|
1183
|
-
return self._read(
|
|
1696
|
+
return self._read( file=file,default=False,raise_on_error=raise_on_error,version=version,ext=ext,fmt=fmt,delete_wrong_version=delete_wrong_version,handle_version=SubDir.VER_CHECK )
|
|
1184
1697
|
|
|
1185
|
-
def get_version( self,
|
|
1698
|
+
def get_version( self, file : str, raise_on_error : bool = False, *, ext : str = None, fmt : Format = None ):
|
|
1186
1699
|
"""
|
|
1187
|
-
Returns
|
|
1700
|
+
Returns a version stored in a file.
|
|
1701
|
+
|
|
1188
1702
|
This requires that the file has previously been saved with a version.
|
|
1189
|
-
Otherwise this function will
|
|
1703
|
+
Otherwise this function will have unpredictable results.
|
|
1190
1704
|
|
|
1191
1705
|
Parameters
|
|
1192
1706
|
----------
|
|
1193
|
-
|
|
1194
|
-
A
|
|
1195
|
-
|
|
1707
|
+
file : str
|
|
1708
|
+
A filename, or a list thereof.
|
|
1709
|
+
|
|
1710
|
+
raise_on_error : bool
|
|
1196
1711
|
Whether to raise an exception if accessing an existing file failed (e.g. if it is a directory).
|
|
1197
1712
|
By default this function fails silently and returns ``None``.
|
|
1713
|
+
|
|
1714
|
+
delete_wrong_version : bool
|
|
1715
|
+
If ``True``, and if a wrong version was found, delete ``file``.
|
|
1716
|
+
|
|
1198
1717
|
ext : str
|
|
1199
|
-
Extension overwrite, or a list thereof if
|
|
1718
|
+
Extension overwrite, or a list thereof if ``file`` is a list.
|
|
1719
|
+
|
|
1200
1720
|
Set to:
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1721
|
+
|
|
1722
|
+
* ``None`` to use directory's default.
|
|
1723
|
+
* ``"*"`` to use the extension implied by ``fmt``.
|
|
1724
|
+
* ``""`` for no extension.
|
|
1725
|
+
|
|
1726
|
+
fmt : :class:`cdxcore.subdir.Format`
|
|
1727
|
+
File format or ``None`` to use the directory's default.
|
|
1728
|
+
Note that ``fmt`` cannot be a list even if ``file`` is.
|
|
1208
1729
|
|
|
1209
1730
|
Returns
|
|
1210
1731
|
-------
|
|
1211
|
-
|
|
1732
|
+
version : str
|
|
1212
1733
|
"""
|
|
1213
|
-
return self._read(
|
|
1734
|
+
return self._read( file=file,default=None,raise_on_error=raise_on_error,version="",ext=ext,fmt=fmt,delete_wrong_version=False,handle_version=SubDir.VER_RETURN )
|
|
1214
1735
|
|
|
1215
|
-
def
|
|
1736
|
+
def read_string( self, file : str, default = None, raise_on_error : bool = False, *, ext : str = None ) -> str:
|
|
1216
1737
|
"""
|
|
1217
|
-
Reads text from
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
Returns the read string, or a list of strings if 'key' was iterable.
|
|
1222
|
-
If the current directory is 'None', then behaviour is as if the file did not exist.
|
|
1223
|
-
|
|
1224
|
-
Use 'ext' to specify the extension.
|
|
1225
|
-
You cannot use 'ext' to specify a format as the format is plain text.
|
|
1226
|
-
If 'ext' is '*' or if self._ext is '*' then the default extension is 'txt'.
|
|
1738
|
+
Reads text from a file. Removes trailing EOLs.
|
|
1739
|
+
|
|
1740
|
+
Returns the read string, or a list of strings if ``file`` was iterable.
|
|
1227
1741
|
"""
|
|
1228
1742
|
verify( not isinstance(ext, Format), "Cannot change format when writing strings. Found extension '%s'", ext)
|
|
1229
1743
|
ext = ext if not ext is None else self._ext
|
|
1230
1744
|
ext = ext if ext != self.EXT_FMT_AUTO else ".txt"
|
|
1231
1745
|
|
|
1232
|
-
def reader(
|
|
1233
|
-
with open(
|
|
1746
|
+
def reader( file, full_file_name, default ):
|
|
1747
|
+
with open(full_file_name,"rt",encoding="utf-8") as f:
|
|
1234
1748
|
line = f.readline()
|
|
1235
1749
|
if len(line) > 0 and line[-1] == '\n':
|
|
1236
1750
|
line = line[:-1]
|
|
1237
1751
|
return line
|
|
1238
|
-
return self._read_reader( reader=reader,
|
|
1752
|
+
return self._read_reader( reader=reader, file=file, default=default, raise_on_error=raise_on_error, ext=ext )
|
|
1239
1753
|
|
|
1240
1754
|
# -- write --
|
|
1241
1755
|
|
|
1242
|
-
def _write( self, writer,
|
|
1756
|
+
def _write( self, writer, file : str, obj, raise_on_error : bool, *, ext : str = None ) -> bool:
|
|
1243
1757
|
""" Utility function for write() and writeLine() """
|
|
1244
1758
|
if self._path is None:
|
|
1245
|
-
raise EOFError("Cannot write to '%s': current directory is not specified" %
|
|
1246
|
-
self.
|
|
1759
|
+
raise EOFError("Cannot write to '%s': current directory is not specified" % file)
|
|
1760
|
+
self.create_directory()
|
|
1247
1761
|
|
|
1248
1762
|
# vector version
|
|
1249
|
-
if not isinstance(
|
|
1250
|
-
if not isinstance(
|
|
1251
|
-
l = len(
|
|
1763
|
+
if not isinstance(file,str):
|
|
1764
|
+
if not isinstance(file, Collection): error( "'file' must be a string or an interable object. Found type %s", type(file), exception=ValueError)
|
|
1765
|
+
l = len(file)
|
|
1252
1766
|
if obj is None or isinstance(obj,str) or not isinstance(obj, Collection):
|
|
1253
1767
|
obj = [ obj ] * l
|
|
1254
1768
|
else:
|
|
1255
|
-
if len(obj) != l: error("'obj' must have same lengths as '
|
|
1769
|
+
if len(obj) != l: error("'obj' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(obj), l, exception=ValueError )
|
|
1256
1770
|
if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
|
|
1257
1771
|
ext = [ ext ] * l
|
|
1258
1772
|
else:
|
|
1259
|
-
if len(ext) != l: error("'ext' must have same lengths as '
|
|
1773
|
+
if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l, exception=ValueError )
|
|
1260
1774
|
ok = True
|
|
1261
|
-
for k,o,e in zip(
|
|
1262
|
-
ok |= self._write( writer, k, o,
|
|
1775
|
+
for k,o,e in zip(file,obj,ext):
|
|
1776
|
+
ok |= self._write( writer, k, o, raise_on_error=raise_on_error, ext=e )
|
|
1263
1777
|
return ok
|
|
1264
1778
|
|
|
1265
|
-
# single
|
|
1266
|
-
if not len(
|
|
1267
|
-
sub,
|
|
1268
|
-
if len(
|
|
1779
|
+
# single file
|
|
1780
|
+
if not len(file) > 0: error("'file is empty (the filename)" )
|
|
1781
|
+
sub, file = os.path.split(file)
|
|
1782
|
+
if len(file) == 0: error("'file '%s' refers to a directory, not a file", file)
|
|
1269
1783
|
if len(sub) > 0:
|
|
1270
|
-
return SubDir(sub,parent=self)._write(writer,
|
|
1784
|
+
return SubDir(sub,parent=self)._write(writer,file,obj, raise_on_error=raise_on_error,ext=ext )
|
|
1271
1785
|
|
|
1272
1786
|
# write to temp file, then rename into target file
|
|
1273
1787
|
# this reduces collision when i/o operations are slow
|
|
1274
|
-
|
|
1275
|
-
tmp_file =
|
|
1788
|
+
full_file_name = self.full_file_name(file,ext=ext)
|
|
1789
|
+
tmp_file = unique_hash48( [ file, uuid.getnode(), os.getpid(), threading.get_ident(), datetime.datetime.now() ] )
|
|
1276
1790
|
tmp_i = 0
|
|
1277
|
-
fullTmpFile = self.
|
|
1791
|
+
fullTmpFile = self.full_file_name(tmp_file,ext="tmp" if not ext=="tmp" else "_tmp")
|
|
1278
1792
|
while os.path.exists(fullTmpFile):
|
|
1279
|
-
fullTmpFile = self.
|
|
1793
|
+
fullTmpFile = self.full_file_name(tmp_file) + "." + str(tmp_i) + ".tmp"
|
|
1280
1794
|
tmp_i += 1
|
|
1281
1795
|
if tmp_i >= 10:
|
|
1282
|
-
raise RuntimeError("Failed to generate temporary file for writing '%s': too many temporary files found. For example, this file already exists: '%s'" % (
|
|
1796
|
+
raise RuntimeError("Failed to generate temporary file for writing '%s': too many temporary files found. For example, this file already exists: '%s'" % ( full_file_name, fullTmpFile ) )
|
|
1283
1797
|
|
|
1284
1798
|
# write
|
|
1285
|
-
if not writer(
|
|
1799
|
+
if not writer( file, fullTmpFile, obj ):
|
|
1286
1800
|
return False
|
|
1287
|
-
assert os.path.exists(fullTmpFile), ("Internal error: file does not exist ...?", fullTmpFile,
|
|
1801
|
+
assert os.path.exists(fullTmpFile), ("Internal error: file does not exist ...?", fullTmpFile, full_file_name)
|
|
1288
1802
|
try:
|
|
1289
|
-
if os.path.exists(
|
|
1290
|
-
os.remove(
|
|
1291
|
-
os.rename(fullTmpFile,
|
|
1803
|
+
if os.path.exists(full_file_name):
|
|
1804
|
+
os.remove(full_file_name)
|
|
1805
|
+
os.rename(fullTmpFile, full_file_name)
|
|
1292
1806
|
except Exception as e:
|
|
1293
1807
|
os.remove(fullTmpFile)
|
|
1294
|
-
if
|
|
1808
|
+
if raise_on_error:
|
|
1295
1809
|
raise e
|
|
1296
1810
|
return False
|
|
1297
1811
|
return True
|
|
1298
1812
|
|
|
1299
|
-
def write( self,
|
|
1813
|
+
def write( self, file : str,
|
|
1300
1814
|
obj,
|
|
1301
|
-
|
|
1815
|
+
raise_on_error : bool = True,
|
|
1302
1816
|
*,
|
|
1303
1817
|
version : str = None,
|
|
1304
1818
|
ext : str = None,
|
|
1305
1819
|
fmt : Format = None ) -> bool:
|
|
1306
1820
|
"""
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
keys = ['file1', 'file2']
|
|
1314
|
-
|
|
1315
|
-
sd.write( keys, 1 )
|
|
1316
|
-
--> works, writes '1' in both files.
|
|
1317
|
-
|
|
1318
|
-
sd.read( keys, [1,2] )
|
|
1319
|
-
--> works, writes 1 and 2, respectively
|
|
1821
|
+
Writes an object to file.
|
|
1822
|
+
|
|
1823
|
+
* Supports ``file`` containing directories.
|
|
1824
|
+
* Supports ``file`` being a list.
|
|
1825
|
+
In this case, if ``obj`` is an iterable it is considered the list of values for the elements of ``file``.
|
|
1826
|
+
If ``obj`` is not iterable, it will be written into all files from ``file``::
|
|
1320
1827
|
|
|
1321
|
-
|
|
1322
|
-
--> works, writes '12' in both files
|
|
1828
|
+
from cdxcore.subdir import SubDir
|
|
1323
1829
|
|
|
1324
|
-
|
|
1325
|
-
|
|
1830
|
+
keys = ['file1', 'file2']
|
|
1831
|
+
sd = SubDir("!/test")
|
|
1832
|
+
sd.write( keys, 1 ) # works, writes '1' in both files.
|
|
1833
|
+
sd.write( keys, [1,2] ) # works, writes 1 and 2, respectively
|
|
1834
|
+
sd.write( keys, "12" ) # works, writes '12' in both files
|
|
1835
|
+
sd.write( keys, [1] ) # produces error as len(keys) != len(obj)
|
|
1326
1836
|
|
|
1327
|
-
If the current directory is
|
|
1837
|
+
If the current directory is ``None``, then the function raises an :class:`EOFError` exception.
|
|
1328
1838
|
|
|
1329
1839
|
Parameters
|
|
1330
1840
|
----------
|
|
1331
|
-
|
|
1332
|
-
Core filename
|
|
1841
|
+
file : str
|
|
1842
|
+
Core filename, or list thereof.
|
|
1843
|
+
|
|
1333
1844
|
obj :
|
|
1334
|
-
Object to write, or list thereof if
|
|
1335
|
-
|
|
1336
|
-
|
|
1845
|
+
Object to write, or list thereof if ``file`` is a list.
|
|
1846
|
+
|
|
1847
|
+
raise_on_error : bool
|
|
1848
|
+
If ``False``, this function will return ``False`` upon failure.
|
|
1849
|
+
|
|
1337
1850
|
version : str
|
|
1338
|
-
If not None
|
|
1851
|
+
If not ``None``, specifies the version of the code which generated ``obj``.
|
|
1339
1852
|
This version will be written to the beginning of the file.
|
|
1853
|
+
|
|
1340
1854
|
ext : str
|
|
1341
|
-
Extension, or list thereof if
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
fmt : Format
|
|
1347
|
-
File format or None to use the directory's default.
|
|
1348
|
-
Note that
|
|
1349
|
-
Note that unless
|
|
1855
|
+
Extension, or list thereof if ``file`` is a list.
|
|
1856
|
+
|
|
1857
|
+
* Use ``None`` to use directory's default extension.
|
|
1858
|
+
* Use ``"*"`` to use the extension implied by ``fmt``.
|
|
1859
|
+
|
|
1860
|
+
fmt : :class:`cdxcore.subdir.Format`
|
|
1861
|
+
File format or ``None`` to use the directory's default.
|
|
1862
|
+
Note that ``fmt`` cannot be a list even if ``file`` is.
|
|
1863
|
+
Note that unless ``ext`` or the SubDir's extension is '*',
|
|
1864
|
+
changing the format does not automatically change the extension used.
|
|
1350
1865
|
|
|
1351
1866
|
Returns
|
|
1352
1867
|
-------
|
|
1353
|
-
|
|
1868
|
+
Success : bool
|
|
1869
|
+
Boolean to indicate success if ``raise_on_error`` is ``False``.
|
|
1354
1870
|
"""
|
|
1355
|
-
ext, fmt = self.
|
|
1871
|
+
ext, fmt = self.auto_ext_fmt(ext=ext, fmt=fmt)
|
|
1356
1872
|
version = str(version) if not version is None else None
|
|
1357
1873
|
assert ext != self.EXT_FMT_AUTO, ("'ext' is '*'...?")
|
|
1358
1874
|
|
|
1359
1875
|
if version=='*': error("You cannot write version '*'. Use None to write a file without version.")
|
|
1876
|
+
|
|
1360
1877
|
if version is None and fmt in [Format.BLOSC, Format.GZIP]:
|
|
1361
|
-
|
|
1878
|
+
# blosc and gzip have unexpected side effects
|
|
1879
|
+
# a version is attempted to be read but is not present
|
|
1880
|
+
# (e.g. blosc causes a MemoryError)
|
|
1881
|
+
version = ""
|
|
1362
1882
|
|
|
1363
|
-
def writer(
|
|
1883
|
+
def writer( file, full_file_name, obj ):
|
|
1364
1884
|
try:
|
|
1365
|
-
if fmt == Format.PICKLE
|
|
1366
|
-
|
|
1885
|
+
if fmt == Format.PICKLE:
|
|
1886
|
+
# only if a version is provided write it into the file
|
|
1887
|
+
with open(full_file_name,"wb") as f:
|
|
1367
1888
|
# handle version as byte string
|
|
1368
1889
|
if not version is None:
|
|
1369
1890
|
version_ = bytearray(version, "utf-8")
|
|
@@ -1372,35 +1893,41 @@ class SubDir(object):
|
|
|
1372
1893
|
len8[0] = len(version_)
|
|
1373
1894
|
f.write(len8)
|
|
1374
1895
|
f.write(version_)
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1896
|
+
pickle.dump(obj,f,-1)
|
|
1897
|
+
|
|
1898
|
+
elif fmt == Format.BLOSC:
|
|
1899
|
+
# only if a version is provided write it into the file
|
|
1900
|
+
with open(full_file_name,"wb") as f:
|
|
1901
|
+
# handle version as byte string
|
|
1902
|
+
if not version is None: # it's never None
|
|
1903
|
+
version_ = bytearray(version, "utf-8")
|
|
1904
|
+
if len(version_) > 255: error("Version '%s' is way too long: its byte encoding has length %ld which does not fit into a byte", version, len(version_))
|
|
1905
|
+
len8 = bytearray(1)
|
|
1906
|
+
len8[0] = len(version_)
|
|
1907
|
+
f.write(len8)
|
|
1908
|
+
f.write(version_)
|
|
1909
|
+
pdata = pickle.dumps(obj) # returns data as a bytes object
|
|
1910
|
+
del obj
|
|
1911
|
+
len_data = len(pdata)
|
|
1912
|
+
num_blocks = max(0,len_data-1) // BLOSC_MAX_USE + 1
|
|
1913
|
+
f.write(num_blocks.to_bytes(2, 'big', signed=False))
|
|
1914
|
+
for i in range(num_blocks):
|
|
1915
|
+
start = i*BLOSC_MAX_USE
|
|
1916
|
+
end = min(len_data,start+BLOSC_MAX_USE)
|
|
1917
|
+
assert end>start, ("Internal error; nothing to write")
|
|
1918
|
+
block = blosc.compress( pdata[start:end] )
|
|
1919
|
+
blockl = len(block)
|
|
1920
|
+
f.write( blockl.to_bytes(6, 'big', signed=False) )
|
|
1921
|
+
if blockl > 0:
|
|
1922
|
+
f.write( block )
|
|
1923
|
+
del block
|
|
1924
|
+
del pdata
|
|
1397
1925
|
|
|
1398
1926
|
elif fmt == Format.GZIP:
|
|
1399
|
-
if
|
|
1400
|
-
|
|
1401
|
-
with gzip.open(fullFileName,"wb") as f:
|
|
1927
|
+
# only if a version is provided write it into the file
|
|
1928
|
+
with gzip.open(full_file_name,"wb") as f:
|
|
1402
1929
|
# handle version as byte string
|
|
1403
|
-
if not version is None:
|
|
1930
|
+
if not version is None: # it's never None
|
|
1404
1931
|
version_ = bytearray(version, "utf-8")
|
|
1405
1932
|
if len(version_) > 255: error("Version '%s' is way too long: its byte encoding has length %ld which does not fit into a byte", version, len(version_))
|
|
1406
1933
|
len8 = bytearray(1)
|
|
@@ -1410,12 +1937,12 @@ class SubDir(object):
|
|
|
1410
1937
|
pickle.dump(obj,f,-1)
|
|
1411
1938
|
|
|
1412
1939
|
elif fmt in [Format.JSON_PLAIN, Format.JSON_PICKLE]:
|
|
1413
|
-
|
|
1940
|
+
# only if a version is provided write it into the file
|
|
1941
|
+
with open(full_file_name,"wt",encoding="utf-8") as f:
|
|
1414
1942
|
if not version is None:
|
|
1415
1943
|
f.write("# " + version + "\n")
|
|
1416
1944
|
if fmt == Format.JSON_PICKLE:
|
|
1417
|
-
|
|
1418
|
-
raise ModuleNotFoundError("jsonpickle", "'jsonpickle' not found")
|
|
1945
|
+
jsonpickle = _import_jsonpickle()
|
|
1419
1946
|
f.write( jsonpickle.encode(obj) )
|
|
1420
1947
|
else:
|
|
1421
1948
|
assert fmt == Format.JSON_PLAIN, ("Internal error: invalid Format", fmt)
|
|
@@ -1424,27 +1951,21 @@ class SubDir(object):
|
|
|
1424
1951
|
else:
|
|
1425
1952
|
raise NotImplementedError(fmt, txtfmt("Internal error: invalid format '%s'", fmt))
|
|
1426
1953
|
except Exception as e:
|
|
1427
|
-
if
|
|
1954
|
+
if raise_on_error:
|
|
1428
1955
|
raise e
|
|
1429
1956
|
return False
|
|
1430
1957
|
return True
|
|
1431
|
-
return self._write( writer=writer,
|
|
1432
|
-
|
|
1433
|
-
set = write
|
|
1958
|
+
return self._write( writer=writer, file=file, obj=obj, raise_on_error=raise_on_error, ext=ext )
|
|
1434
1959
|
|
|
1435
|
-
def
|
|
1960
|
+
def write_string( self, file : str, line : str, raise_on_error : bool = True, *, ext : str = None ) -> bool:
|
|
1436
1961
|
"""
|
|
1437
|
-
Writes
|
|
1438
|
-
-- Supports 'key' containing directories
|
|
1439
|
-
-- Supports 'key' being a list.
|
|
1440
|
-
In this case, line can either be the same value for all key's or a list, too.
|
|
1441
|
-
|
|
1442
|
-
If the current directory is 'None', then the function throws an EOFError exception
|
|
1443
|
-
See additional comments for write()
|
|
1962
|
+
Writes a line of text into a file.
|
|
1444
1963
|
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1964
|
+
* Supports ``file``` containing directories.
|
|
1965
|
+
* Supports ``file``` being a list.
|
|
1966
|
+
In this case, ``line`` can either be the same value for all file's or a list, too.
|
|
1967
|
+
|
|
1968
|
+
If the current directory is ``None``, then the function throws an EOFError exception
|
|
1448
1969
|
"""
|
|
1449
1970
|
verify( not isinstance(ext, Format), "Cannot change format when writing strings. Found extension '%s'", ext, exception=ValueError )
|
|
1450
1971
|
ext = ext if not ext is None else self._ext
|
|
@@ -1452,38 +1973,37 @@ class SubDir(object):
|
|
|
1452
1973
|
|
|
1453
1974
|
if len(line) == 0 or line[-1] != '\n':
|
|
1454
1975
|
line += '\n'
|
|
1455
|
-
def writer(
|
|
1976
|
+
def writer( file, full_file_name, obj ):
|
|
1456
1977
|
try:
|
|
1457
|
-
with open(
|
|
1978
|
+
with open(full_file_name,"wt",encoding="utf-8") as f:
|
|
1458
1979
|
f.write(obj)
|
|
1459
1980
|
except Exception as e:
|
|
1460
|
-
if
|
|
1981
|
+
if raise_on_error:
|
|
1461
1982
|
raise e
|
|
1462
1983
|
return False
|
|
1463
1984
|
return True
|
|
1464
|
-
return self._write( writer=writer,
|
|
1985
|
+
return self._write( writer=writer, file=file, obj=line, raise_on_error=raise_on_error, ext=ext )
|
|
1465
1986
|
|
|
1466
1987
|
# -- iterate --
|
|
1467
1988
|
|
|
1468
1989
|
def files(self, *, ext : str = None) -> list:
|
|
1469
1990
|
"""
|
|
1470
|
-
Returns a list of
|
|
1991
|
+
Returns a list of files in this subdirectory with the current extension, or the specified extension.
|
|
1471
1992
|
|
|
1472
1993
|
In other words, if the extension is ".pck", and the files are "file1.pck", "file2.pck", "file3.bin"
|
|
1473
1994
|
then this function will return [ "file1", "file2" ]
|
|
1474
1995
|
|
|
1475
|
-
If
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
This function ignores directories. Use subDirs() to retrieve those.
|
|
1996
|
+
If ``ext`` is:
|
|
1997
|
+
|
|
1998
|
+
* ``None``, then the directory's default extension will be used.
|
|
1999
|
+
* ``""`` then this function will return all files in this directory.
|
|
2000
|
+
* ``"*"`` then the extension corresponding to the current format will be used.
|
|
1481
2001
|
|
|
1482
|
-
|
|
2002
|
+
This function ignores directories. Use :meth:`cdxcore.subdir.SubDir.sub_dirs` to retrieve those.
|
|
1483
2003
|
"""
|
|
1484
|
-
if not self.
|
|
2004
|
+
if not self.path_exists():
|
|
1485
2005
|
return []
|
|
1486
|
-
ext = self.
|
|
2006
|
+
ext = self.auto_ext( ext )
|
|
1487
2007
|
ext_l = len(ext)
|
|
1488
2008
|
keys = []
|
|
1489
2009
|
with os.scandir(self._path) as it:
|
|
@@ -1497,15 +2017,15 @@ class SubDir(object):
|
|
|
1497
2017
|
else:
|
|
1498
2018
|
keys.append( entry.name )
|
|
1499
2019
|
return keys
|
|
1500
|
-
keys = files
|
|
1501
2020
|
|
|
1502
|
-
def
|
|
2021
|
+
def sub_dirs(self) -> list:
|
|
1503
2022
|
"""
|
|
1504
|
-
|
|
1505
|
-
|
|
2023
|
+
Retrieve a list of all sub directories.
|
|
2024
|
+
|
|
2025
|
+
If ``self`` does not refer to an existing directory, then this function returns an empty list.
|
|
1506
2026
|
"""
|
|
1507
2027
|
# do not do anything if the object was deleted
|
|
1508
|
-
if not self.
|
|
2028
|
+
if not self.path_exists():
|
|
1509
2029
|
return []
|
|
1510
2030
|
subdirs = []
|
|
1511
2031
|
with os.scandir(self._path[:-1]) as it:
|
|
@@ -1517,322 +2037,345 @@ class SubDir(object):
|
|
|
1517
2037
|
|
|
1518
2038
|
# -- delete --
|
|
1519
2039
|
|
|
1520
|
-
def delete( self,
|
|
2040
|
+
def delete( self, file : str, raise_on_error: bool = False, *, ext : str = None ):
|
|
1521
2041
|
"""
|
|
1522
|
-
Deletes
|
|
2042
|
+
Deletes ``file``.
|
|
2043
|
+
|
|
2044
|
+
This function will quietly fail if ``file`` does not exist unless ``raise_on_error``
|
|
2045
|
+
is set to ``True``.
|
|
1523
2046
|
|
|
1524
2047
|
Parameters
|
|
1525
2048
|
----------
|
|
1526
|
-
|
|
2049
|
+
file :
|
|
1527
2050
|
filename, or list of filenames
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
2051
|
+
|
|
2052
|
+
raise_on_error : bool
|
|
2053
|
+
If ``False``, do not throw :class:`KeyError` if file does not exist
|
|
2054
|
+
or another error occurs.
|
|
2055
|
+
|
|
2056
|
+
ext : str
|
|
2057
|
+
Extension, or list thereof if ``file`` is a list.
|
|
2058
|
+
|
|
1532
2059
|
Use
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
2060
|
+
|
|
2061
|
+
* ``None`` for the directory default.
|
|
2062
|
+
* ``""`` to not use an automatic extension.
|
|
2063
|
+
* ``"*"`` to use the extension associated with the format of the directory.
|
|
1536
2064
|
"""
|
|
1537
2065
|
# do not do anything if the object was deleted
|
|
1538
2066
|
if self._path is None:
|
|
1539
|
-
if
|
|
2067
|
+
if raise_on_error: raise EOFError("Cannot delete '%s': current directory not specified" % file)
|
|
1540
2068
|
return
|
|
1541
2069
|
|
|
1542
2070
|
# vector version
|
|
1543
|
-
if not isinstance(
|
|
1544
|
-
if not isinstance(
|
|
1545
|
-
l = len(
|
|
2071
|
+
if not isinstance(file,str):
|
|
2072
|
+
if not isinstance(file, Collection): error( "'file' must be a string or an interable object. Found type %s", type(file))
|
|
2073
|
+
l = len(file)
|
|
1546
2074
|
if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
|
|
1547
2075
|
ext = [ ext ] * l
|
|
1548
2076
|
else:
|
|
1549
|
-
if len(ext) != l: error("'ext' must have same lengths as '
|
|
1550
|
-
for k, e in zip(
|
|
1551
|
-
self.delete(k,
|
|
2077
|
+
if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l )
|
|
2078
|
+
for k, e in zip(file,ext):
|
|
2079
|
+
self.delete(k, raise_on_error=raise_on_error, ext=e)
|
|
1552
2080
|
return
|
|
1553
2081
|
|
|
1554
|
-
# handle directories in '
|
|
1555
|
-
if len(
|
|
1556
|
-
sub, key_ = os.path.split(
|
|
1557
|
-
if len(key_) == 0: error("'
|
|
1558
|
-
if len(sub) > 0: return SubDir(sub,parent=self).delete(key_,
|
|
2082
|
+
# handle directories in 'file'
|
|
2083
|
+
if len(file) == 0: error( "'file' is empty" )
|
|
2084
|
+
sub, key_ = os.path.split(file)
|
|
2085
|
+
if len(key_) == 0: error("'file' %s indicates a directory, not a file", file)
|
|
2086
|
+
if len(sub) > 0: return SubDir(sub,parent=self).delete(key_,raise_on_error=raise_on_error,ext=ext)
|
|
1559
2087
|
# don't try if directory doesn't exist
|
|
1560
|
-
if not self.
|
|
1561
|
-
if
|
|
1562
|
-
raise KeyError(
|
|
2088
|
+
if not self.path_exists():
|
|
2089
|
+
if raise_on_error:
|
|
2090
|
+
raise KeyError(file)
|
|
1563
2091
|
return
|
|
1564
|
-
|
|
1565
|
-
if not os.path.exists(
|
|
1566
|
-
if
|
|
1567
|
-
raise KeyError(
|
|
2092
|
+
full_file_name = self.full_file_name(file, ext=ext)
|
|
2093
|
+
if not os.path.exists(full_file_name):
|
|
2094
|
+
if raise_on_error:
|
|
2095
|
+
raise KeyError(file)
|
|
1568
2096
|
else:
|
|
1569
|
-
os.remove(
|
|
2097
|
+
os.remove(full_file_name)
|
|
1570
2098
|
|
|
1571
|
-
def
|
|
2099
|
+
def delete_all_files( self, raise_on_error : bool = False, *, ext : str = None ):
|
|
1572
2100
|
"""
|
|
1573
2101
|
Deletes all valid keys in this sub directory with the correct extension.
|
|
1574
2102
|
|
|
1575
2103
|
Parameters
|
|
1576
2104
|
----------
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
-- A Format to specify the default extension for that format.
|
|
2105
|
+
raise_on_error : bool
|
|
2106
|
+
Set to ``False`` to quietly ignore errors.
|
|
2107
|
+
|
|
2108
|
+
ext : str
|
|
2109
|
+
Extension to be used:
|
|
2110
|
+
|
|
2111
|
+
* ``None`` for the directory default.
|
|
2112
|
+
* ``""`` to not use an automatic extension.
|
|
2113
|
+
* ``"*"`` to use the extension associated with the format of the directory.
|
|
1587
2114
|
"""
|
|
1588
2115
|
if self._path is None:
|
|
1589
|
-
if
|
|
2116
|
+
if raise_on_error: raise EOFError("Cannot delete all files: current directory not specified")
|
|
1590
2117
|
return
|
|
1591
|
-
if not self.
|
|
2118
|
+
if not self.path_exists():
|
|
1592
2119
|
return
|
|
1593
|
-
self.delete( self.
|
|
2120
|
+
self.delete( self.files(ext=ext), raise_on_error=raise_on_error, ext=ext )
|
|
1594
2121
|
|
|
1595
|
-
def
|
|
2122
|
+
def delete_all_content( self, delete_self : bool = False, raise_on_error : bool = False, *, ext : str = None ):
|
|
1596
2123
|
"""
|
|
1597
2124
|
Deletes all valid keys and subdirectories in this sub directory.
|
|
2125
|
+
|
|
1598
2126
|
Does not delete files with other extensions.
|
|
1599
|
-
Use
|
|
2127
|
+
Use :meth:`cdxcore.subdir.SubDir.delete_everything` if the aim is to delete, well, everything.
|
|
1600
2128
|
|
|
1601
2129
|
Parameters
|
|
1602
2130
|
----------
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
|
|
1606
|
-
False for silent failure
|
|
1607
|
-
ext:
|
|
1608
|
-
Extension for keys, or None for the directory's default.
|
|
1609
|
-
|
|
1610
|
-
Use "" to match all files regardless of extension.
|
|
2131
|
+
delete_self: bool
|
|
2132
|
+
Whether to delete the directory itself as well, or only its contents.
|
|
2133
|
+
raise_on_error: bool
|
|
2134
|
+
``False`` for silent failure
|
|
2135
|
+
ext: str
|
|
2136
|
+
Extension for keys, or ``None`` for the directory's default.
|
|
2137
|
+
Use ``""`` to match all files regardless of extension.
|
|
1611
2138
|
"""
|
|
1612
2139
|
# do not do anything if the object was deleted
|
|
1613
2140
|
if self._path is None:
|
|
1614
|
-
if
|
|
2141
|
+
if raise_on_error: raise EOFError("Cannot delete all contents: current directory not specified")
|
|
1615
2142
|
return
|
|
1616
|
-
if not self.
|
|
2143
|
+
if not self.path_exists():
|
|
1617
2144
|
return
|
|
1618
2145
|
# delete sub directories
|
|
1619
|
-
subdirs = self.
|
|
2146
|
+
subdirs = self.sub_dirs();
|
|
1620
2147
|
for subdir in subdirs:
|
|
1621
|
-
SubDir(subdir, parent=self).
|
|
2148
|
+
SubDir(subdir, parent=self).delete_all_content( delete_self=True, raise_on_error=raise_on_error, ext=ext )
|
|
1622
2149
|
# delete keys
|
|
1623
|
-
self.
|
|
2150
|
+
self.delete_all_files( raise_on_error=raise_on_error,ext=ext )
|
|
1624
2151
|
# delete myself
|
|
1625
|
-
if not
|
|
2152
|
+
if not delete_self:
|
|
1626
2153
|
return
|
|
1627
2154
|
rest = list( os.scandir(self._path[:-1]) )
|
|
1628
2155
|
txt = str(rest)
|
|
1629
2156
|
txt = txt if len(txt) < 50 else (txt[:47] + '...')
|
|
1630
2157
|
if len(rest) > 0:
|
|
1631
|
-
if
|
|
2158
|
+
if raise_on_error: error( "Cannot delete my own directory %s: directory not empty: found %ld object(s): %s", self._path,len(rest), txt)
|
|
1632
2159
|
return
|
|
1633
2160
|
os.rmdir(self._path[:-1]) ## does not work ????
|
|
1634
2161
|
self._path = None
|
|
1635
2162
|
|
|
1636
|
-
def
|
|
2163
|
+
def delete_everything( self, keep_directory : bool = True ):
|
|
1637
2164
|
"""
|
|
1638
|
-
Deletes the entire sub directory will all contents
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
If
|
|
1642
|
-
|
|
1643
|
-
If self is None, do nothing. That means you can call this function several times.
|
|
2165
|
+
Deletes the entire sub directory with all contents.
|
|
2166
|
+
|
|
2167
|
+
*WARNING:* deletes *all* files and sub-directories, not just those with the present extension.
|
|
2168
|
+
If ``keep_directory`` is ``False``, the directory referred to by this object will also be deleted.
|
|
2169
|
+
In this case, ``self`` will be set to ``None``.
|
|
1644
2170
|
"""
|
|
1645
2171
|
if self._path is None:
|
|
1646
2172
|
return
|
|
1647
|
-
if not self.
|
|
2173
|
+
if not self.path_exists():
|
|
1648
2174
|
return
|
|
1649
2175
|
shutil.rmtree(self._path[:-1], ignore_errors=True)
|
|
1650
|
-
if not
|
|
2176
|
+
if not keep_directory and os.path.exists(self._path[:-1]):
|
|
1651
2177
|
os.rmdir(self._path[:-1])
|
|
1652
2178
|
self._path = None
|
|
1653
|
-
elif
|
|
2179
|
+
elif keep_directory and not os.path.exists(self._path[:-1]):
|
|
1654
2180
|
os.makedirs(self._path[:-1])
|
|
1655
2181
|
|
|
1656
2182
|
# -- file ops --
|
|
1657
2183
|
|
|
1658
|
-
def exists(self,
|
|
2184
|
+
def exists(self, file : str, *, ext : str = None ) -> bool:
|
|
1659
2185
|
"""
|
|
1660
|
-
Checks whether
|
|
2186
|
+
Checks whether a file exists.
|
|
1661
2187
|
|
|
1662
2188
|
Parameters
|
|
1663
2189
|
----------
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1667
|
-
|
|
2190
|
+
file :
|
|
2191
|
+
Filename, or list of filenames.
|
|
2192
|
+
|
|
2193
|
+
ext : str
|
|
2194
|
+
Extension, or list thereof if ``file`` is a list.
|
|
2195
|
+
|
|
1668
2196
|
Use
|
|
1669
|
-
|
|
1670
|
-
|
|
1671
|
-
|
|
2197
|
+
|
|
2198
|
+
* ``None`` for the directory default.
|
|
2199
|
+
* ``""`` to not use an automatic extension.
|
|
2200
|
+
* ``"*"`` to use the extension associated with the format of the directory.
|
|
1672
2201
|
|
|
1673
2202
|
Returns
|
|
1674
2203
|
-------
|
|
1675
|
-
|
|
2204
|
+
Status : bool
|
|
2205
|
+
If ``file`` is a string, returns ``True`` or ``False``, else it will return a list of ``bool`` values.
|
|
1676
2206
|
"""
|
|
1677
2207
|
# vector version
|
|
1678
|
-
if not isinstance(
|
|
1679
|
-
verify( isinstance(
|
|
1680
|
-
l = len(
|
|
2208
|
+
if not isinstance(file,str):
|
|
2209
|
+
verify( isinstance(file, Collection), "'file' must be a string or an interable object. Found type %s", type(file))
|
|
2210
|
+
l = len(file)
|
|
1681
2211
|
if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
|
|
1682
2212
|
ext = [ ext ] * l
|
|
1683
2213
|
else:
|
|
1684
|
-
if len(ext) != l: error("'ext' must have same lengths as '
|
|
1685
|
-
return [ self.exists(k,ext=e) for k,e in zip(
|
|
2214
|
+
if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l )
|
|
2215
|
+
return [ self.exists(k,ext=e) for k,e in zip(file,ext) ]
|
|
1686
2216
|
# empty directory
|
|
1687
2217
|
if self._path is None:
|
|
1688
2218
|
return False
|
|
1689
|
-
# handle directories in '
|
|
1690
|
-
if len(
|
|
1691
|
-
sub, key_ = os.path.split(
|
|
1692
|
-
if len(key_) == 0: raise IsADirectoryError(
|
|
2219
|
+
# handle directories in 'file'
|
|
2220
|
+
if len(file) == 0: raise ValueError("'file' missing (the filename)")
|
|
2221
|
+
sub, key_ = os.path.split(file)
|
|
2222
|
+
if len(key_) == 0: raise IsADirectoryError( file, txtfmt("'file' %s indicates a directory, not a file", file) )
|
|
1693
2223
|
if len(sub) > 0:
|
|
1694
|
-
return self(sub).exists(
|
|
2224
|
+
return self(sub).exists(file=key_,ext=ext)
|
|
1695
2225
|
# if directory doesn't exist
|
|
1696
|
-
if not self.
|
|
2226
|
+
if not self.path_exists():
|
|
1697
2227
|
return False
|
|
1698
|
-
# single
|
|
1699
|
-
|
|
1700
|
-
if not os.path.exists(
|
|
2228
|
+
# single file
|
|
2229
|
+
full_file_name = self.full_file_name(file, ext=ext)
|
|
2230
|
+
if not os.path.exists(full_file_name):
|
|
1701
2231
|
return False
|
|
1702
|
-
if not os.path.isfile(
|
|
1703
|
-
raise IsADirectoryError("Structural error:
|
|
2232
|
+
if not os.path.isfile(full_file_name):
|
|
2233
|
+
raise IsADirectoryError("Structural error: file %s: exists, but is not a file (full path %s)",file,full_file_name)
|
|
1704
2234
|
return True
|
|
1705
2235
|
|
|
1706
|
-
def _getFileProperty( self, *,
|
|
2236
|
+
def _getFileProperty( self, *, file : str, ext : str, func ):
|
|
1707
2237
|
# vector version
|
|
1708
|
-
if not isinstance(
|
|
1709
|
-
verify( isinstance(
|
|
1710
|
-
l = len(
|
|
2238
|
+
if not isinstance(file,str):
|
|
2239
|
+
verify( isinstance(file, Collection), "'file' must be a string or an interable object. Found type %s", type(file))
|
|
2240
|
+
l = len(file)
|
|
1711
2241
|
if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
|
|
1712
2242
|
ext = [ ext ] * l
|
|
1713
2243
|
else:
|
|
1714
|
-
if len(ext) != l: error("'ext' must have same lengths as '
|
|
1715
|
-
return [ self._getFileProperty(
|
|
2244
|
+
if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l )
|
|
2245
|
+
return [ self._getFileProperty(file=k,ext=e,func=func) for k,e in zip(file,ext) ]
|
|
1716
2246
|
# empty directory
|
|
1717
2247
|
if self._path is None:
|
|
1718
2248
|
return None
|
|
1719
|
-
# handle directories in '
|
|
1720
|
-
if len(
|
|
1721
|
-
sub, key_ = os.path.split(
|
|
1722
|
-
if len(key_) == 0: raise IsADirectoryError(
|
|
1723
|
-
if len(sub) > 0: return self(sub)._getFileProperty(
|
|
2249
|
+
# handle directories in 'file'
|
|
2250
|
+
if len(file) == 0: raise ValueError("'file' missing (the filename)")
|
|
2251
|
+
sub, key_ = os.path.split(file)
|
|
2252
|
+
if len(key_) == 0: raise IsADirectoryError( file, txtfmt("'file' %s indicates a directory, not a file", file) )
|
|
2253
|
+
if len(sub) > 0: return self(sub)._getFileProperty(file=key_,ext=ext,func=func)
|
|
1724
2254
|
# if directory doesn't exist
|
|
1725
|
-
if not self.
|
|
2255
|
+
if not self.path_exists():
|
|
1726
2256
|
return None
|
|
1727
|
-
# single
|
|
1728
|
-
|
|
1729
|
-
if not os.path.exists(
|
|
2257
|
+
# single file
|
|
2258
|
+
full_file_name = self.full_file_name(file, ext=ext)
|
|
2259
|
+
if not os.path.exists(full_file_name):
|
|
1730
2260
|
return None
|
|
1731
|
-
return func(
|
|
2261
|
+
return func(full_file_name)
|
|
1732
2262
|
|
|
1733
|
-
def
|
|
2263
|
+
def get_creation_time( self, file : str, *, ext : str = None ) -> datetime.datetime:
|
|
1734
2264
|
"""
|
|
1735
|
-
Returns the creation time of
|
|
1736
|
-
|
|
2265
|
+
Returns the creation time of a file.
|
|
2266
|
+
|
|
2267
|
+
See comments on :func:`os.path.getctime` for system compatibility information.
|
|
1737
2268
|
|
|
1738
2269
|
Parameters
|
|
1739
2270
|
----------
|
|
1740
|
-
|
|
1741
|
-
filename, or list of filenames
|
|
2271
|
+
file :
|
|
2272
|
+
filename, or list of filenames.
|
|
1742
2273
|
ext :
|
|
1743
|
-
Extension, or list thereof if
|
|
1744
|
-
Use
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
2274
|
+
Extension, or list thereof if ``file`` is a list.
|
|
2275
|
+
Use:
|
|
2276
|
+
|
|
2277
|
+
* ``None`` for the directory default.
|
|
2278
|
+
* ``""`` for no automatic extension.
|
|
2279
|
+
* A :class:`cdxcore.subdir.Format` to use the default extension for that format.
|
|
1748
2280
|
|
|
1749
2281
|
Returns
|
|
1750
2282
|
-------
|
|
1751
|
-
datetime.datetime
|
|
2283
|
+
Datetime : :class:`datetime.datetime`
|
|
2284
|
+
A single ``datetime`` if ``file`` is a string, otherwise a list of ``datetime``'s.
|
|
2285
|
+
Returns ``None`` if an error occurred.
|
|
1752
2286
|
"""
|
|
1753
|
-
return self._getFileProperty(
|
|
2287
|
+
return self._getFileProperty( file=file, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getctime(x)) )
|
|
1754
2288
|
|
|
1755
|
-
def
|
|
2289
|
+
def get_last_modification_time( self, file : str, *, ext : str = None ) -> datetime.datetime:
|
|
1756
2290
|
"""
|
|
1757
|
-
Returns the last modification time
|
|
1758
|
-
|
|
2291
|
+
Returns the last modification time of a file.
|
|
2292
|
+
|
|
2293
|
+
See comments on :func:`os.path.getmtime` for system compatibility information.
|
|
1759
2294
|
|
|
1760
2295
|
Parameters
|
|
1761
2296
|
----------
|
|
1762
|
-
|
|
1763
|
-
filename, or list of filenames
|
|
2297
|
+
file :
|
|
2298
|
+
filename, or list of filenames.
|
|
1764
2299
|
ext :
|
|
1765
|
-
Extension, or list thereof if
|
|
1766
|
-
Use
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
2300
|
+
Extension, or list thereof if ``file`` is a list.
|
|
2301
|
+
Use:
|
|
2302
|
+
|
|
2303
|
+
* ``None`` for the directory default.
|
|
2304
|
+
* ``""`` for no automatic extension.
|
|
2305
|
+
* A :class:`cdxcore.subdir.Format` to use the default extension for that format.
|
|
1770
2306
|
|
|
1771
2307
|
Returns
|
|
1772
2308
|
-------
|
|
1773
|
-
datetime.datetime
|
|
2309
|
+
Datetime : :class:`datetime.datetime`
|
|
2310
|
+
A single ``datetime`` if ``file`` is a string, otherwise a list of ``datetime``'s.
|
|
2311
|
+
Returns ``None`` if an error occurred.
|
|
1774
2312
|
"""
|
|
1775
|
-
return self._getFileProperty(
|
|
2313
|
+
return self._getFileProperty( file=file, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getmtime(x)) )
|
|
1776
2314
|
|
|
1777
|
-
def
|
|
2315
|
+
def get_last_access_time( self, file : str, *, ext : str = None ) -> datetime.datetime:
|
|
1778
2316
|
"""
|
|
1779
|
-
Returns the last access time of
|
|
1780
|
-
|
|
2317
|
+
Returns the last access time of a file.
|
|
2318
|
+
|
|
2319
|
+
See comments on :func:`os.path.getatime` for system compatibility information.
|
|
1781
2320
|
|
|
1782
2321
|
Parameters
|
|
1783
2322
|
----------
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
2323
|
+
file : str
|
|
2324
|
+
Filename, or list of filenames.
|
|
2325
|
+
|
|
2326
|
+
ext : str
|
|
2327
|
+
Extension, or list thereof if ``file`` is a list.
|
|
2328
|
+
|
|
2329
|
+
* Use ``None`` for the directory default.
|
|
2330
|
+
* Use ``""`` for no automatic extension.
|
|
1792
2331
|
|
|
1793
2332
|
Returns
|
|
1794
2333
|
-------
|
|
1795
|
-
datetime.datetime
|
|
2334
|
+
Datetime : :class:`datetime.datetime`
|
|
2335
|
+
A single ``datetime`` if ``file`` is a string, otherwise a list of ``datetime``'s.
|
|
2336
|
+
Returns ``None`` if an error occurred.
|
|
1796
2337
|
"""
|
|
1797
|
-
return self._getFileProperty(
|
|
2338
|
+
return self._getFileProperty( file=file, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getatime(x)) )
|
|
1798
2339
|
|
|
1799
|
-
def
|
|
2340
|
+
def file_size( self, file : str, *, ext : str = None ) -> int:
|
|
1800
2341
|
"""
|
|
1801
|
-
Returns the file size of
|
|
1802
|
-
|
|
2342
|
+
Returns the file size of a file.
|
|
2343
|
+
|
|
2344
|
+
See comments on :func:`os.path.getsize` for system compatibility information.
|
|
1803
2345
|
|
|
1804
2346
|
Parameters
|
|
1805
2347
|
----------
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
2348
|
+
file : str
|
|
2349
|
+
Filename, or list of filenames.
|
|
2350
|
+
|
|
2351
|
+
ext : str
|
|
2352
|
+
Extension, or list thereof if ``file`` is a list.
|
|
2353
|
+
|
|
2354
|
+
* Use ``None`` for the directory default.
|
|
2355
|
+
* Use ``""`` for no automatic extension.
|
|
1814
2356
|
|
|
1815
2357
|
Returns
|
|
1816
2358
|
-------
|
|
1817
|
-
File size if
|
|
2359
|
+
File size if ``file`` exists, or ``None`` if an error occurred.
|
|
1818
2360
|
"""
|
|
1819
|
-
return self._getFileProperty(
|
|
2361
|
+
return self._getFileProperty( file=file, ext=ext, func=lambda x : os.path.getsize(x) )
|
|
1820
2362
|
|
|
1821
2363
|
def rename( self, source : str, target : str, *, ext : str = None ):
|
|
1822
2364
|
"""
|
|
1823
|
-
Rename
|
|
1824
|
-
|
|
2365
|
+
Rename a file.
|
|
2366
|
+
|
|
2367
|
+
This function will raise an exception if not successful.
|
|
1825
2368
|
|
|
1826
2369
|
Parameters
|
|
1827
2370
|
----------
|
|
1828
|
-
source, target:
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
2371
|
+
source, target : str
|
|
2372
|
+
Filenames.
|
|
2373
|
+
|
|
2374
|
+
ext : str
|
|
2375
|
+
Extension.
|
|
2376
|
+
|
|
2377
|
+
* Use ``None`` for the directory default.
|
|
2378
|
+
* Use ``""`` for no automatic extension.
|
|
1836
2379
|
"""
|
|
1837
2380
|
# empty directory
|
|
1838
2381
|
if self._path is None:
|
|
@@ -1843,9 +2386,9 @@ class SubDir(object):
|
|
|
1843
2386
|
sub, source_ = os.path.split(source)
|
|
1844
2387
|
if len(source_) == 0: raise IsADirectoryError( source, txtfmt("'source' %s indicates a directory, not a file", source ))
|
|
1845
2388
|
if len(sub) > 0:
|
|
1846
|
-
src_full = self(sub).
|
|
2389
|
+
src_full = self(sub).full_file_name(file=source_,ext=ext)
|
|
1847
2390
|
else:
|
|
1848
|
-
src_full = self.
|
|
2391
|
+
src_full = self.full_file_name( source, ext=ext )
|
|
1849
2392
|
|
|
1850
2393
|
# handle directories in 'target'
|
|
1851
2394
|
if len(target) == 0: raise ValueError("'target' missing (the filename)" )
|
|
@@ -1853,191 +2396,257 @@ class SubDir(object):
|
|
|
1853
2396
|
if len(target_) == 0: raise IsADirectoryError( target, txtfmt("'target' %s indicates a directory, not a file", target))
|
|
1854
2397
|
if len(sub) > 0:
|
|
1855
2398
|
tar_dir = self(sub)
|
|
1856
|
-
tar_dir.
|
|
1857
|
-
tar_full = tar_dir.
|
|
2399
|
+
tar_dir.create_directory()
|
|
2400
|
+
tar_full = tar_dir.full_file_name(file=target_,ext=ext)
|
|
1858
2401
|
else:
|
|
1859
|
-
tar_full = self.
|
|
1860
|
-
self.
|
|
2402
|
+
tar_full = self.full_file_name( target, ext=ext )
|
|
2403
|
+
self.create_directory()
|
|
1861
2404
|
|
|
1862
2405
|
os.rename(src_full, tar_full)
|
|
1863
2406
|
|
|
1864
2407
|
# utilities
|
|
1865
2408
|
|
|
1866
2409
|
@staticmethod
|
|
1867
|
-
def
|
|
2410
|
+
def remove_bad_file_characters( file : str, by : str="default" ) -> str:
|
|
1868
2411
|
"""
|
|
1869
|
-
Replaces invalid characters in a filename
|
|
1870
|
-
|
|
2412
|
+
Replaces invalid characters in a filename using the map ``by``.
|
|
2413
|
+
|
|
2414
|
+
See :func:`cdxcore.util.fmt_filename` for documentation and further options.
|
|
1871
2415
|
"""
|
|
1872
|
-
return fmt_filename(
|
|
1873
|
-
|
|
1874
|
-
def unqiueLabelToKey( self, unique_label:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
|
|
1875
|
-
"""
|
|
1876
|
-
Converts a unique label which might contain invalid characters into a unique file name, such that the full file name does not exceed 'max_length' bytes.
|
|
1877
|
-
The returned key has the format
|
|
1878
|
-
name + separator + ID
|
|
1879
|
-
where ID has length id_length.
|
|
1880
|
-
If unique_label is already guaranteed to be a valid filename, use unqiueLongFileNameToKey() instead.
|
|
1881
|
-
"""
|
|
1882
|
-
len_ext = len(self.ext)
|
|
1883
|
-
assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
|
|
1884
|
-
uqf = uniqueLabelExt( max_length=max_length-len_ext, id_length=id_length, separator=separator, filename_by="default" )
|
|
1885
|
-
return uqf( unique_label )
|
|
2416
|
+
return fmt_filename( file, by=by )
|
|
1886
2417
|
|
|
1887
|
-
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
2418
|
+
if False:
|
|
2419
|
+
def unqiueLabelToKey( self, unique_label:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
|
|
2420
|
+
"""
|
|
2421
|
+
Converts a unique label which might contain invalid characters into a unique file name, such that the full file name does not exceed 'max_length' bytes.
|
|
2422
|
+
The returned file has the format
|
|
2423
|
+
name + separator + ID
|
|
2424
|
+
where ID has length id_length.
|
|
2425
|
+
If unique_label is already guaranteed to be a valid filename, use unqiueLongFileNameToKey() instead.
|
|
2426
|
+
"""
|
|
2427
|
+
len_ext = len(self.ext)
|
|
2428
|
+
assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
|
|
2429
|
+
uqf = UniqueLabel( max_length=max_length-len_ext, id_length=id_length, separator=separator, filename_by="default" )
|
|
2430
|
+
return uqf( unique_label )
|
|
2431
|
+
|
|
2432
|
+
def unqiueLongFileNameToKey( self, unique_filename:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
|
|
2433
|
+
"""
|
|
2434
|
+
Converts a unique filename which might be too long to a unique filename such that the total length plus 'ext' does not exceed 'max_length' bytes.
|
|
2435
|
+
If the filename is already short enough, no change is made.
|
|
2436
|
+
|
|
2437
|
+
If 'unique_filename' is not guaranteed to be a valid filename, use unqiueLabelToKey() instead.
|
|
2438
|
+
"""
|
|
2439
|
+
len_ext = len(self.ext)
|
|
2440
|
+
assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
|
|
2441
|
+
uqf = UniqueLabel( max_length=max_length-len_ext, id_length=id_length, separator=separator )
|
|
2442
|
+
return uqf( unique_filename )
|
|
1898
2443
|
|
|
1899
2444
|
# -- dict-like interface --
|
|
1900
2445
|
|
|
1901
|
-
def __call__(self,
|
|
2446
|
+
def __call__(self, element : str,
|
|
1902
2447
|
default = RETURN_SUB_DIRECTORY,
|
|
1903
|
-
|
|
2448
|
+
raise_on_error : bool = False,
|
|
1904
2449
|
*,
|
|
1905
2450
|
version : str = None,
|
|
1906
2451
|
ext : str = None,
|
|
1907
2452
|
fmt : Format = None,
|
|
1908
2453
|
delete_wrong_version : bool = True,
|
|
1909
|
-
|
|
2454
|
+
create_directory : bool = None ):
|
|
1910
2455
|
"""
|
|
1911
|
-
|
|
1912
|
-
|
|
1913
|
-
If
|
|
2456
|
+
Read either data from a file, or return a new sub directory.
|
|
2457
|
+
|
|
2458
|
+
If only the ``element`` argument is used, then this function returns a new sub directory
|
|
2459
|
+
named ``element``.
|
|
2460
|
+
|
|
2461
|
+
If both ``element`` and ``default`` arguments are used, then this function attempts to read the file ``element``
|
|
2462
|
+
from disk, returning ``default`` if it does not exist.
|
|
1914
2463
|
|
|
1915
|
-
sd
|
|
2464
|
+
Assume we have a subdirectory ``sd``::
|
|
2465
|
+
|
|
2466
|
+
from cdxcore.subdir import SubDir
|
|
2467
|
+
sd = SubDir("!/test")
|
|
1916
2468
|
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
x = sd('
|
|
1920
|
-
x = sd('
|
|
2469
|
+
Reading files::
|
|
2470
|
+
|
|
2471
|
+
x = sd('file', None) # reads 'file' with default value None
|
|
2472
|
+
x = sd('sd/file', default=1) # reads 'file' from sub directory 'sd' with default value 1
|
|
2473
|
+
x = sd('file', default=1, ext="tmp") # reads 'file.tmp' with default value 1
|
|
1921
2474
|
|
|
1922
|
-
Create sub directory
|
|
1923
|
-
|
|
1924
|
-
sd2 = sd("
|
|
1925
|
-
sd2 = sd("subdir1/subdir2"
|
|
1926
|
-
sd2 = sd(ext=".tmp")
|
|
2475
|
+
Create sub directory::
|
|
2476
|
+
|
|
2477
|
+
sd2 = sd("subdir") # creates and returns handle to subdirectory 'subdir'
|
|
2478
|
+
sd2 = sd("subdir1/subdir2") # creates and returns handle to subdirectory 'subdir1/subdir2'
|
|
2479
|
+
sd2 = sd("subdir1/subdir2", ext=".tmp") # creates and returns handle to subdirectory 'subdir1/subdir2' with extension "tmp"
|
|
2480
|
+
sd2 = sd(ext=".tmp") # returns handle to current subdirectory with extension "tmp"
|
|
1927
2481
|
|
|
1928
2482
|
Parameters
|
|
1929
2483
|
----------
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
If
|
|
2484
|
+
element : str
|
|
2485
|
+
File or directory name, or a list thereof.
|
|
2486
|
+
|
|
2487
|
+
default : optional
|
|
2488
|
+
If specified, this function reads ``element`` with
|
|
2489
|
+
``read( element, default, *args, **kwargs )``.
|
|
1935
2490
|
|
|
1936
|
-
|
|
1937
|
-
|
|
2491
|
+
If ``default`` is not specified, then this function returns a new sub-directory by calling
|
|
2492
|
+
``SubDir(element,parent=self,ext=ext,fmt=fmt)``.
|
|
1938
2493
|
|
|
1939
|
-
|
|
2494
|
+
create_directory : bool, optional
|
|
2495
|
+
*When creating sub-directories:*
|
|
2496
|
+
|
|
2497
|
+
Whether or not to instantly create the sub-directory. The default, ``None``, is to inherit the behaviour from ``self``.
|
|
2498
|
+
|
|
2499
|
+
raise_on_error : bool, optional
|
|
2500
|
+
*When reading files:*
|
|
2501
|
+
|
|
1940
2502
|
Whether to raise an exception if reading an existing file failed.
|
|
1941
|
-
By default this function fails silently and returns
|
|
1942
|
-
version : str
|
|
1943
|
-
If not None, specifies the version of the current code base.
|
|
1944
|
-
Use '*' to read any version (this is distrinct from reading a file without version).
|
|
1945
|
-
If version is not' '*', then this version will be compared to the version of the file being read.
|
|
1946
|
-
If they do not match, read fails (either by returning default or throwing an exception).
|
|
1947
|
-
delete_wrong_version : bool
|
|
1948
|
-
If True, and if a wrong version was found, delete the file.
|
|
1949
|
-
ext : str
|
|
1950
|
-
Extension overwrite, or a list thereof if key is a list
|
|
1951
|
-
Set to:
|
|
1952
|
-
-- None to use directory's default
|
|
1953
|
-
-- '*' to use the extension implied by 'fmt'
|
|
1954
|
-
-- for convenience 'ext' can also be a Format (in this case leave fmt to None)
|
|
1955
|
-
fmt : Format
|
|
1956
|
-
File format or None to use the directory's default.
|
|
1957
|
-
Note that 'fmt' cannot be a list even if 'key' is.
|
|
1958
|
-
Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
|
|
2503
|
+
By default this function fails silently and returns ``default``.
|
|
1959
2504
|
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
2505
|
+
Default is ``False``.
|
|
2506
|
+
|
|
2507
|
+
version : str, optional
|
|
2508
|
+
*When reading files:*
|
|
2509
|
+
|
|
2510
|
+
If not ``None``, specifies the version of the current code base.
|
|
2511
|
+
|
|
2512
|
+
In this case, this version will be compared to the version of the file being read.
|
|
2513
|
+
If they do not match, read fails (either by returning default or throwing a :class:`cdxcore.version.VersionError` exception).
|
|
2514
|
+
|
|
2515
|
+
You can specify version ``"*"`` to accept any version.
|
|
2516
|
+
Note that this is distinct
|
|
2517
|
+
from using ``None`` which stipulates that the file should not
|
|
2518
|
+
have version information.
|
|
2519
|
+
|
|
2520
|
+
Default is ``None``.
|
|
2521
|
+
|
|
2522
|
+
delete_wrong_version : bool, optional
|
|
2523
|
+
*When reading files:*
|
|
2524
|
+
|
|
2525
|
+
If ``True``, and if a wrong version was found, delete the file.
|
|
2526
|
+
|
|
2527
|
+
Default is ``True``.
|
|
2528
|
+
|
|
2529
|
+
ext : str, optional
|
|
2530
|
+
*When reading files:*
|
|
2531
|
+
|
|
2532
|
+
Extension to be used, or a list thereof if ``element`` is a list. Defaults
|
|
2533
|
+
to the extension of ``self``.
|
|
2534
|
+
|
|
2535
|
+
Semantics:
|
|
2536
|
+
|
|
2537
|
+
* ``None`` to use the default extension of ``self``.
|
|
2538
|
+
* ``"*"`` to use the extension implied by ``fmt``.
|
|
2539
|
+
* ``""`` to turn off extension management.
|
|
2540
|
+
|
|
2541
|
+
*When creating sub-directories:*
|
|
2542
|
+
|
|
2543
|
+
Extension for the new subdirectory; set to ``None`` to inherit the parent's extension.
|
|
2544
|
+
|
|
2545
|
+
Default is ``None``.
|
|
2546
|
+
|
|
2547
|
+
|
|
2548
|
+
fmt : :class:`cdxcore.subdir.Format`, optional
|
|
2549
|
+
*When reading files:*
|
|
2550
|
+
|
|
2551
|
+
File format or ``None`` to use the directory's default.
|
|
2552
|
+
Note that ``fmt`` cannot be a list even if ``element`` is.
|
|
2553
|
+
Unless
|
|
2554
|
+
``ext`` or the SubDir's extension is ``"*"``, changing the
|
|
2555
|
+
format does not automatically change the extension.
|
|
2556
|
+
|
|
2557
|
+
*When creating sub-directories:*
|
|
1969
2558
|
|
|
2559
|
+
Format for the new sub-directory; set to ``None`` to inherit the parent's format.
|
|
2560
|
+
|
|
2561
|
+
Default is ``None``.
|
|
2562
|
+
|
|
1970
2563
|
Returns
|
|
1971
2564
|
-------
|
|
2565
|
+
Object : type|SubDir
|
|
1972
2566
|
Either the value in the file, a new sub directory, or lists thereof.
|
|
1973
|
-
Returns None if an element was not found.
|
|
1974
2567
|
"""
|
|
1975
2568
|
if default == SubDir.RETURN_SUB_DIRECTORY:
|
|
1976
|
-
if not isinstance(
|
|
1977
|
-
if not isinstance(
|
|
1978
|
-
raise ValueError(txtfmt("'
|
|
1979
|
-
return [ SubDir( k,parent=self,ext=ext,fmt=fmt,
|
|
1980
|
-
return SubDir(
|
|
1981
|
-
return self.read(
|
|
2569
|
+
if not isinstance(element, str):
|
|
2570
|
+
if not isinstance(element, Collection):
|
|
2571
|
+
raise ValueError(txtfmt("'element' must be a string or an iterable object. Found type '%s;", type(element)))
|
|
2572
|
+
return [ SubDir( k,parent=self,ext=ext,fmt=fmt,create_directory=create_directory) for k in element ]
|
|
2573
|
+
return SubDir(element,parent=self,ext=ext,fmt=fmt,create_directory=create_directory)
|
|
2574
|
+
return self.read( file=element,
|
|
1982
2575
|
default=default,
|
|
1983
|
-
|
|
2576
|
+
raise_on_error=raise_on_error,
|
|
1984
2577
|
version=version,
|
|
1985
2578
|
delete_wrong_version=delete_wrong_version,
|
|
1986
2579
|
ext=ext,
|
|
1987
2580
|
fmt=fmt )
|
|
1988
2581
|
|
|
1989
|
-
def __getitem__( self,
|
|
2582
|
+
def __getitem__( self, file ):
|
|
1990
2583
|
"""
|
|
1991
|
-
Reads
|
|
1992
|
-
If '
|
|
2584
|
+
Reads ``file`` using :meth:`cdxcore.subdir.SubDir.read`.
|
|
2585
|
+
If ``file`` does not exist, throw a :class:`KeyError`.
|
|
1993
2586
|
"""
|
|
1994
|
-
return self.read(
|
|
2587
|
+
return self.read( file=file, default=None, raise_on_error=True )
|
|
1995
2588
|
|
|
1996
|
-
def __setitem__( self,
|
|
1997
|
-
""" Writes
|
|
1998
|
-
self.write(
|
|
2589
|
+
def __setitem__( self, file, value):
|
|
2590
|
+
""" Writes ``value`` to ``file`` using :meth:`cdxcore.subdir.SubDir.write`. """
|
|
2591
|
+
self.write(file,value)
|
|
1999
2592
|
|
|
2000
|
-
def __delitem__(self,
|
|
2001
|
-
""" Silently delete
|
|
2002
|
-
self.delete(
|
|
2593
|
+
def __delitem__(self,file):
|
|
2594
|
+
""" Silently delete ``file`` using :meth:`cdxcore.subdir.SubDir.delete`. """
|
|
2595
|
+
self.delete(file, False )
|
|
2003
2596
|
|
|
2004
2597
|
def __len__(self) -> int:
|
|
2005
|
-
""" Return the number of files
|
|
2006
|
-
return len(self.
|
|
2598
|
+
""" Return the number of files in this directory with matching extension. """
|
|
2599
|
+
return len(self.files())
|
|
2007
2600
|
|
|
2008
2601
|
def __iter__(self):
|
|
2009
|
-
""" Returns an iterator which allows traversing through all
|
|
2010
|
-
return self.
|
|
2011
|
-
|
|
2012
|
-
def __contains__(self, key):
|
|
2013
|
-
""" Implements 'in' operator """
|
|
2014
|
-
return self.exists(key)
|
|
2015
|
-
|
|
2016
|
-
# -- object like interface --
|
|
2602
|
+
""" Returns an iterator which allows traversing through all files in this directory with matching extension. """
|
|
2603
|
+
return self.files().__iter__()
|
|
2017
2604
|
|
|
2018
|
-
def
|
|
2019
|
-
"""
|
|
2020
|
-
|
|
2021
|
-
|
|
2605
|
+
def __contains__(self, file):
|
|
2606
|
+
""" Tests whether ``file`` :meth:`cdxcore.subdir.SubDir.exists`. """
|
|
2607
|
+
return self.exists(file)
|
|
2608
|
+
|
|
2609
|
+
def items(self, *, ext : str = None, raise_on_error : bool = False) -> Iterable:
|
|
2022
2610
|
"""
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2611
|
+
Dictionary-style iterable of filenames and their content.
|
|
2612
|
+
|
|
2613
|
+
Usage::
|
|
2614
|
+
|
|
2615
|
+
subdir = SubDir("!")
|
|
2616
|
+
for file, data in subdir.items():
|
|
2617
|
+
print( file, str(data)[:100] )
|
|
2026
2618
|
|
|
2027
|
-
|
|
2619
|
+
Parameters
|
|
2620
|
+
----------
|
|
2621
|
+
ext : str
|
|
2622
|
+
Extension or ``None`` for the directory's current extension. Use ``""``
|
|
2623
|
+
for all file extensions.
|
|
2624
|
+
|
|
2625
|
+
Returns
|
|
2626
|
+
-------
|
|
2627
|
+
Iterable
|
|
2628
|
+
An iterable generator
|
|
2629
|
+
"""
|
|
2630
|
+
class ItemIterable(Iterable):
|
|
2631
|
+
def __init__(_):
|
|
2632
|
+
_._files = self.files(ext=ext)
|
|
2633
|
+
_._subdir = self
|
|
2634
|
+
def __len__(_):
|
|
2635
|
+
return len(_._files)
|
|
2636
|
+
def __iter__(_):
|
|
2637
|
+
for file in _._files:
|
|
2638
|
+
data = _._subdir.read(file, ext=ext, raise_on_error=raise_on_error)
|
|
2639
|
+
yield file, data
|
|
2640
|
+
return ItemIterable()
|
|
2641
|
+
|
|
2642
|
+
# convenient path ops
|
|
2643
|
+
# -------------------
|
|
2644
|
+
|
|
2645
|
+
def __add__(self, directory : str) -> str:
|
|
2028
2646
|
"""
|
|
2029
|
-
|
|
2030
|
-
Note: keys starting with '_' are /not/ written to disk
|
|
2647
|
+
Returns the subdirectory ``directory`` of ``self``.
|
|
2031
2648
|
"""
|
|
2032
|
-
|
|
2033
|
-
self.__dict__[key] = value
|
|
2034
|
-
else:
|
|
2035
|
-
self.write(key,value)
|
|
2036
|
-
|
|
2037
|
-
def __delattr__(self, key):
|
|
2038
|
-
""" Silently delete a key with member notation. """
|
|
2039
|
-
verify( key[:1] != "_", "Deleting protected or private members disabled. Fix __delattr__ to support this")
|
|
2040
|
-
return self.delete( key=key, raiseOnError=False )
|
|
2649
|
+
return SubDir(directory,parent=self)
|
|
2041
2650
|
|
|
2042
2651
|
# pickling
|
|
2043
2652
|
# --------
|
|
@@ -2052,7 +2661,22 @@ class SubDir(object):
|
|
|
2052
2661
|
self._ext = state['ext']
|
|
2053
2662
|
self._fmt = state['fmt']
|
|
2054
2663
|
self._crt = state['crt']
|
|
2664
|
+
|
|
2665
|
+
@staticmethod
|
|
2666
|
+
def as_format( format_name : str ) -> int:
|
|
2667
|
+
"""
|
|
2668
|
+
Converts a named format into the respective format code.
|
|
2669
|
+
|
|
2670
|
+
Example::
|
|
2055
2671
|
|
|
2672
|
+
format = SubDir.as_format( config("format", "pickle", SubDir.FORMAT_NAMES, "File format") )
|
|
2673
|
+
"""
|
|
2674
|
+
format_name = format_name.upper()
|
|
2675
|
+
if not format_name in SubDir.FORMAT_NAMES:
|
|
2676
|
+
raise LookupError(f"Unknown format name '{format_name}'. Must be one of: {fmt_list(SubDir.FORMAT_NAMES)}")
|
|
2677
|
+
return Format[format_name]
|
|
2678
|
+
|
|
2679
|
+
|
|
2056
2680
|
# caching
|
|
2057
2681
|
# -------
|
|
2058
2682
|
|
|
@@ -2066,129 +2690,249 @@ class SubDir(object):
|
|
|
2066
2690
|
exclude_arg_types : list[type] = None,
|
|
2067
2691
|
version_auto_class : bool = True):
|
|
2068
2692
|
"""
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
|
|
2098
|
-
|
|
2099
|
-
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
|
|
2103
|
-
|
|
2104
|
-
|
|
2105
|
-
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2114
|
-
|
|
2115
|
-
|
|
2693
|
+
Advanced versioned caching for callables.
|
|
2694
|
+
|
|
2695
|
+
Versioned caching is based on the following two simple principles:
|
|
2696
|
+
|
|
2697
|
+
1) **Unique Call IDs:**
|
|
2698
|
+
|
|
2699
|
+
When a function is called with some parameters, the wrapper identifies a unique ID based
|
|
2700
|
+
on the qualified name of the function and on its runtime functional parameters (ie those
|
|
2701
|
+
which alter the outcome of the function).
|
|
2702
|
+
When a function is called the first time with a given unique call ID, it will store
|
|
2703
|
+
the result of the call to disk. If the function is called with the same call ID again,
|
|
2704
|
+
the result is read from disk and returned.
|
|
2705
|
+
|
|
2706
|
+
To compute unique call IDs :class:`cdxcore.uniquehash.NamedUniqueHash` is used
|
|
2707
|
+
by default.
|
|
2708
|
+
|
|
2709
|
+
2) **Code Version:**
|
|
2710
|
+
|
|
2711
|
+
Each function has a version, which includes dependencies on other functions or classes.
|
|
2712
|
+
If the version of data on disk does not match the current version, it is deleted
|
|
2713
|
+
and the generating function is called again. This way you can use your code to drive updates
|
|
2714
|
+
to data generated with cached functions.
|
|
2715
|
+
|
|
2716
|
+
Behind the scenes this is implemented using :dec:`cdxcore.version.version` which means
|
|
2717
|
+
that the version of a cached function can also depend on versions of non-cached functions
|
|
2718
|
+
or other objects.
|
|
2719
|
+
|
|
2720
|
+
Caching Functions
|
|
2721
|
+
^^^^^^^^^^^^^^^^^
|
|
2722
|
+
|
|
2723
|
+
Caching a simple function ``f`` is straightforward:
|
|
2724
|
+
|
|
2725
|
+
.. code-block:: python
|
|
2726
|
+
|
|
2727
|
+
from cdxcore.subdir import SubDir
|
|
2728
|
+
cache = SubDir("!/.cache")
|
|
2729
|
+
cache.delete_all_content() # for illustration
|
|
2730
|
+
|
|
2731
|
+
@cache.cache("0.1")
|
|
2732
|
+
def f(x,y):
|
|
2733
|
+
return x*y
|
|
2734
|
+
|
|
2735
|
+
_ = f(1,2) # function gets computed and the result cached
|
|
2736
|
+
_ = f(1,2) # restore result from cache
|
|
2737
|
+
_ = f(2,2) # different parameters: compute and store result
|
|
2738
|
+
|
|
2739
|
+
Cache another function ``g`` which calls ``f``, and whose version therefore depends on ``f``'s version:
|
|
2740
|
+
|
|
2741
|
+
.. code-block:: python
|
|
2742
|
+
|
|
2743
|
+
@cache.cache("0.1", dependencies=[f])
|
|
2744
|
+
def g(x,y):
|
|
2745
|
+
return f(x,y)**2
|
|
2746
|
+
|
|
2747
|
+
**Debugging**
|
|
2748
|
+
|
|
2749
|
+
When using automated caching it
|
|
2750
|
+
is important to understand how changes in parameters and the version of a function
|
|
2751
|
+
affect caching. To this end, :dec:`cdxcore.subdir.SubDir.cache` supports
|
|
2752
|
+
a tracing mechanism via the use of a :class:`cdxcore.subdir.CacheController`:
|
|
2753
|
+
|
|
2754
|
+
.. code-block:: python
|
|
2755
|
+
|
|
2756
|
+
from cdxcore.subdir import SubDir, CacheController, Context
|
|
2757
|
+
|
|
2758
|
+
ctrl = CacheController( debug_verbose=Context("all") )
|
|
2759
|
+
cache = SubDir("!/.cache", cache_controller=ctrl )
|
|
2760
|
+
cache.delete_all_content() # <- delete previous cached files, for this example only
|
|
2761
|
+
|
|
2762
|
+
@cache.cache("0.1")
|
|
2763
|
+
def f(x,y):
|
|
2764
|
+
return x*y
|
|
2765
|
+
|
|
2766
|
+
_ = f(1,2) # function gets computed and the result cached
|
|
2767
|
+
_ = f(1,2) # restore result from cache
|
|
2768
|
+
_ = f(2,2) # different parameters: compute and store result
|
|
2769
|
+
|
|
2770
|
+
Returns:
|
|
2116
2771
|
|
|
2117
|
-
|
|
2772
|
+
.. code-block:: python
|
|
2773
|
+
|
|
2774
|
+
00: cache(f@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
|
|
2775
|
+
00: cache(f@__main__): called 'f@__main__' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f@__main__ 668a6b111549e288.pck'.
|
|
2776
|
+
00: cache(f@__main__): read 'f@__main__' version 'version 0.1' from cache 'C:/Users/hans/AppData/Local/Temp/.cache/f@__main__ 668a6b111549e288.pck'.
|
|
2777
|
+
00: cache(f@__main__): called 'f@__main__' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f@__main__ b5609542d7da0b04.pck'.
|
|
2118
2778
|
|
|
2119
|
-
|
|
2120
|
-
|
|
2779
|
+
**Non-Functional Parameters**
|
|
2780
|
+
|
|
2781
|
+
A function may have non-functional parameters which do not alter the function's outcome.
|
|
2782
|
+
An example are ``debug`` flags:
|
|
2783
|
+
|
|
2784
|
+
.. code-block:: python
|
|
2785
|
+
|
|
2786
|
+
from cdxcore.subdir import SubDir
|
|
2787
|
+
cache = SubDir("!/.cache")
|
|
2788
|
+
|
|
2789
|
+
@cache.cache("0.1", dependencies=[f], exclude_args='debug')
|
|
2790
|
+
def g(x,y,debug): # <--' 'debug' is a non-functional parameter
|
|
2791
|
+
if debug:
|
|
2792
|
+
print(f"h(x={x},y={y})")
|
|
2793
|
+
return f(x,y)**2
|
|
2794
|
+
|
|
2795
|
+
You can define certain types as non-functional for *all* functions wrapped
|
|
2796
|
+
by :meth:`cdxcore.subdir.SubDir.cache` when constructing
|
|
2797
|
+
the :class:`cdxcore.subdir.CacheController` parameter for :class:`cdxcore.subdir.SubDir`:
|
|
2798
|
+
|
|
2799
|
+
.. code-block:: python
|
|
2121
2800
|
|
|
2122
|
-
|
|
2123
|
-
|
|
2124
|
-
|
|
2801
|
+
from cdxcore.subdir import SubDir
|
|
2802
|
+
|
|
2803
|
+
class Debugger:
|
|
2804
|
+
def output( cond, message ):
|
|
2805
|
+
print(message)
|
|
2806
|
+
|
|
2807
|
+
ctrl = CacheController(exclude_arg_types=[Debugger]) # <- exclude 'Debugger' parameters from hashing
|
|
2808
|
+
cache = SubDir("!/.cache", cache_controller=ctrl)
|
|
2125
2809
|
|
|
2126
|
-
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
2810
|
+
@cache.cache("0.1", dependencies=[f], exclude_args='debug')
|
|
2811
|
+
def g(x,y,debugger : Debugger): # <-- 'debugger' is a non-functional parameter
|
|
2812
|
+
debugger.output(f"h(x={x},y={y})")
|
|
2813
|
+
return f(x,y)**2
|
|
2814
|
+
|
|
2815
|
+
**Unique IDs and File Naming**
|
|
2816
|
+
|
|
2817
|
+
The unique call ID of a decorated function is by default generated by its fully qualified name
|
|
2818
|
+
and a unique hash of its functional parameters.
|
|
2819
|
+
|
|
2820
|
+
Key default behaviours of :class:`cdxcore.uniquehash.NamedUniqueHash`:
|
|
2821
|
+
|
|
2822
|
+
* The ``NamedUniqueHash`` hashes objects via their ``__dict__`` or ``__slots__`` members.
|
|
2823
|
+
This can be overwritten for a class by implementing ``__unique_hash__``; see :class:`cdxcore.uniquehash.NamedUniqueHash`.
|
|
2824
|
+
|
|
2825
|
+
* Function members of objects or any members starting with '_' are not hashed
|
|
2826
|
+
unless this behaviour is changed using :class:`cdxcore.subdir.CacheController`.
|
|
2827
|
+
|
|
2828
|
+
* Numpy arrays and pandas data frames are hashed using their byte representation.
|
|
2829
|
+
That is slow and not recommended. It is better to identify numpy/panda inputs
|
|
2830
|
+
via their generating characteristic ID.
|
|
2831
|
+
|
|
2832
|
+
Either way, hashes are not particularly human readable. It is often useful
|
|
2833
|
+
to have unique IDs and therefore filenames which carry some context information.
|
|
2834
|
+
|
|
2835
|
+
This can be achieved by using ``label``:
|
|
2131
2836
|
|
|
2132
|
-
|
|
2133
|
-
def g(x,y,debug): # <-- debug is a non-functional parameter
|
|
2134
|
-
if debug:
|
|
2135
|
-
print(f"h(x={x},y={y})")
|
|
2136
|
-
return g(x,y)**2
|
|
2137
|
-
|
|
2138
|
-
You can systematically define certain types as non-functional for *all* functions wrapped
|
|
2139
|
-
by this SubDir by specifying the respective parameter for the CacheController() in SubDir.__init__().
|
|
2140
|
-
|
|
2141
|
-
The Unique Call ID of a functions is by default generated by its fully qualified name
|
|
2142
|
-
and a unique hash of its functional parameters.
|
|
2143
|
-
This can be made more readable by using id=
|
|
2144
|
-
|
|
2145
|
-
from cdxbasics.subdir import SubDir
|
|
2146
|
-
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2147
|
-
|
|
2148
|
-
@cache.cache("0.1", id="f({x},{y}") # <- using a string to be passed to str.format()
|
|
2149
|
-
def f(x,y):
|
|
2150
|
-
return x*y
|
|
2837
|
+
.. code-block:: python
|
|
2151
2838
|
|
|
2152
|
-
|
|
2153
|
-
|
|
2154
|
-
|
|
2155
|
-
|
|
2156
|
-
|
|
2157
|
-
|
|
2158
|
-
|
|
2159
|
-
|
|
2160
|
-
|
|
2161
|
-
|
|
2162
|
-
|
|
2839
|
+
from cdxcore.subdir import SubDir, CacheController
|
|
2840
|
+
ctrl = CacheController( debug_verbose=Context("all") )
|
|
2841
|
+
cache = SubDir("!/.cache", cache_controller=ctrl )
|
|
2842
|
+
cache.delete_all_content() # for illustration
|
|
2843
|
+
|
|
2844
|
+
@cache.cache("0.1") # <- no ID
|
|
2845
|
+
def f1(x,y):
|
|
2846
|
+
return x*y
|
|
2847
|
+
|
|
2848
|
+
@cache.cache("0.1", label="f2({x},{y})") # <- label uses a string to be passed to str.format()
|
|
2849
|
+
def f2(x,y):
|
|
2850
|
+
return x*y
|
|
2851
|
+
|
|
2852
|
+
We can also use a function to generate a ``label``. In that case all parameters
|
|
2853
|
+
to the function including its ``name`` are passed to the function. In below example
|
|
2854
|
+
we eat any parameters we are not interested in with ``** _``:
|
|
2163
2855
|
|
|
2164
|
-
|
|
2165
|
-
and a hash generated from all pertinent arguments will be generated.
|
|
2166
|
-
That is why in the previous example we still need to exclude_args 'debug' here.
|
|
2856
|
+
.. code-block:: python
|
|
2167
2857
|
|
|
2168
|
-
|
|
2169
|
-
|
|
2858
|
+
@cache.cache("0.1", label=lambda x,y,**_: f"h({x},{y})", exclude_args='debug')
|
|
2859
|
+
def h(x,y,debug=False):
|
|
2860
|
+
if debug:
|
|
2861
|
+
print(f"h(x={x},y={y})")
|
|
2862
|
+
return x*y
|
|
2170
2863
|
|
|
2171
|
-
|
|
2172
|
-
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2864
|
+
We obtain:
|
|
2865
|
+
|
|
2866
|
+
.. code-block:: python
|
|
2867
|
+
|
|
2868
|
+
f1(1,1)
|
|
2869
|
+
f2(1,1)
|
|
2870
|
+
h(1,1)
|
|
2871
|
+
|
|
2872
|
+
00: cache(f1@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
|
|
2873
|
+
00: cache(f2@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
|
|
2874
|
+
00: cache(h@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
|
|
2875
|
+
00: cache(f1@__main__): called 'f1@__main__' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f1@__main__ ef197d80d6a0bbb0.pck'.
|
|
2876
|
+
00: cache(f2@__main__): called 'f2(1,1)' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f2(1,1) bdc3cd99157c10f7.pck'.
|
|
2877
|
+
00: cache(h@__main__): called 'h(1,1)' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/h(1,1) d3fdafc9182070f4.pck'.
|
|
2878
|
+
|
|
2879
|
+
Note that the file names ``f2(1,1) bdc3cd99157c10f7.pck``
|
|
2880
|
+
and ``h(1,1) d3fdafc9182070f4.pck`` for the ``f2`` and ``h`` function calls are now easier to read as
|
|
2881
|
+
they are comprised of the label
|
|
2882
|
+
of the function and a terminal hash key.
|
|
2883
|
+
The trailing hash is appended because we do not assume that the label returned by ``label`` is unique.
|
|
2884
|
+
Therefore, a hash generated from the ``label`` itself and
|
|
2885
|
+
all pertinent arguments will be appended to the filename.
|
|
2886
|
+
|
|
2887
|
+
If we know how to generate truly unique IDs which are always valid filenames, then we can use ``uid``
|
|
2888
|
+
instead of ``label``:
|
|
2889
|
+
|
|
2890
|
+
.. code-block:: python
|
|
2891
|
+
|
|
2892
|
+
@cache.cache("0.1", uid=lambda x,y,**_: f"h2({x},{y})", exclude_args='debug')
|
|
2893
|
+
def h2(x,y,debug=False):
|
|
2894
|
+
if debug:
|
|
2895
|
+
print(f"h(x={x},y={y})")
|
|
2896
|
+
return x*y
|
|
2897
|
+
h2(1,1)
|
|
2898
|
+
|
|
2899
|
+
yields::
|
|
2900
|
+
|
|
2901
|
+
00: cache(h2@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
|
|
2902
|
+
00: cache(h2@__main__): called 'h2(1,1)' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/h2(1,1).pck'.
|
|
2903
|
+
|
|
2904
|
+
In particular, the filename is now ``h2(1,1).pck`` without any hash.
|
|
2905
|
+
If ``uid`` is used, the parameters of the function are not hashed. Like ``label``,
|
|
2906
|
+
the parameter ``uid`` can also be a :func:`str.format` string or a callable.
|
|
2180
2907
|
|
|
2181
|
-
|
|
2182
|
-
|
|
2908
|
+
**Controlling which Parameters to Hash**
|
|
2909
|
+
|
|
2910
|
+
To specify which parameters are pertinent for identifying a unique id, use:
|
|
2911
|
+
|
|
2912
|
+
* ``include_args``: list of function arguments to include. If ``None``, use all parameters as input in the next step.
|
|
2913
|
+
|
|
2914
|
+
* ``exclude_args``: list of function arguments to exclude, if not ``None``.
|
|
2915
|
+
|
|
2916
|
+
* ``exclude_arg_types``: a list of types to exclude.
|
|
2917
|
+
This is helpful if control flow is managed with dedicated data types.
|
|
2918
|
+
An example of such a type is :class:`cdxcore.verbose.Context` which is used to print hierarchical output messages.
|
|
2919
|
+
Types can be globally excluded using a :class:`cdxcore.subdir.CacheController`
|
|
2920
|
+
when calling
|
|
2921
|
+
:class:`cdxcore.subdir.SubDir`.
|
|
2922
|
+
|
|
2923
|
+
**Numpy/Pandas**
|
|
2924
|
+
|
|
2183
2925
|
NumPy/Pandas data should not be hashed for identifying unique call IDs.
|
|
2184
2926
|
Instead, use the defining characteristics for generating the data frames.
|
|
2185
2927
|
|
|
2186
2928
|
For example:
|
|
2187
|
-
|
|
2188
|
-
from cdxbasics.subdir import SubDir
|
|
2189
|
-
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2190
2929
|
|
|
2191
|
-
|
|
2930
|
+
.. code-block:: python
|
|
2931
|
+
|
|
2932
|
+
from cdxcore.pretty import PrettyObject
|
|
2933
|
+
from cdxcore.subdir import SubDir
|
|
2934
|
+
cache = SubDir("!/.cache")
|
|
2935
|
+
cache.delete_all_content() # for illustration
|
|
2192
2936
|
|
|
2193
2937
|
@cache.cache("0.1")
|
|
2194
2938
|
def load_src( src_def ):
|
|
@@ -2201,22 +2945,24 @@ class SubDir(object):
|
|
|
2201
2945
|
stats = ... using data
|
|
2202
2946
|
return stats
|
|
2203
2947
|
|
|
2204
|
-
src_def =
|
|
2948
|
+
src_def = PrettyObject()
|
|
2205
2949
|
src_def.start = "2010-01-01"
|
|
2206
2950
|
src_def.end = "2025-01-01"
|
|
2207
2951
|
src_def.x = 0.1
|
|
2208
2952
|
|
|
2209
|
-
stats_def =
|
|
2953
|
+
stats_def = PrettyObject()
|
|
2210
2954
|
stats_def.lambda = 0.1
|
|
2211
2955
|
stats_def.window = 100
|
|
2212
2956
|
|
|
2213
2957
|
data = load_src( src_def )
|
|
2214
2958
|
stats = statistics( stats_def, src_def, data )
|
|
2215
2959
|
|
|
2216
|
-
While instructive, this case is not optimal: we do not really need to load
|
|
2217
|
-
if we can reconstruct
|
|
2960
|
+
While instructive, this case is not optimal: we do not really need to load ``data``
|
|
2961
|
+
if we can reconstruct ``stats`` from ``data`` (unless we need ``data`` further on).
|
|
2218
2962
|
|
|
2219
|
-
Consider therefore
|
|
2963
|
+
Consider therefore:
|
|
2964
|
+
|
|
2965
|
+
.. code-block:: python
|
|
2220
2966
|
|
|
2221
2967
|
@cache.cache("0.1")
|
|
2222
2968
|
def load_src( src_def ):
|
|
@@ -2232,14 +2978,18 @@ class SubDir(object):
|
|
|
2232
2978
|
|
|
2233
2979
|
stats = statistics_only( stats_def, src_def )
|
|
2234
2980
|
|
|
2235
|
-
Member
|
|
2236
|
-
|
|
2981
|
+
Caching Member Functions
|
|
2982
|
+
^^^^^^^^^^^^^^^^^^^^^^^^
|
|
2983
|
+
|
|
2237
2984
|
You can cache member functions like any other function.
|
|
2238
|
-
Note that version information are by default inherited, i.e. member functions will be dependent on the version of their
|
|
2239
|
-
defining class, and class versions will be dependent on their base classes' versions
|
|
2985
|
+
Note that :dec:`cdxcore.version.version` information is by default inherited, i.e. member functions will be dependent on the version of their
|
|
2986
|
+
defining class, and class versions will be dependent on their base classes' versions:
|
|
2240
2987
|
|
|
2241
|
-
|
|
2242
|
-
|
|
2988
|
+
.. code-block:: python
|
|
2989
|
+
|
|
2990
|
+
from cdxcore.subdir import SubDir, version
|
|
2991
|
+
cache = SubDir("!/.cache")
|
|
2992
|
+
cache.delete_all_content() # for illustration
|
|
2243
2993
|
|
|
2244
2994
|
@version("0.1")
|
|
2245
2995
|
class A(object):
|
|
@@ -2259,18 +3009,26 @@ class SubDir(object):
|
|
|
2259
3009
|
_ = b.f(y=1) # same unique call ID as previous call -> restore result from disk
|
|
2260
3010
|
|
|
2261
3011
|
**WARNING**
|
|
2262
|
-
|
|
2263
|
-
which start with a "_". This behaviour can be changed using CacheController
|
|
2264
|
-
For reasonably complex objects it is recommended to implement
|
|
2265
|
-
|
|
2266
|
-
|
|
3012
|
+
:class:`cdxcore.uniquehash.UniqueHash` does *not* by default process members of objects or dictionaries
|
|
3013
|
+
which start with a "_". This behaviour can be changed using :class:`cdxcore.subdir.CacheController`.
|
|
3014
|
+
For reasonably complex objects it is recommended to implement for your objects
|
|
3015
|
+
a custom hashing function::
|
|
3016
|
+
|
|
3017
|
+
__unique_hash__( self, uniqueHash : UniqueHash, debug_trace : DebugTrace )
|
|
3018
|
+
|
|
3019
|
+
This function is described at :class:`cdxcore.uniquehash.UniqueHash`.
|
|
3020
|
+
|
|
3021
|
+
Caching Bound Member Functions
|
|
3022
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
2267
3023
|
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
Note that above is functionally different to decorating a bound member function:
|
|
3024
|
+
Caching bound member functions is technically quite different to caching a function of a class in general,
|
|
3025
|
+
but also supported:
|
|
2271
3026
|
|
|
2272
|
-
|
|
2273
|
-
|
|
3027
|
+
.. code-block:: python
|
|
3028
|
+
|
|
3029
|
+
from cdxcore.subdir import SubDir, version
|
|
3030
|
+
cache = SubDir("!/.cache", cache_controller : CacheController(debug_verbose=Context("all")))
|
|
3031
|
+
cache.delete_all_content() # for illustration
|
|
2274
3032
|
|
|
2275
3033
|
class A(object):
|
|
2276
3034
|
def __init__(self,x):
|
|
@@ -2282,133 +3040,177 @@ class SubDir(object):
|
|
|
2282
3040
|
f = cache.cache("0.1", id=lambda self, y : f"a.f({y})")(a.f) # <- decorate bound 'f'.
|
|
2283
3041
|
r = c(y=2)
|
|
2284
3042
|
|
|
2285
|
-
In this case the function
|
|
2286
|
-
parameter list even though the bound function parameter list does not include
|
|
2287
|
-
This, together with the comments on hashing objects above, ensures that (hashed) changes to
|
|
3043
|
+
In this case the function ``f`` is bound to ``a``. The object is added as ``self`` to the function
|
|
3044
|
+
parameter list even though the bound function parameter list does not include ``self``.
|
|
3045
|
+
This, together with the comments on hashing objects above, ensures that (hashed) changes to ``a`` will
|
|
2288
3046
|
be reflected in the unique call ID for the member function.
|
|
2289
3047
|
|
|
2290
|
-
Classes
|
|
2291
|
-
|
|
2292
|
-
Classes can also be cached.
|
|
2293
|
-
This is done in two steps: first, the class itself is decorated to provide version information at its own level.
|
|
2294
|
-
Secondly, decorate __init__ which also helps to define the unique call id. You do not need to specify a version
|
|
2295
|
-
for __init__ as its version usually coincides with the version of the class.
|
|
2296
|
-
|
|
2297
|
-
Simple example:
|
|
2298
|
-
|
|
2299
|
-
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2300
|
-
|
|
2301
|
-
@cache.cache("0.1")
|
|
2302
|
-
class A(object):
|
|
2303
|
-
|
|
2304
|
-
@cache.cache(exclude_args=['debug'])
|
|
2305
|
-
def __init__(self, x, debug):
|
|
2306
|
-
if debug:
|
|
2307
|
-
print("__init__",x)
|
|
2308
|
-
self.x = x
|
|
3048
|
+
Caching Classes
|
|
3049
|
+
^^^^^^^^^^^^^^^
|
|
2309
3050
|
|
|
2310
|
-
|
|
2311
|
-
|
|
3051
|
+
Classes can also be cached. In this case the creation of a class is cached, i.e. a call to
|
|
3052
|
+
the class constructor restores the respective object from disk.
|
|
2312
3053
|
|
|
2313
|
-
|
|
2314
|
-
It is therefore automatically excluded from computing a unique call ID.
|
|
2315
|
-
Specifically, 'self' is not part of the arguments passed to 'id':
|
|
3054
|
+
This is done in two steps:
|
|
2316
3055
|
|
|
2317
|
-
|
|
2318
|
-
|
|
3056
|
+
1) first, the class itself is decorated using
|
|
3057
|
+
:dec:`cdxcore.subdir.SubDir.cache`
|
|
3058
|
+
to provide version information at class level. Only version information is provided here.
|
|
3059
|
+
|
|
3060
|
+
2) Secondly, decorate ``__init__``. You do not need to specify a version
|
|
3061
|
+
for ``__init__`` as its version usually coincides with the version of the class. At ``__init__``
|
|
3062
|
+
you define how unique IDs are generated from the parameters passed to object construction.
|
|
3063
|
+
|
|
3064
|
+
Simple example:
|
|
3065
|
+
|
|
3066
|
+
.. code-block:: python
|
|
3067
|
+
|
|
3068
|
+
from cdxcore.subdir import SubDir
|
|
3069
|
+
cache = SubDir("!/.cache")
|
|
3070
|
+
cache.delete_all_content() # for illustration
|
|
3071
|
+
|
|
3072
|
+
@cache.cache("0.1")
|
|
3073
|
+
class A(object):
|
|
3074
|
+
|
|
3075
|
+
@cache.cache(exclude_args=['debug'])
|
|
3076
|
+
def __init__(self, x, debug):
|
|
3077
|
+
if debug:
|
|
3078
|
+
print("__init__",x)
|
|
3079
|
+
self.x = x
|
|
2319
3080
|
|
|
2320
|
-
|
|
2321
|
-
|
|
2322
|
-
if debug:
|
|
2323
|
-
print("__init__",x)
|
|
2324
|
-
self.x = x
|
|
3081
|
+
a = A(1) # caches 'a'
|
|
3082
|
+
b = A(1) # reads the cached object into 'b'
|
|
2325
3083
|
|
|
2326
|
-
|
|
2327
|
-
|
|
2328
|
-
|
|
2329
|
-
|
|
2330
|
-
|
|
2331
|
-
|
|
2332
|
-
|
|
2333
|
-
|
|
2334
|
-
|
|
2335
|
-
|
|
2336
|
-
|
|
2337
|
-
|
|
2338
|
-
|
|
2339
|
-
|
|
2340
|
-
|
|
2341
|
-
|
|
2342
|
-
b) exclude_args: list of funciton arguments to exclude, if not None.
|
|
2343
|
-
c) exclude_arg_types: a list of types to exclude. This is helpful if control flow is managed with dedicated data types.
|
|
2344
|
-
An example of such a type is cdxbasics.verbose.Context which is used to print hierarchical output messages.
|
|
2345
|
-
Types can be globally excluded using the CacheController.
|
|
3084
|
+
**Technical Comments**
|
|
3085
|
+
|
|
3086
|
+
The function ``__init__`` does not actually return a value; for this reason
|
|
3087
|
+
behind the scenes it is actually ``__new__`` which is being decorated.
|
|
3088
|
+
Attempting to cache-decorate ``__new__`` manually will lead to an exception.
|
|
3089
|
+
|
|
3090
|
+
A nuance for ``__init__`` vs ordinary member function is that the
|
|
3091
|
+
``self`` parameter is non-functional
|
|
3092
|
+
(in the sense that it is an empty object when ``__init__`` is called).
|
|
3093
|
+
``self`` is therefore automatically excluded from computing a unique call ID.
|
|
3094
|
+
That also means ``self`` is not part of the arguments passed to ``uid``:
|
|
3095
|
+
|
|
3096
|
+
.. code-block:: python
|
|
3097
|
+
|
|
3098
|
+
@cache.cache("0.1")
|
|
3099
|
+
class A(object):
|
|
2346
3100
|
|
|
3101
|
+
@cache.cache("0.1", uid=lambda x, debug: f"A.__init__(x={x})") # <-- 'self' is not passed to the lambda function; no need to add **_
|
|
3102
|
+
def __init__(self, x, debug):
|
|
3103
|
+
if debug:
|
|
3104
|
+
print("__init__",x)
|
|
3105
|
+
self.x = x
|
|
3106
|
+
|
|
3107
|
+
Decorating classes with ``__slots__`` does not yet work.
|
|
3108
|
+
|
|
2347
3109
|
See also
|
|
2348
|
-
|
|
2349
|
-
|
|
2350
|
-
|
|
3110
|
+
^^^^^^^^
|
|
3111
|
+
|
|
3112
|
+
For project-wide use it is usually convenient to control caching at the level of a
|
|
3113
|
+
project-wide cache root directory.
|
|
3114
|
+
See :class:`cdxcore.subdir.VersionedCacheRoot` for a thin convenience wrapper around a :class:`cdxcore.subdir.SubDir`
|
|
3115
|
+
with a :class:`cdxcore.subdir.CacheController`.
|
|
2351
3116
|
|
|
2352
3117
|
Parameters
|
|
2353
3118
|
----------
|
|
2354
3119
|
version : str, optional
|
|
2355
3120
|
Version of the function.
|
|
2356
|
-
* If None then F must be decorated with cdxbasics.version.version().
|
|
2357
|
-
* If set, the function F is first decorated with cdxbasics.version.version().
|
|
2358
|
-
dependencies : list, optional
|
|
2359
|
-
List of version dependencies
|
|
2360
|
-
|
|
2361
|
-
id : str, Callable
|
|
2362
|
-
Create a call label for the function call and its parameters.
|
|
2363
|
-
See above for a description.
|
|
2364
|
-
* A plain string without {} formatting: this is the fully qualified id
|
|
2365
|
-
* A string with {} formatting: id.str( name=name, **parameters ) will be used to generate the fully qualified id
|
|
2366
|
-
* A Callable, in which case id( name=name, **parameters ) will be used to generate the fully qualified id
|
|
2367
|
-
|
|
2368
|
-
unique : bool
|
|
2369
|
-
Whether the 'id' generated by 'id' is unique for this function call with its parameters.
|
|
2370
|
-
If True, then the function will attempt to use 'id' as filename as long as it has no invalid characters and is short
|
|
2371
|
-
enough (see 'max_filename_length').
|
|
2372
|
-
If False, the function will append to the 'id' a unique hash of the qualified function name and all pertinent parameters
|
|
2373
|
-
|
|
2374
|
-
name : str
|
|
2375
|
-
The name of the function, or None for using the fully qualified function name.
|
|
2376
3121
|
|
|
3122
|
+
* If ``None`` then ``F`` must be decorated with :dec:`cdxcore.version.version`.
|
|
3123
|
+
* If set, the function ``F`` is first decorated with :dec:`cdxcore.version.version`.
|
|
3124
|
+
|
|
3125
|
+
dependencies : list[type], optional
|
|
3126
|
+
A list of version dependencies, either by reference or by name.
|
|
3127
|
+
See :dec:`cdxcore.version.version` for details on name lookup if strings are used.
|
|
3128
|
+
|
|
3129
|
+
label : str | Callable
|
|
3130
|
+
Specify a human-readable label for the function call given its parameters.
|
|
3131
|
+
This label is used to generate the cache file name, and is also printed when tracing
|
|
3132
|
+
hashing operations. Labels are not assumed to be unique, hence a unique hash of
|
|
3133
|
+
the label and the parameters to this function will be appended to generate
|
|
3134
|
+
the actual cache file name.
|
|
3135
|
+
|
|
3136
|
+
Use ``uid`` instead if ``label`` represents valid unique filenames.
|
|
3137
|
+
|
|
3138
|
+
|
|
3139
|
+
**Usage:**
|
|
3140
|
+
|
|
3141
|
+
* If ``label`` is a plain string without ``{}`` formatting: use this string as-is.
|
|
3142
|
+
|
|
3143
|
+
* If ``label`` is a string with ``{}`` formatting, then ``label.format( name=name, **parameters )``
|
|
3144
|
+
will be used to generate the actual label.
|
|
3145
|
+
|
|
3146
|
+
* If ``label`` is a ``Callable`` then ``label( name=name, **parameters )`` will be called
|
|
3147
|
+
to generate the actual label.
|
|
3148
|
+
|
|
3149
|
+
See above for examples.
|
|
3150
|
+
|
|
3151
|
+
``label`` cannot be used alongside ``uid``.
|
|
3152
|
+
|
|
3153
|
+
uid : str | Callable
|
|
3154
|
+
Alternative to ``label`` which is assumed to generate a unique cache file name. It has the same
|
|
3155
|
+
semantics as ``label``. When used, parameters to the decorated function are not hashed.
|
|
3156
|
+
|
|
3157
|
+
``uid`` cannot be used alongside ``label``.
|
|
3158
|
+
|
|
3159
|
+
name : str, optional
|
|
3160
|
+
Name of this function, which is used on its own if neither ``label`` nor ``uid`` is used.
|
|
3161
|
+
If either of them is used, ``name`` is passed as a parameter to either the callable or the
|
|
3162
|
+
formatting operator.
|
|
3163
|
+
|
|
3164
|
+
If ``name`` is not specified it defaults to ``__qualname__`` expanded
|
|
3165
|
+
by the module name the function is defined in.
|
|
3166
|
+
|
|
2377
3167
|
include_args : list[str]
|
|
2378
|
-
List of arguments to include in generating
|
|
3168
|
+
List of arguments to include in generating a unique ID, or ``None`` for all.
|
|
2379
3169
|
|
|
2380
3170
|
exclude_args : list[str]:
|
|
2381
|
-
List of
|
|
3171
|
+
List of arguments to exclude from generating a unique ID.
|
|
2382
3172
|
|
|
2383
3173
|
exclude_arg_types : list[type]
|
|
2384
|
-
List of types to exclude.
|
|
3174
|
+
List of parameter types to exclude from generating a unique ID.
|
|
2385
3175
|
|
|
2386
3176
|
version_auto_class : bool
|
|
2387
|
-
|
|
2388
|
-
|
|
3177
|
+
Whether to automatically add version dependencies on base classes or, for member functions, on containing
|
|
3178
|
+
classes. This is the ``auto_class`` parameter for :dec:`cdxcore.version.version`.
|
|
2389
3179
|
|
|
2390
3180
|
Returns
|
|
2391
3181
|
-------
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
|
|
2395
|
-
Information available at any time after decoration:
|
|
2396
|
-
F.cache_info.name : qualified name of the function
|
|
2397
|
-
F.cache_info.signature : signature of the function
|
|
3182
|
+
Decorated F: Callable
|
|
3183
|
+
|
|
3184
|
+
A decorator ``cache(F)`` whose ``__call__`` implements the cached call to ``F``.
|
|
2398
3185
|
|
|
2399
|
-
|
|
2400
|
-
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
|
-
|
|
3186
|
+
This callable has a member ``cache_info``
|
|
3187
|
+
of type :class:`cdxcore.subdir.CacheInfo`
|
|
3188
|
+
which can be used to access information on caching activity.
|
|
3189
|
+
|
|
3190
|
+
* Information available at any time after decoration:
|
|
3191
|
+
|
|
3192
|
+
* ``F.cache_info.name`` : qualified name of the function
|
|
3193
|
+
* ``F.cache_info.signature`` : signature of the function
|
|
3194
|
+
|
|
3195
|
+
* Additional information available during a call to a decorated function F, and thereafter:
|
|
3196
|
+
|
|
3197
|
+
* ``F.cache_info.version`` : unique version string reflecting all dependencies.
|
|
3198
|
+
* ``F.cache_info.filename`` : unique filename used for caching logic during the last function call.
|
|
3199
|
+
* ``F.cache_info.label`` : last label generated, or ``None``.
|
|
3200
|
+
* ``F.cache_info.arguments`` : arguments parsed to create a unique call ID, or ``None``.
|
|
2404
3201
|
|
|
2405
|
-
|
|
2406
|
-
|
|
3202
|
+
* Additional information available after a call to ``F``:
|
|
3203
|
+
|
|
3204
|
+
* ``F.cache_info.last_cached`` : whether the last function call returned a cached object.
|
|
2407
3205
|
|
|
2408
|
-
The
|
|
2409
|
-
|
|
2410
|
-
|
|
2411
|
-
|
|
3206
|
+
The decorated ``F()`` has additional function parameters, namely:
|
|
3207
|
+
|
|
3208
|
+
* ``override_cache_mode`` : allows overriding the caching mode temporarily; in particular, you can set it to ``"off"``.
|
|
3209
|
+
* ``track_cached_files`` : allows passing a :class:`cdxcore.subdir.CacheTracker`
|
|
3210
|
+
object to keep track of all
|
|
3211
|
+
files used (loaded from or saved to).
|
|
3212
|
+
The function :meth:`cdxcore.subdir.CacheTracker.delete_cache_files` can be used
|
|
3213
|
+
to delete all files involved in caching.
|
|
2412
3214
|
"""
|
|
2413
3215
|
return CacheCallable(subdir = self,
|
|
2414
3216
|
version = version,
|
|
@@ -2428,14 +3230,153 @@ class SubDir(object):
|
|
|
2428
3230
|
version_auto_class : bool = True
|
|
2429
3231
|
):
|
|
2430
3232
|
"""
|
|
2431
|
-
Short
|
|
2432
|
-
|
|
3233
|
+
Short-cut for :dec:`cdxcore.subdir.SubDir.cache` applied to classes
|
|
3234
|
+
with a reduced number of available parameters.
|
|
3235
|
+
|
|
3236
|
+
Example::
|
|
3237
|
+
|
|
3238
|
+
cache = SubDir("!/.cache")
|
|
3239
|
+
|
|
3240
|
+
@cache.cache_class("0.1")
|
|
3241
|
+
class A(object):
|
|
3242
|
+
|
|
3243
|
+
@cache.cache(exclude_args=['debug'])
|
|
3244
|
+
def __init__(self, x, debug):
|
|
3245
|
+
if debug:
|
|
3246
|
+
print("__init__",x)
|
|
3247
|
+
self.x = x
|
|
3248
|
+
|
|
2433
3249
|
"""
|
|
2434
3250
|
return self.cache( name=name,
|
|
2435
|
-
|
|
2436
|
-
|
|
2437
|
-
|
|
2438
|
-
|
|
3251
|
+
version=version,
|
|
3252
|
+
dependencies=dependencies,
|
|
3253
|
+
version_auto_class=version_auto_class)
|
|
3254
|
+
|
|
3255
|
+
# ========================================================================
|
|
3256
|
+
# Caching, convenience
|
|
3257
|
+
# ========================================================================
|
|
3258
|
+
|
|
3259
|
+
def VersionedCacheRoot( directory : str, *,
|
|
3260
|
+
ext : str = None,
|
|
3261
|
+
fmt : Format = None,
|
|
3262
|
+
create_directory : bool = False,
|
|
3263
|
+
**controller_kwargs
|
|
3264
|
+
):
|
|
3265
|
+
"""
|
|
3266
|
+
Create a root directory for versioned caching on disk
|
|
3267
|
+
using :dec:`cdxcore.subdir.SubDir.cache`.
|
|
3268
|
+
|
|
3269
|
+
**Usage:**
|
|
3270
|
+
|
|
3271
|
+
In a central file, define a root directory for all caching activity::
|
|
3272
|
+
|
|
3273
|
+
from cdxcore.subdir import VersionedCacheRoot
|
|
3274
|
+
vroot = VersionedCacheRoot("!/cache")
|
|
3275
|
+
|
|
3276
|
+
Create sub-directories as suitable, for example::
|
|
3277
|
+
|
|
3278
|
+
vtest = vroot("test")
|
|
3279
|
+
|
|
3280
|
+
Use these for caching::
|
|
3281
|
+
|
|
3282
|
+
@vtest.cache("1.0")
|
|
3283
|
+
def f1( x=1, y=2 ):
|
|
3284
|
+
print(x,y)
|
|
3285
|
+
|
|
3286
|
+
@vtest.cache("1.0", dependencies=[f1])
|
|
3287
|
+
def f2( x=1, y=2, z=3 ):
|
|
3288
|
+
f1( x,y )
|
|
3289
|
+
print(z)
|
|
3290
|
+
|
|
3291
|
+
Parameters
|
|
3292
|
+
----------
|
|
3293
|
+
directory : str
|
|
3294
|
+
Name of the root directory for caching.
|
|
3295
|
+
|
|
3296
|
+
Using :class:`cdxcore.subdir.SubDir`, the following shortcuts are supported:
|
|
3297
|
+
|
|
3298
|
+
* ``"!/dir"`` creates ``dir`` in the temporary directory.
|
|
3299
|
+
* ``"~/dir"`` creates ``dir`` in the home directory.
|
|
3300
|
+
* ``"./dir"`` creates ``dir`` relative to the current directory.
|
|
3301
|
+
|
|
3302
|
+
ext : str
|
|
3303
|
+
Extension, which will automatically be appended to file names.
|
|
3304
|
+
The default value depends on ``fmt``; for ``Format.PICKLE`` it is "pck".
|
|
3305
|
+
|
|
3306
|
+
fmt : :class:`cdxcore.subdir.Format`
|
|
3307
|
+
File format; if ``ext`` is not specified, the format drives the extension, too.
|
|
3308
|
+
Default is ``Format.PICKLE``.
|
|
3309
|
+
|
|
3310
|
+
create_directory : bool
|
|
3311
|
+
Whether to create the directory upon creation. Default is ``False``.
|
|
3312
|
+
|
|
3313
|
+
controller_kwargs: dict
|
|
3314
|
+
Parameters passed to :class:`cdxcore.subdir.CacheController`.
|
|
3315
|
+
|
|
3316
|
+
Common parameters used:
|
|
3317
|
+
|
|
3318
|
+
* ``exclude_arg_types``: list of types or names of types to exclude when auto-generating function
|
|
3319
|
+
signatures from function arguments.
|
|
3320
|
+
An example is :class:`cdxcore.verbose.Context` which is used to print progress messages.
|
|
3321
|
+
|
|
3322
|
+
* ``max_filename_length``: maximum filename length.
|
|
3323
|
+
|
|
3324
|
+
* ``hash_length``: length used for hashes, see :class:`cdxcore.uniquehash.UniqueHash`.
|
|
3325
|
+
|
|
3326
|
+
Returns
|
|
3327
|
+
-------
|
|
3328
|
+
Root : SubDir
|
|
3329
|
+
A root directory suitable for caching.
|
|
3330
|
+
"""
|
|
3331
|
+
controller = CacheController(**controller_kwargs) if len(controller_kwargs) > 0 else None
|
|
3332
|
+
return SubDir( directory=directory, ext=ext, fmt=fmt, create_directory=create_directory, controller=controller )
|
|
3333
|
+
|
|
3334
|
+
version = version_decorator
|
|
3335
|
+
|
|
3336
|
+
class CacheTracker(object):
|
|
3337
|
+
"""
|
|
3338
|
+
Utility class to track caching and be able to delete all dependent objects.
|
|
3339
|
+
"""
|
|
3340
|
+
def __init__(self):
|
|
3341
|
+
""" track cache files """
|
|
3342
|
+
self._files = []
|
|
3343
|
+
def __iadd__(self, new_file):
|
|
3344
|
+
""" Add a new file to the tracker """
|
|
3345
|
+
self._files.append( new_file )
|
|
3346
|
+
def delete_cache_files(self):
|
|
3347
|
+
""" Delete all tracked files """
|
|
3348
|
+
for file in self._files:
|
|
3349
|
+
if os.path.exists(file):
|
|
3350
|
+
os.remove(file)
|
|
3351
|
+
self._files = []
|
|
3352
|
+
def __str__(self) -> str:#NOQA
|
|
3353
|
+
return f"Tracked: {self._files}"
|
|
3354
|
+
def __repr__(self) -> str:#NOQA
|
|
3355
|
+
return f"Tracked: {self._files}"
|
|
3356
|
+
|
|
3357
|
+
class CacheInfo(object):
|
|
3358
|
+
"""
|
|
3359
|
+
Information on functions decorated with :dec:`cdxcore.subdir.SubDir.cache`.
|
|
3360
|
+
|
|
3361
|
+
Functions decorated with :dec:`cdxcore.subdir.SubDir.cache`
|
|
3362
|
+
will have a member ``cache_info`` of this type
|
|
3363
|
+
"""
|
|
3364
|
+
def __init__(self, name, F, keep_last_arguments):
|
|
3365
|
+
"""
|
|
3366
|
+
:meta private:
|
|
3367
|
+
"""
|
|
3368
|
+
self.name = name #: Decoded name of the function.
|
|
3369
|
+
|
|
3370
|
+
self.signature = inspect.signature(F) #: :func:`inspect.signature` of the function.
|
|
3371
|
+
|
|
3372
|
+
self.filename = None #: Unique filename of the last function call.
|
|
3373
|
+
self.label = None #: Label of the last function call.
|
|
3374
|
+
self.version = None #: Last version used.
|
|
3375
|
+
|
|
3376
|
+
self.last_cached = None #: Whether the last function call restored data from disk.
|
|
3377
|
+
|
|
3378
|
+
if keep_last_arguments:
|
|
3379
|
+
self.arguments = None #: Last arguments used. This member is only present if ``keep_last_arguments`` was set to ``True`` for the relevant :class:`cdxcore.subdir.CacheController`.
|
|
2439
3380
|
|
|
2440
3381
|
def _ensure_has_version( F,
|
|
2441
3382
|
version : str = None,
|
|
@@ -2486,8 +3427,9 @@ def _qualified_name( F, name ):
|
|
|
2486
3427
|
|
|
2487
3428
|
class CacheCallable(object):
|
|
2488
3429
|
"""
|
|
2489
|
-
|
|
2490
|
-
|
|
3430
|
+
Wrapper for a cached function.
|
|
3431
|
+
|
|
3432
|
+
This is the wrapper returned by :dec:`cdxcore.subdir.SubDir.cache`.
|
|
2491
3433
|
"""
|
|
2492
3434
|
|
|
2493
3435
|
def __init__(self,
|
|
@@ -2503,8 +3445,9 @@ class CacheCallable(object):
|
|
|
2503
3445
|
version_auto_class : bool = True,
|
|
2504
3446
|
name_of_name_arg : str = "name"):
|
|
2505
3447
|
"""
|
|
2506
|
-
Utility class for SubDir.
|
|
2507
|
-
|
|
3448
|
+
Utility class for :dec:`cdxcore.subdir.SubDir.cache`.
|
|
3449
|
+
|
|
3450
|
+
*Do not use directly.*
|
|
2508
3451
|
"""
|
|
2509
3452
|
if not label is None and not uid is None:
|
|
2510
3453
|
error("Cannot specify both 'label' and 'uid'.")
|
|
@@ -2523,35 +3466,41 @@ class CacheCallable(object):
|
|
|
2523
3466
|
|
|
2524
3467
|
@property
|
|
2525
3468
|
def uid_or_label(self) -> Callable:
|
|
3469
|
+
""" ID or label """
|
|
2526
3470
|
return self._uid if self._label is None else self._label
|
|
2527
3471
|
@property
|
|
2528
3472
|
def unique(self) -> bool:
|
|
3473
|
+
""" Whether the ID is unique """
|
|
2529
3474
|
return not self._uid is None
|
|
2530
|
-
|
|
2531
3475
|
@property
|
|
2532
|
-
def
|
|
2533
|
-
""" Returns the
|
|
2534
|
-
return self._subdir.
|
|
3476
|
+
def cache_controller(self) -> CacheController:
|
|
3477
|
+
""" Returns the :class:`cdxcore.subdir.CacheController` """
|
|
3478
|
+
return self._subdir.cache_controller
|
|
2535
3479
|
@property
|
|
2536
|
-
def cache_mode(self) ->
|
|
2537
|
-
|
|
3480
|
+
def cache_mode(self) -> CacheMode:
|
|
3481
|
+
""" Returns the :class:`cdxcore.subdir.CacheMode` of the underlying :class:`cdxcore.subdir.CacheController` """
|
|
3482
|
+
return self.cache_controller.cache_mode
|
|
2538
3483
|
@property
|
|
2539
3484
|
def debug_verbose(self) -> Context:
|
|
2540
|
-
|
|
3485
|
+
""" Returns the debug :class:`cdxcore.verbose.Context` used to print caching information, or ``None`` """
|
|
3486
|
+
return self.cache_controller.debug_verbose
|
|
2541
3487
|
@property
|
|
2542
|
-
def
|
|
2543
|
-
|
|
3488
|
+
def labelledFileName(self) -> Callable:
|
|
3489
|
+
""" Returns ``labelledFileName()`` of the underlying :class:`cdxcore.subdir.CacheController` """
|
|
3490
|
+
return self.cache_controller.labelledFileName
|
|
2544
3491
|
@property
|
|
2545
|
-
def
|
|
2546
|
-
|
|
3492
|
+
def uniqueFileName(self) -> Callable:
|
|
3493
|
+
""" Returns ``uniqueFileName()`` of the underlying :class:`cdxcore.subdir.CacheController` """
|
|
3494
|
+
return self.cache_controller.uniqueFileName
|
|
2547
3495
|
@property
|
|
2548
3496
|
def global_exclude_arg_types(self) -> list[type]:
|
|
2549
|
-
|
|
3497
|
+
""" Returns ``exclude_arg_types`` of the underlying :class:`cdxcore.subdir.CacheController` """
|
|
3498
|
+
return self.cache_controller.exclude_arg_types
|
|
2550
3499
|
|
|
2551
3500
|
def __call__(self, F : Callable):
|
|
2552
3501
|
"""
|
|
2553
|
-
Decorate
|
|
2554
|
-
See SubDir.cache
|
|
3502
|
+
Decorate ``F`` as cachable callable.
|
|
3503
|
+
See :dec:`cdxcore.subdir.SubDir.cache` for documentation.
|
|
2555
3504
|
"""
|
|
2556
3505
|
if inspect.isclass(F):
|
|
2557
3506
|
if not self._label is None: raise ValueError("'{F.__qualname__}': when decorating a class specify 'label' for __init__, not the class")
|
|
@@ -2566,11 +3515,13 @@ class CacheCallable(object):
|
|
|
2566
3515
|
def _wrap_class(self, C : type):
|
|
2567
3516
|
"""
|
|
2568
3517
|
Wrap class
|
|
3518
|
+
|
|
2569
3519
|
This wrapper:
|
|
2570
|
-
|
|
2571
|
-
|
|
3520
|
+
|
|
3521
|
+
* Assigns a :dec:`cdxcore.version.version` for the class (if not yet present).
|
|
3522
|
+
* Extracts from ``__init__`` the wrapper to decorate ``__new__``.
|
|
2572
3523
|
"""
|
|
2573
|
-
debug_verbose = self.
|
|
3524
|
+
debug_verbose = self.cache_controller.debug_verbose
|
|
2574
3525
|
|
|
2575
3526
|
assert not inspect.isclass(C), ("Not a class", C)
|
|
2576
3527
|
|
|
@@ -2609,8 +3560,7 @@ class CacheCallable(object):
|
|
|
2609
3560
|
"""
|
|
2610
3561
|
Decorate callable 'F'.
|
|
2611
3562
|
"""
|
|
2612
|
-
|
|
2613
|
-
debug_verbose = self.cacheController.debug_verbose
|
|
3563
|
+
debug_verbose = self.cache_controller.debug_verbose
|
|
2614
3564
|
assert not inspect.isclass(F), ("Internal error")
|
|
2615
3565
|
|
|
2616
3566
|
# check validity
|
|
@@ -2712,21 +3662,24 @@ class CacheCallable(object):
|
|
|
2712
3662
|
# determine unique id_ for this function call
|
|
2713
3663
|
# -------------------------------------------
|
|
2714
3664
|
|
|
2715
|
-
label = None
|
|
2716
|
-
uid = None
|
|
2717
3665
|
uid_or_label = self.uid_or_label
|
|
3666
|
+
filename = None
|
|
2718
3667
|
if isinstance(uid_or_label, str) and self.unique:
|
|
2719
|
-
# if 'id' does not contain formatting codes,
|
|
3668
|
+
# if 'id' does not contain formatting codes,
|
|
3669
|
+
# and the result is 'unique' then do not bother collecting
|
|
2720
3670
|
# function arguments
|
|
2721
3671
|
try:
|
|
2722
|
-
|
|
3672
|
+
filename = uid_or_label.format() # throws a KeyError if 'id' contains formatting information
|
|
2723
3673
|
except KeyError:
|
|
2724
3674
|
pass
|
|
2725
3675
|
|
|
2726
|
-
if not
|
|
3676
|
+
if not filename is None:
|
|
2727
3677
|
# generate name with the unique string provided by the user
|
|
2728
|
-
|
|
2729
|
-
|
|
3678
|
+
if not is_filename(filename):
|
|
3679
|
+
raise ValueError(f"The unique filename '{filename}' computed for '{name}' contains invalid characters for filename. When using `uid` make sure that "+\
|
|
3680
|
+
"the returned ID is a valid filename (and unique)")
|
|
3681
|
+
label = filename
|
|
3682
|
+
filename = self.uniqueFileName( filename )
|
|
2730
3683
|
arguments = None
|
|
2731
3684
|
|
|
2732
3685
|
else:
|
|
@@ -2767,9 +3720,9 @@ class CacheCallable(object):
|
|
|
2767
3720
|
if arg in arguments:
|
|
2768
3721
|
del arguments[arg]
|
|
2769
3722
|
|
|
2770
|
-
#
|
|
3723
|
+
# did the user provide a label or unique ID?
|
|
2771
3724
|
if uid_or_label is None:
|
|
2772
|
-
|
|
3725
|
+
uid_or_label = name
|
|
2773
3726
|
|
|
2774
3727
|
else:
|
|
2775
3728
|
if self._name_of_name_arg in arguments:
|
|
@@ -2789,24 +3742,30 @@ class CacheCallable(object):
|
|
|
2789
3742
|
# call format or function
|
|
2790
3743
|
if isinstance( uid_or_label, str ):
|
|
2791
3744
|
try:
|
|
2792
|
-
|
|
3745
|
+
uid_or_label = str.format( uid_or_label, **arguments )
|
|
2793
3746
|
except KeyError as e:
|
|
2794
3747
|
raise KeyError(e, f"Error while generating id for '{name}' using format string '{uid_or_label}': {e}. Available arguments: {list(arguments)}")
|
|
2795
3748
|
|
|
2796
3749
|
else:
|
|
2797
3750
|
which = 'uid' if not self._uid is None else 'label'
|
|
2798
3751
|
try:
|
|
2799
|
-
|
|
3752
|
+
uid_or_label = uid_or_label(**arguments)
|
|
2800
3753
|
except TypeError as e:
|
|
2801
3754
|
raise TypeError(e, f"Error while generating '{which}' for '{name}' using a function: {e}. Available arguments: {list(arguments)}")
|
|
2802
3755
|
except Exception as e:
|
|
2803
3756
|
raise type(e)(f"Error while generating '{which}' for '{name}': attempt to call '{which}' of type {type(uid_or_label)} failed: {e}")
|
|
2804
|
-
assert isinstance(
|
|
3757
|
+
assert isinstance(uid_or_label, str), ("Error:", which, "callable must return a string. Found",type(uid_or_label))
|
|
2805
3758
|
|
|
2806
3759
|
if self.unique:
|
|
2807
|
-
|
|
3760
|
+
if not is_filename(uid_or_label):
|
|
3761
|
+
raise ValueError(f"The unique filename '{uid_or_label}' computed for '{name}' contains invalid characters for filename. When using `uid` make sure that "+\
|
|
3762
|
+
"the returned filename is indeed a valid filename (and unique)")
|
|
3763
|
+
|
|
3764
|
+
label = uid_or_label
|
|
3765
|
+
filename = self.uniqueFileName( uid_or_label )
|
|
2808
3766
|
else:
|
|
2809
|
-
|
|
3767
|
+
label = uid_or_label
|
|
3768
|
+
filename = self.labelledFileName( uid_or_label, **arguments )
|
|
2810
3769
|
|
|
2811
3770
|
# determine version, cache mode
|
|
2812
3771
|
# ------------------
|
|
@@ -2818,11 +3777,11 @@ class CacheCallable(object):
|
|
|
2818
3777
|
# store process information
|
|
2819
3778
|
# -------------------------
|
|
2820
3779
|
|
|
2821
|
-
execute.cache_info.label
|
|
2822
|
-
execute.cache_info.
|
|
2823
|
-
execute.cache_info.version
|
|
3780
|
+
execute.cache_info.label = str(label) if not label is None else None
|
|
3781
|
+
execute.cache_info.filename = filename
|
|
3782
|
+
execute.cache_info.version = version_
|
|
2824
3783
|
|
|
2825
|
-
if self.
|
|
3784
|
+
if self.cache_controller.keep_last_arguments:
|
|
2826
3785
|
info_arguments = OrderedDict()
|
|
2827
3786
|
for argname, argvalue in arguments.items():
|
|
2828
3787
|
info_arguments[argname] = str(argvalue)[:100]
|
|
@@ -2833,26 +3792,26 @@ class CacheCallable(object):
|
|
|
2833
3792
|
# ---------------
|
|
2834
3793
|
|
|
2835
3794
|
if cache_mode.delete:
|
|
2836
|
-
self._subdir.delete(
|
|
3795
|
+
self._subdir.delete( filename )
|
|
2837
3796
|
elif cache_mode.read:
|
|
2838
3797
|
class Tag:
|
|
2839
3798
|
pass
|
|
2840
3799
|
tag = Tag()
|
|
2841
3800
|
if not is_new:
|
|
2842
|
-
r = self._subdir.read(
|
|
3801
|
+
r = self._subdir.read( filename, tag, version=version_ )
|
|
2843
3802
|
else:
|
|
2844
3803
|
try:
|
|
2845
3804
|
execute.__new_during_read = True
|
|
2846
|
-
r = self._subdir.read(
|
|
3805
|
+
r = self._subdir.read( filename, tag, version=version_ )
|
|
2847
3806
|
finally:
|
|
2848
3807
|
execute.__new_during_read = False
|
|
2849
3808
|
|
|
2850
3809
|
if not r is tag:
|
|
2851
3810
|
if not track_cached_files is None:
|
|
2852
|
-
track_cached_files += self._fullFileName(
|
|
3811
|
+
track_cached_files += self._fullFileName(filename)
|
|
2853
3812
|
execute.cache_info.last_cached = True
|
|
2854
3813
|
if not debug_verbose is None:
|
|
2855
|
-
debug_verbose.write(f"cache({name}): read '{label}' version 'version {version_}' from cache '{self._subdir.
|
|
3814
|
+
debug_verbose.write(f"cache({name}): read '{label}' version 'version {version_}' from cache '{self._subdir.full_file_name(filename)}'.")
|
|
2856
3815
|
if is_new:
|
|
2857
3816
|
assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__", F.__qualname__, label)
|
|
2858
3817
|
r.__magic_cache_call_init__ = False # since we called __new__, __init__ will be called next
|
|
@@ -2871,9 +3830,9 @@ class CacheCallable(object):
|
|
|
2871
3830
|
assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__")
|
|
2872
3831
|
|
|
2873
3832
|
if cache_mode.write:
|
|
2874
|
-
self._subdir.write(
|
|
3833
|
+
self._subdir.write(filename,r,version=version_)
|
|
2875
3834
|
if not track_cached_files is None:
|
|
2876
|
-
track_cached_files += self._subdir.
|
|
3835
|
+
track_cached_files += self._subdir.full_file_name(filename)
|
|
2877
3836
|
execute.cache_info.last_cached = False
|
|
2878
3837
|
|
|
2879
3838
|
if is_new:
|
|
@@ -2883,81 +3842,21 @@ class CacheCallable(object):
|
|
|
2883
3842
|
|
|
2884
3843
|
if not debug_verbose is None:
|
|
2885
3844
|
if cache_mode.write:
|
|
2886
|
-
debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' and wrote result into '{self._subdir.
|
|
3845
|
+
debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' and wrote result into '{self._subdir.full_file_name(filename)}'.")
|
|
2887
3846
|
else:
|
|
2888
|
-
debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' but did *not* write into '{self._subdir.
|
|
3847
|
+
debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' but did *not* write into '{self._subdir.full_file_name(filename)}'.")
|
|
2889
3848
|
return r
|
|
2890
3849
|
|
|
2891
3850
|
update_wrapper( wrapper=execute, wrapped=F )
|
|
2892
|
-
execute.cache_info = CacheInfo()
|
|
2893
|
-
|
|
2894
|
-
execute.cache_info.name = name # decoded name of the function
|
|
2895
|
-
execute.cache_info.signature = inspect.signature(F) # signature of the function
|
|
2896
|
-
|
|
2897
|
-
execute.cache_info.uid = None # last function call ID
|
|
2898
|
-
execute.cache_info.label = None # last unique file name cached to
|
|
2899
|
-
execute.cache_info.version = None # last version used
|
|
2900
|
-
|
|
2901
|
-
execute.cache_info.last_cached = None # last function call restored from disk?
|
|
2902
|
-
|
|
2903
|
-
if self.cacheController.keep_last_arguments:
|
|
2904
|
-
execute.cache_info.arguments = None # last function call arguments dictionary of strings
|
|
3851
|
+
execute.cache_info = CacheInfo(name, F, self.cache_controller.keep_last_arguments)
|
|
2905
3852
|
|
|
2906
3853
|
if is_new:
|
|
2907
3854
|
execute.__new_during_read = False
|
|
2908
3855
|
|
|
2909
3856
|
if not debug_verbose is None:
|
|
2910
3857
|
debug_verbose.write(f"cache({name}): {'function' if not is_new else 'class constructor function'} registered for caching into '{self._subdir.path}'.")
|
|
2911
|
-
self.
|
|
3858
|
+
self.cache_controller.versioned[name] = execute
|
|
2912
3859
|
return execute
|
|
2913
3860
|
|
|
2914
|
-
def VersionedCacheRoot( directory : str, *,
|
|
2915
|
-
ext : str = None,
|
|
2916
|
-
fmt : Format = None,
|
|
2917
|
-
createDirectory : bool = None,
|
|
2918
|
-
**controller_kwargs
|
|
2919
|
-
):
|
|
2920
|
-
"""
|
|
2921
|
-
Create a root directory for versioning caching on disk
|
|
2922
|
-
|
|
2923
|
-
Usage:
|
|
2924
|
-
In a central file, define a root directory
|
|
2925
|
-
vroot = VersionedCacheRoot("!/cache")
|
|
2926
3861
|
|
|
2927
|
-
and a sub-directory
|
|
2928
|
-
vtest = vroot("test")
|
|
2929
|
-
|
|
2930
|
-
@vtest.cache("1.0")
|
|
2931
|
-
def f1( x=1, y=2 ):
|
|
2932
|
-
print(x,y)
|
|
2933
|
-
|
|
2934
|
-
@vtest.cache("1.0", dps=[f1])
|
|
2935
|
-
def f2( x=1, y=2, z=3 ):
|
|
2936
|
-
f1( x,y )
|
|
2937
|
-
print(z)
|
|
2938
|
-
|
|
2939
|
-
Parameters
|
|
2940
|
-
----------
|
|
2941
|
-
directory : name of the directory. Using SubDir the following short cuts are supported:
|
|
2942
|
-
"!/dir" creates 'dir' in the temporary directory
|
|
2943
|
-
"~/dir" creates 'dir' in the home directory
|
|
2944
|
-
"./dir" created 'dir' relative to the current directory
|
|
2945
|
-
ext : extension, which will automatically be appended to file names (see SubDir). Default depends on format. For Format.PICKLE it is 'pck'
|
|
2946
|
-
fmt : format, see SubDir.Format. Default is Format.PICKLE
|
|
2947
|
-
createDirectory : whether to create the directory upon creation. Default is no.
|
|
2948
|
-
controller_kwargs: parameters passed to VersionController, for example:
|
|
2949
|
-
exclude_arg_types : list of types or names of types to exclude when auto-generating function signatures from function arguments.
|
|
2950
|
-
A standard example from cdxbasics is "Context" as it is used to print progress messages.
|
|
2951
|
-
max_filename_length : maximum filename length
|
|
2952
|
-
hash_length: length used for hashes, see cdxbasics.util.uniqueHash()
|
|
2953
|
-
|
|
2954
|
-
Returns
|
|
2955
|
-
-------
|
|
2956
|
-
A root cache directory
|
|
2957
|
-
"""
|
|
2958
|
-
controller = CacheController(**controller_kwargs) if len(controller_kwargs) > 0 else None
|
|
2959
|
-
return SubDir( directory=directory, ext=ext, fmt=fmt, createDirectory=createDirectory, controller=controller )
|
|
2960
3862
|
|
|
2961
|
-
version = version_decorator
|
|
2962
|
-
|
|
2963
|
-
|