cdxcore 0.1.5__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cdxcore might be problematic. Click here for more details.
- cdxcore/__init__.py +1 -9
- cdxcore/config.py +1188 -521
- cdxcore/crman.py +95 -25
- cdxcore/err.py +371 -0
- cdxcore/pretty.py +468 -0
- cdxcore/pretty.py_bak.py +750 -0
- cdxcore/subdir.py +2225 -1334
- cdxcore/uniquehash.py +515 -363
- cdxcore/util.py +358 -417
- cdxcore/verbose.py +683 -248
- cdxcore/version.py +398 -139
- cdxcore-0.1.9.dist-info/METADATA +27 -0
- cdxcore-0.1.9.dist-info/RECORD +36 -0
- {cdxcore-0.1.5.dist-info → cdxcore-0.1.9.dist-info}/top_level.txt +3 -1
- docs/source/conf.py +123 -0
- docs2/source/conf.py +35 -0
- tests/test_config.py +502 -0
- tests/test_crman.py +54 -0
- tests/test_err.py +86 -0
- tests/test_pretty.py +404 -0
- tests/test_subdir.py +289 -0
- tests/test_uniquehash.py +159 -144
- tests/test_util.py +122 -83
- tests/test_verbose.py +119 -0
- tests/test_version.py +153 -0
- up/git_message.py +2 -2
- cdxcore/logger.py +0 -319
- cdxcore/prettydict.py +0 -388
- cdxcore/prettyobject.py +0 -64
- cdxcore-0.1.5.dist-info/METADATA +0 -1418
- cdxcore-0.1.5.dist-info/RECORD +0 -30
- conda/conda_exists.py +0 -10
- conda/conda_modify_yaml.py +0 -42
- tests/_cdxbasics.py +0 -1086
- {cdxcore-0.1.5.dist-info → cdxcore-0.1.9.dist-info}/WHEEL +0 -0
- {cdxcore-0.1.5.dist-info → cdxcore-0.1.9.dist-info}/licenses/LICENSE +0 -0
- {cdxcore → tmp}/deferred.py +0 -0
- {cdxcore → tmp}/dynaplot.py +0 -0
- {cdxcore → tmp}/filelock.py +0 -0
- {cdxcore → tmp}/jcpool.py +0 -0
- {cdxcore → tmp}/np.py +0 -0
- {cdxcore → tmp}/npio.py +0 -0
- {cdxcore → tmp}/sharedarray.py +0 -0
cdxcore/subdir.py
CHANGED
|
@@ -1,61 +1,381 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
2
|
+
Overview
|
|
3
|
+
--------
|
|
4
|
+
|
|
5
|
+
This module contains utilities for file i/o, directory management and
|
|
6
|
+
streamlined versioned caching.
|
|
7
|
+
|
|
8
|
+
The key idea is to provide transparent, concise :mod:`pickle` access to the file system
|
|
9
|
+
via the :class:`cdxcore.subdir.SubDir` class.
|
|
10
|
+
|
|
11
|
+
**Key design features:**
|
|
12
|
+
|
|
13
|
+
* Simple path construction via ``()`` operator. By default directories which do not exist yet
|
|
14
|
+
are only created upon writing a first file.
|
|
15
|
+
|
|
16
|
+
* Files managed by :class:`cdxcore.subdir.SubDir` all have the same extension.
|
|
17
|
+
|
|
18
|
+
* Files support fast versioning: the version of a file can be read without having to read the
|
|
19
|
+
entire file.
|
|
20
|
+
|
|
21
|
+
* :dec:`cdxcore.subdir.SubDir.cache` implements a convenient versioned caching framework.
|
|
22
|
+
|
|
23
|
+
Directories
|
|
24
|
+
^^^^^^^^^^^
|
|
25
|
+
|
|
26
|
+
The core of the framework is the :class:`cdxcore.subdir.SubDir` class which represents a directory
|
|
27
|
+
with files of a given extension.
|
|
28
|
+
|
|
29
|
+
Simply write::
|
|
30
|
+
|
|
31
|
+
from cdxcore.subdir import SubDir
|
|
32
|
+
subdir = SubDir("my_directory") # relative to current working directory
|
|
33
|
+
subdir = SubDir("./my_directory") # relative to current working directory
|
|
34
|
+
subdir = SubDir("~/my_directory") # relative to home directory
|
|
35
|
+
subdir = SubDir("!/my_directory") # relative to default temp directory
|
|
36
|
+
|
|
37
|
+
Note that ``my_directory`` will not be created if it does not exist yet. It will be created the first
|
|
38
|
+
time we write a file.
|
|
39
|
+
|
|
40
|
+
You can specify a parent for relative path names::
|
|
41
|
+
|
|
42
|
+
from cdxcore.subdir import SubDir
|
|
43
|
+
subdir = SubDir("my_directory", "~") # relative to home directory
|
|
44
|
+
subdir = SubDir("my_directory", "!") # relative to default temp directory
|
|
45
|
+
subdir = SubDir("my_directory", ".") # relative to current directory
|
|
46
|
+
subdir2 = SubDir("my_directory", subdir) # subdir2 is relative to `subdir`
|
|
47
|
+
|
|
48
|
+
Change the extension to "bin"::
|
|
49
|
+
|
|
50
|
+
from cdxcore.subdir import SubDir
|
|
51
|
+
subdir = SubDir("~/my_directory;*.bin")
|
|
52
|
+
subdir = SubDir("~/my_directory", ext="bin")
|
|
53
|
+
subdir = SubDir("my_directory", "~", ext="bin")
|
|
54
|
+
|
|
55
|
+
You can turn off extension management by setting the extension to ""::
|
|
56
|
+
|
|
57
|
+
from cdxcore.subdir import SubDir
|
|
58
|
+
subdir = SubDir("~/my_directory", ext="")
|
|
59
|
+
|
|
60
|
+
You can also use :meth:`cdxcore.subdir.SubDir.__call__` to generate sub directories::
|
|
61
|
+
|
|
62
|
+
from cdxcore.subdir import SubDir
|
|
63
|
+
parent = SubDir("~/parent")
|
|
64
|
+
subdir = parent("subdir")
|
|
65
|
+
|
|
66
|
+
Be aware that when the operator :meth:`cdxcore.subdir.SubDir.__call__`
|
|
67
|
+
is called with two keyword arguments, then it reads files.
|
|
68
|
+
|
|
69
|
+
You can obtain a list of all sub directories in a directory by using :meth:`cdxcore.subdir.SubDir.sub_dirs`.
|
|
70
|
+
The list of files with the corresponding extension is accessible via :meth:`cdxcore.subdir.SubDir.files`.
|
|
71
|
+
|
|
72
|
+
File Format
|
|
73
|
+
^^^^^^^^^^^
|
|
74
|
+
|
|
75
|
+
:class:`cdxcore.subdir.SubDir` supports file i/o with a number of different file formats
|
|
76
|
+
via :class:`cdxcore.subdir.Format`.
|
|
77
|
+
|
|
78
|
+
* "PICKLE": standard pickling with default extension is "pck".
|
|
79
|
+
|
|
80
|
+
* "JSON_PICKLE": uses the :mod:`jsonpickle` package; default extension "jpck".
|
|
81
|
+
The advantage of this format over "PICKLE" is that it is somewhat human-readable.
|
|
82
|
+
However, ``jsonpickle`` uses compressed formats for complex objects such as :mod:`numpy`
|
|
83
|
+
arrays, hence readability is somewhat limited. Using "JSON_PICKLE"
|
|
84
|
+
comes at cost of slower i/o speed.
|
|
85
|
+
|
|
86
|
+
* "JSON_PLAIN": calls :func:`cdxcore.util.plain` to generate human readable files
|
|
87
|
+
which cannot be loaded back from disk.
|
|
88
|
+
In this mode ``SubDir`` converts objects into plain Python objects before using :mod:`json`
|
|
89
|
+
to write them to disk.
|
|
90
|
+
That means that deserialized data does not have the correct object structure
|
|
91
|
+
to be able to restore files written in "JSON_PLAIN".
|
|
92
|
+
However, such files are much easier to read.
|
|
93
|
+
|
|
94
|
+
* "BLOSC" uses `blosc <https://github.com/blosc/python-blosc>`__
|
|
95
|
+
to read/write compressed binary data. The blosc compression algorithm is very fast,
|
|
96
|
+
hence using this mode will not usually lead to notably slower performance than using
|
|
97
|
+
"PICKLE" but will generate smaller files, depending on your data structure.
|
|
98
|
+
|
|
99
|
+
The default extension for "BLOSC" is "zbsc".
|
|
100
|
+
|
|
101
|
+
* "GZIP": uses :mod:`gzip`
|
|
102
|
+
to read/write compressed binary data. The default extension is "pgz".
|
|
103
|
+
|
|
104
|
+
Summary of properties:
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
108
|
+
| Format | Restores objects | Human readable | Speed | Compression | Extension |
|
|
109
|
+
+==============+==================+================+=======+=============+===========+
|
|
110
|
+
| PICKLE | yes | no | high | no | .pck |
|
|
111
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
112
|
+
| JSON_PLAIN | no | yes | low | no | .json |
|
|
113
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
114
|
+
| JSON_PICKLE | yes | limited | low | no | .jpck |
|
|
115
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
116
|
+
| BLOSC | yes | no | high | yes | .zbsc |
|
|
117
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
118
|
+
| GZIP | yes | no | high | yes | .pgz |
|
|
119
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
You may specify the file format when instantiating :class:`cdxcore.subdir.SubDir`::
|
|
123
|
+
|
|
124
|
+
from cdxcore.subdir import SubDir
|
|
125
|
+
subdir = SubDir("~/my_directory", fmt=SubDir.PICKLE)
|
|
126
|
+
subdir = SubDir("~/my_directory", fmt=SubDir.JSON_PICKLE)
|
|
127
|
+
...
|
|
128
|
+
|
|
129
|
+
If ``ext`` is not specified the extension will default to
|
|
130
|
+
the respective default extension of the format requested.
|
|
131
|
+
|
|
132
|
+
Reading Files
|
|
133
|
+
^^^^^^^^^^^^^
|
|
134
|
+
|
|
135
|
+
To read the data contained in a ``file`` from our subdirectory
|
|
136
|
+
with its reference extension use :meth:`cdxcore.subdir.SubDir.read`::
|
|
137
|
+
|
|
138
|
+
from cdxcore.subdir import SubDir
|
|
139
|
+
subdir = SubDir("!/test")
|
|
140
|
+
|
|
141
|
+
data = subdir.read("file") # returns the default `None` if file is not found
|
|
142
|
+
data = subdir.read("file", default=[]) # returns the default [] if file is not found
|
|
143
|
+
|
|
144
|
+
This function will return the ``default`` (which in turn defaults to ``None``)
|
|
145
|
+
if ``file.ext`` does not exist.
|
|
146
|
+
You can opt for :meth:`cdxcore.subdir.SubDir.read` to raise an error instead of returning a default
|
|
147
|
+
by using ``raise_on_error=True``::
|
|
148
|
+
|
|
149
|
+
data = subdir.read("file", raise_on_error=True) # raises 'KeyError' if not found
|
|
150
|
+
|
|
151
|
+
When calling ``read()`` you may specify an alternative extension::
|
|
152
|
+
|
|
153
|
+
data = subdir.read("file", ext="bin") # change extension to "bin"
|
|
154
|
+
data = subdir.read("file.bin", ext="") # no automatic extension
|
|
155
|
+
|
|
156
|
+
Specifying a different format for :meth:`cdxcore.subdir.SubDir.read` only changes
|
|
157
|
+
the extension automatically if you have not overwritten it before:
|
|
158
|
+
|
|
159
|
+
.. code-block:: python
|
|
160
|
+
|
|
161
|
+
subdir = SubDir("!/test") # default format PICKLE with extension pck
|
|
162
|
+
data = subdir.read("file", fmt=SubDir.JSON_PICKLE ) # uses "jpck" extension
|
|
163
|
+
|
|
164
|
+
subdir = SubDir("!/test", ext="bin") # user-specified extension
|
|
165
|
+
data = subdir.read("file", fmt=SubDir.JSON_PICKLE ) # keeps using "bin"
|
|
166
|
+
|
|
167
|
+
You can also use the :meth:`cdxcore.subdir.SubDir.__call__` to read files, in which case you must specify a default value
|
|
168
|
+
(if you don't, then the operator will return a sub directory)::
|
|
169
|
+
|
|
170
|
+
data = subdir("file", None) # returns None if file is not found
|
|
171
|
+
|
|
172
|
+
You can also use item notation to access files.
|
|
173
|
+
In this case, though, an error will be thrown if the file does not exist::
|
|
174
|
+
|
|
175
|
+
data = subdir['file'] # raises KeyError if file is not found
|
|
176
|
+
|
|
177
|
+
You can read a range of files in one function call::
|
|
178
|
+
|
|
179
|
+
data = subdir.read( ["file1", "file2"] ) # returns list
|
|
180
|
+
|
|
181
|
+
Finally, you can also iterate through all existing files using iterators::
|
|
182
|
+
|
|
183
|
+
# manual loading
|
|
184
|
+
for file in subdir:
|
|
185
|
+
data = subdir.read(file)
|
|
186
|
+
...
|
|
187
|
+
|
|
188
|
+
# automatic loading, with "None" as a default
|
|
189
|
+
for file, data in subdir.items():
|
|
190
|
+
...
|
|
191
|
+
|
|
192
|
+
To obtain a list of all files in our directory which have the correct extension, use :meth:`cdxcore.subdir.SubDir.files`.
|
|
193
|
+
|
|
194
|
+
Writing Files
|
|
195
|
+
^^^^^^^^^^^^^
|
|
196
|
+
|
|
197
|
+
Writing files mirrors reading them::
|
|
198
|
+
|
|
199
|
+
from cdxcore.subdir import SubDir
|
|
200
|
+
subdir = SubDir("!/test")
|
|
201
|
+
|
|
202
|
+
subdir.write("file", data)
|
|
203
|
+
subdir['file'] = data
|
|
204
|
+
|
|
205
|
+
You may specify a different extension::
|
|
206
|
+
|
|
207
|
+
subdir.write("file", data, ext="bin")
|
|
208
|
+
|
|
209
|
+
You can also specify a file :class:`cdxcore.subdir.Format`.
|
|
210
|
+
The extension will be changed automatically if you have not set it manually::
|
|
211
|
+
|
|
212
|
+
subdir = SubDir("!/test")
|
|
213
|
+
subdir.write("file", data, fmt=SubDir.JSON_PICKLE ) # will write to "file.jpck"
|
|
214
|
+
|
|
215
|
+
To write several files at once, write::
|
|
216
|
+
|
|
217
|
+
subdir.write(["file1", "file2"], [data1, data2])
|
|
218
|
+
|
|
219
|
+
Note that when writing to a file, :meth:`cdxcore.subdir.SubDir.write`
|
|
220
|
+
will first write to a temporary file, and then rename this file into the target file name.
|
|
221
|
+
The temporary file name is generated by applying :func:`cdxcore.uniquehash.unique_hash48`
|
|
222
|
+
to the
|
|
223
|
+
target file name,
|
|
224
|
+
current time, process and thread ID, as well as the machine's UUID.
|
|
225
|
+
This is done to reduce collisions between processes/machines accessing the same files,
|
|
226
|
+
potentially across a network.
|
|
227
|
+
It does not remove collision risk entirely, though.
|
|
228
|
+
|
|
229
|
+
Filenames
|
|
230
|
+
^^^^^^^^^
|
|
231
|
+
|
|
232
|
+
:class:`cdxcore.subdir.SubDir` transparently handles directory access and extensions.
|
|
233
|
+
That means a user usually only uses ``file`` names which do not contain either.
|
|
234
|
+
To obtain the full qualified filename given a "file" use :meth:`cdxcore.subdir.SubDir.full_file_name`.
|
|
235
|
+
|
|
236
|
+
Reading and Writing Versioned Files
|
|
237
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
238
|
+
|
|
239
|
+
:class:`cdxcore.subdir.SubDir` supports versioned files.
|
|
240
|
+
If versions are used, then they *must* be used for both reading and writing.
|
|
241
|
+
:dec:`cdxcore.version.version` provides a standard decorator framework for defining
|
|
242
|
+
versions for classes and functions including the version dependencies.
|
|
243
|
+
|
|
244
|
+
If a ``version`` is provided to :func:`cdxcore.subdir.SubDir.write`
|
|
245
|
+
then ``SubDir`` will write the version in a block ahead of the main content of the file.
|
|
246
|
+
In case of the PICKLE format, this is a byte string. In case of JSON_PLAIN and JSON_PICKLE this is a line of
|
|
247
|
+
text starting with ``#`` ahead of the file. (Note that this violates
|
|
248
|
+
the JSON file format.)
|
|
249
|
+
|
|
250
|
+
Writing a short version block ahead of the main data allows :func:`cdxcore.subdir.SubDir.read`
|
|
251
|
+
to read this version information back quickly without needing to read the entire file.
|
|
252
|
+
``read()`` attempts to do so if it is called with a ``version`` parameter.
|
|
253
|
+
In this case it will compare the read version with the provided version,
|
|
254
|
+
and only return the main content of the file if versions match.
|
|
255
|
+
|
|
256
|
+
Use :func:`cdxcore.subdir.SubDir.is_version` to check whether a given file has a specific version.
|
|
257
|
+
Like ``read()`` this function only reads the information required to obtain the information and will
|
|
258
|
+
be much faster than reading the whole file.
|
|
259
|
+
|
|
260
|
+
*Important:* note that if a file was written with a version, it has to be read again with a test version.
|
|
261
|
+
You can specify ``version="*"`` for :func:`cdxcore.subdir.SubDir.read` to match any version.
|
|
262
|
+
|
|
263
|
+
**Examples:**
|
|
264
|
+
|
|
265
|
+
Writing a versioned file::
|
|
266
|
+
|
|
267
|
+
from cdxcore.subdir import SubDir
|
|
268
|
+
sub_dir = SubDir("!/test_version")
|
|
269
|
+
sub_dir.write("test", [1,2,3], version="0.0.1" )
|
|
270
|
+
|
|
271
|
+
To read ``[1,2,3]`` from "test" we need to use the correct version::
|
|
272
|
+
|
|
273
|
+
_ = sub_dir.read("test", version="0.0.1")
|
|
274
|
+
|
|
275
|
+
The following will not read "test" as the versions do not match::
|
|
276
|
+
|
|
277
|
+
_ = sub_dir.read("test", version="0.0.2")
|
|
278
|
+
|
|
279
|
+
By default :func:`cdxcore.subdir.SubDir.read`
|
|
280
|
+
will not fail if a version mismatch is encountered; rather it will
|
|
281
|
+
attempt to delete the file and then return the ``default`` value.
|
|
282
|
+
|
|
283
|
+
This can be turned off
|
|
284
|
+
with the keyword ``delete_wrong_version`` set to ``False``.
|
|
285
|
+
|
|
286
|
+
You can ignore the version used to write a file by using `*` as version::
|
|
287
|
+
|
|
288
|
+
_ = sub_dir.read("test", version="*")
|
|
289
|
+
|
|
290
|
+
Note that reading files which have been written with a ``version`` without
|
|
291
|
+
``version`` keyword will fail because ``SubDir`` will only append additional version information
|
|
292
|
+
to the file if required.
|
|
293
|
+
|
|
294
|
+
Test existence of Files
|
|
295
|
+
^^^^^^^^^^^^^^^^^^^^^^^
|
|
296
|
+
|
|
297
|
+
To test existence of 'file' in a directory, use one of::
|
|
298
|
+
|
|
299
|
+
subdir.exist('file')
|
|
300
|
+
'file' in subdir
|
|
301
|
+
|
|
302
|
+
Deleting files
|
|
303
|
+
^^^^^^^^^^^^^^
|
|
304
|
+
|
|
305
|
+
To delete a 'file', use any of the following::
|
|
306
|
+
|
|
307
|
+
subdir.delete("file")
|
|
308
|
+
del subdir['file']
|
|
309
|
+
|
|
310
|
+
All of these are *silent*, and will not throw errors if "file" does not exist.
|
|
311
|
+
In order to throw an error use::
|
|
312
|
+
|
|
313
|
+
subdir.delete('file', raise_on_error=True)
|
|
314
|
+
|
|
315
|
+
A few member functions assist in deleting a number of files:
|
|
316
|
+
|
|
317
|
+
* :func:`cdxcore.subdir.SubDir.delete_all_files`: delete all files in the directory with matching extension. Do not delete sub directories, or files with extensions different to our own.
|
|
318
|
+
* :func:`cdxcore.subdir.SubDir.delete_all_content`: delete all files with our extension, including in all sub-directories. If a sub-directory is left empty
|
|
319
|
+
upon ``delete_all_content`` delete it, too.
|
|
320
|
+
* :func:`cdxcore.subdir.SubDir.delete_everything`: deletes *everything*, not just files with matching extensions.
|
|
321
|
+
|
|
322
|
+
Caching
|
|
323
|
+
^^^^^^^
|
|
324
|
+
|
|
325
|
+
A :class:`cdxcore.subdir.SubDir` object offers an advanced context for caching calls to :class:`collections.abc.Callable`
|
|
326
|
+
objects with :dec:`cdxcore.subdir.SubDir.cache`.
|
|
327
|
+
|
|
328
|
+
This involves keying the cache by the function name and its current parameters using :class:`cdxcore.uniquehash.UniqueHash`,
|
|
329
|
+
and monitoring the function's version using :dec:`cdxcore.version.version`. The caching behaviour itself can be controlled by
|
|
330
|
+
specifying the desired :class:`cdxcore.subdir.CacheMode`.
|
|
331
|
+
|
|
332
|
+
Import
|
|
333
|
+
------
|
|
334
|
+
.. code-block:: python
|
|
335
|
+
|
|
336
|
+
from cdxcore.subdir import SubDir
|
|
337
|
+
|
|
338
|
+
Documentation
|
|
339
|
+
-------------
|
|
5
340
|
"""
|
|
6
341
|
|
|
7
342
|
|
|
8
|
-
import os
|
|
9
|
-
import
|
|
10
|
-
import
|
|
11
|
-
import
|
|
12
|
-
import
|
|
13
|
-
import
|
|
14
|
-
import
|
|
15
|
-
import
|
|
16
|
-
import
|
|
343
|
+
import os as os
|
|
344
|
+
import uuid as uuid
|
|
345
|
+
import threading as threading
|
|
346
|
+
import pickle as pickle
|
|
347
|
+
import tempfile as tempfile
|
|
348
|
+
import shutil as shutil
|
|
349
|
+
import datetime as datetime
|
|
350
|
+
import inspect as inspect
|
|
351
|
+
import platform as platform
|
|
17
352
|
from collections import OrderedDict
|
|
18
|
-
from collections.abc import Collection, Mapping, Callable
|
|
353
|
+
from collections.abc import Collection, Mapping, Callable, Iterable
|
|
19
354
|
from enum import Enum
|
|
20
|
-
import json as json
|
|
21
|
-
import platform as platform
|
|
22
355
|
from functools import update_wrapper
|
|
23
|
-
import warnings as warnings
|
|
24
356
|
|
|
25
|
-
import
|
|
357
|
+
import json as json
|
|
26
358
|
import jsonpickle as jsonpickle
|
|
27
359
|
import jsonpickle.ext.numpy as jsonpickle_numpy
|
|
28
|
-
import zlib as zlib
|
|
29
360
|
import gzip as gzip
|
|
30
361
|
import blosc as blosc
|
|
31
362
|
|
|
32
|
-
from .
|
|
363
|
+
from .err import verify, error, warn, fmt as txtfmt
|
|
364
|
+
from .pretty import PrettyObject
|
|
33
365
|
from .verbose import Context
|
|
34
|
-
from .version import Version, version as version_decorator
|
|
35
|
-
from .util import fmt_list, fmt_filename, DEF_FILE_NAME_MAP,
|
|
36
|
-
from .uniquehash import
|
|
37
|
-
|
|
38
|
-
def error( text, *args, exception = RuntimeError, **kwargs ):
|
|
39
|
-
raise exception( txtfmt(text, *args, **kwargs) )
|
|
40
|
-
def verify( cond, text, *args, exception = RuntimeError, **kwargs ):
|
|
41
|
-
if not cond:
|
|
42
|
-
error( text, *args, **kwargs, exception=exception )
|
|
43
|
-
def warn( text, *args, warning=warnings.RuntimeWarning, stack_level=1, **kwargs ):
|
|
44
|
-
warnings.warn( txtfmt(text, *args, **kwargs), warning, stack_level=stack_level )
|
|
366
|
+
from .version import Version, version as version_decorator, VersionError
|
|
367
|
+
from .util import fmt_list, fmt_filename, DEF_FILE_NAME_MAP, plain, is_filename
|
|
368
|
+
from .uniquehash import unique_hash48, UniqueLabel, NamedUniqueHash
|
|
369
|
+
|
|
45
370
|
|
|
46
371
|
"""
|
|
372
|
+
:meta private:
|
|
47
373
|
compression
|
|
48
374
|
"""
|
|
49
375
|
jsonpickle_numpy.register_handlers()
|
|
50
376
|
BLOSC_MAX_BLOCK = 2147483631
|
|
51
377
|
BLOSC_MAX_USE = 1147400000 # ... blosc really cannot handle large files
|
|
52
|
-
|
|
53
|
-
"""
|
|
54
|
-
Hashing
|
|
55
|
-
"""
|
|
56
|
-
uniqueFileName48 = uniqueHash48
|
|
57
|
-
uniqueNamedFileName48_16 = namedUniqueHashExt(max_length=48,id_length=16,filename_by=DEF_FILE_NAME_MAP)
|
|
58
|
-
uniqueLabelledFileName48_16 = uniqueLabelExt(max_length=48,id_length=16,filename_by=DEF_FILE_NAME_MAP)
|
|
378
|
+
#
|
|
59
379
|
|
|
60
380
|
def _remove_trailing( path ):
|
|
61
381
|
if len(path) > 0:
|
|
@@ -63,13 +383,34 @@ def _remove_trailing( path ):
|
|
|
63
383
|
return _remove_trailing(path[:-1])
|
|
64
384
|
return path
|
|
65
385
|
|
|
386
|
+
|
|
387
|
+
# ========================================================================
|
|
388
|
+
# Basics
|
|
389
|
+
# ========================================================================
|
|
390
|
+
|
|
66
391
|
class Format(Enum):
|
|
67
|
-
"""
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
392
|
+
"""
|
|
393
|
+
File formats for :class:`cdxcore.subdir.SubDir`.
|
|
394
|
+
|
|
395
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
396
|
+
| Format | Restores objects | Human readable | Speed | Compression | Extension |
|
|
397
|
+
+==============+==================+================+=======+=============+===========+
|
|
398
|
+
| PICKLE | yes | no | high | no | .pck |
|
|
399
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
400
|
+
| JSON_PLAIN | no | yes | low | no | .json |
|
|
401
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
402
|
+
| JSON_PICKLE | yes | limited | low | no | .jpck |
|
|
403
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
404
|
+
| BLOSC | yes | no | high | yes | .zbsc |
|
|
405
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
406
|
+
| GZIP | yes | no | high | yes | .pgz |
|
|
407
|
+
+--------------+------------------+----------------+-------+-------------+-----------+
|
|
408
|
+
"""
|
|
409
|
+
PICKLE = 0 #: Standard binary :mod:`pickle` format.
|
|
410
|
+
JSON_PICKLE = 1 #: :mod:`jsonpickle` format.
|
|
411
|
+
JSON_PLAIN = 2 #: ``json`` format.
|
|
412
|
+
BLOSC = 3 #: :mod:`blosc` binary compressed format.
|
|
413
|
+
GZIP = 4 #: :mod:`gzip` binary compressed format.
|
|
73
414
|
|
|
74
415
|
PICKLE = Format.PICKLE
|
|
75
416
|
JSON_PICKLE = Format.JSON_PICKLE
|
|
@@ -77,31 +418,79 @@ JSON_PLAIN = Format.JSON_PLAIN
|
|
|
77
418
|
BLOSC = Format.BLOSC
|
|
78
419
|
GZIP = Format.GZIP
|
|
79
420
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
421
|
+
class VersionPresentError(RuntimeError):
|
|
422
|
+
"""
|
|
423
|
+
Exception raised in case a file was read which had a version, but no test version
|
|
424
|
+
was provided.
|
|
425
|
+
"""
|
|
426
|
+
pass
|
|
427
|
+
|
|
428
|
+
# ========================================================================
|
|
429
|
+
# Caching utilities
|
|
430
|
+
# ========================================================================
|
|
89
431
|
|
|
90
432
|
class CacheMode(object):
|
|
91
433
|
"""
|
|
92
|
-
|
|
93
|
-
A class which encodes standard behaviour of a caching strategy:
|
|
434
|
+
A class which encodes standard behaviour of a caching strategy.
|
|
94
435
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
436
|
+
**Summary mechanics:**
|
|
437
|
+
|
|
438
|
+
+-----------------------------------------+-------+-------+-------+---------+--------+----------+
|
|
439
|
+
| Action | on | gen | off | update | clear | readonly |
|
|
440
|
+
+=========================================+=======+=======+=======+=========+========+==========+
|
|
441
|
+
| load cache from disk if exists | x | x | | | | x |
|
|
442
|
+
+-----------------------------------------+-------+-------+-------+---------+--------+----------+
|
|
443
|
+
| write updates to disk | x | x | | x | | |
|
|
444
|
+
+-----------------------------------------+-------+-------+-------+---------+--------+----------+
|
|
445
|
+
| delete existing object | | | | | x | |
|
|
446
|
+
+-----------------------------------------+-------+-------+-------+---------+--------+----------+
|
|
447
|
+
| delete existing object if incompatible | x | | | x | x | |
|
|
448
|
+
+-----------------------------------------+-------+-------+-------+---------+--------+----------+
|
|
100
449
|
|
|
101
|
-
|
|
450
|
+
|
|
451
|
+
**Standard Caching Semantics**
|
|
452
|
+
|
|
453
|
+
Assuming we wish to cache results from calling a function ``f`` in a file named ``filename``
|
|
454
|
+
in a directory ``directory``, then this is the ``CacheMode`` waterfall:
|
|
455
|
+
|
|
456
|
+
.. code-block:: python
|
|
457
|
+
|
|
458
|
+
def cache_f( filename : str, directory : SubDir, version : str, cache_mode : CacheMode ):
|
|
459
|
+
if cache_mode.delete:
|
|
460
|
+
directory.delete(filename)
|
|
461
|
+
if cache_mode.read:
|
|
462
|
+
r = directory.read(filename,
|
|
463
|
+
default=None,
|
|
464
|
+
version=version,
|
|
465
|
+
raise_on_error=False,
|
|
466
|
+
delete_wrong_version=cache_mode.del_incomp
|
|
467
|
+
)
|
|
468
|
+
if not r is None:
|
|
469
|
+
return r
|
|
470
|
+
|
|
471
|
+
r = f(...) # compute result
|
|
472
|
+
|
|
473
|
+
if cache_mode.write:
|
|
474
|
+
directory.write(filename,
|
|
475
|
+
r,
|
|
476
|
+
version=version,
|
|
477
|
+
raise_on_error=False
|
|
478
|
+
)
|
|
479
|
+
|
|
480
|
+
return r
|
|
481
|
+
|
|
482
|
+
See :func:`cdxcore.subdir.SubDir.cache` for a comprehensive
|
|
483
|
+
implementation.
|
|
484
|
+
|
|
485
|
+
Parameters
|
|
486
|
+
----------
|
|
487
|
+
mode : str, optional
|
|
488
|
+
Which mode to use: ``"on"``, ``"gen"``, ``"off"``, ``"update"``, ``"clear"`` or ``"readonly"``.
|
|
489
|
+
|
|
490
|
+
The default is ``None`` in which case ``"on"`` is used.
|
|
102
491
|
"""
|
|
103
492
|
|
|
104
|
-
ON = "on"
|
|
493
|
+
ON = "on"
|
|
105
494
|
GEN = "gen"
|
|
106
495
|
OFF = "off"
|
|
107
496
|
UPDATE = "update"
|
|
@@ -109,22 +498,31 @@ class CacheMode(object):
|
|
|
109
498
|
READONLY = "readonly"
|
|
110
499
|
|
|
111
500
|
MODES = [ ON, GEN, OFF, UPDATE, CLEAR, READONLY ]
|
|
501
|
+
"""
|
|
502
|
+
List of available modes in text form.
|
|
503
|
+
This list can be used as ``cast`` parameter when calling :func:`cdxcore.config.Config.__call__`::
|
|
504
|
+
|
|
505
|
+
from cdxcore.config import Config
|
|
506
|
+
from cdxcore.subdir import CacheMode
|
|
507
|
+
|
|
508
|
+
def get_cache_mode( config : Config ) -> CacheMode:
|
|
509
|
+
return CacheMode( config("cache_mode", "on", CacheMode.MODES, CacheMode.HELP) )
|
|
510
|
+
"""
|
|
511
|
+
|
|
112
512
|
HELP = "'on' for standard caching; 'gen' for caching but keep existing incompatible files; 'off' to turn off; 'update' to overwrite any existing cache; 'clear' to clear existing caches; 'readonly' to read existing caches but not write new ones"
|
|
113
|
-
|
|
513
|
+
"""
|
|
514
|
+
Standard ``config`` help text, to be used with :func:`cdxcore.config.Config.__call__` as follows::
|
|
515
|
+
|
|
516
|
+
from cdxcore.config import Config
|
|
517
|
+
from cdxcore.subdir import CacheMode
|
|
518
|
+
|
|
519
|
+
def get_cache_mode( config : Config ) -> CacheMode:
|
|
520
|
+
return CacheMode( config("cache_mode", "on", CacheMode.MODES, CacheMode.HELP) )
|
|
521
|
+
"""
|
|
522
|
+
|
|
114
523
|
def __init__(self, mode : str = None ):
|
|
115
524
|
"""
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
on gen off update clear readonly
|
|
119
|
-
load upon start from disk if exists x x - - - x
|
|
120
|
-
write updates to disk x x - x - -
|
|
121
|
-
delete existing object upon start - - - - x -
|
|
122
|
-
delete existing object if incompatible x - - x x -
|
|
123
|
-
|
|
124
|
-
Parameters
|
|
125
|
-
----------
|
|
126
|
-
mode : str
|
|
127
|
-
Which mode to use.
|
|
525
|
+
:meta private:
|
|
128
526
|
"""
|
|
129
527
|
if isinstance( mode, CacheMode ):
|
|
130
528
|
return# id copy constuctor
|
|
@@ -145,22 +543,22 @@ class CacheMode(object):
|
|
|
145
543
|
|
|
146
544
|
@property
|
|
147
545
|
def read(self) -> bool:
|
|
148
|
-
""" Whether to load any existing data
|
|
546
|
+
""" Whether to load any existing cached data. """
|
|
149
547
|
return self._read
|
|
150
548
|
|
|
151
549
|
@property
|
|
152
550
|
def write(self) -> bool:
|
|
153
|
-
""" Whether to
|
|
551
|
+
""" Whether to cache newly computed data to disk. """
|
|
154
552
|
return self._write
|
|
155
553
|
|
|
156
554
|
@property
|
|
157
555
|
def delete(self) -> bool:
|
|
158
|
-
""" Whether to delete existing data """
|
|
556
|
+
""" Whether to delete existing data. """
|
|
159
557
|
return self._delete
|
|
160
558
|
|
|
161
559
|
@property
|
|
162
560
|
def del_incomp(self) -> bool:
|
|
163
|
-
""" Whether to delete existing data if it is not compatible """
|
|
561
|
+
""" Whether to delete existing data if it is not compatible or has the wrong version. """
|
|
164
562
|
return self._del_in
|
|
165
563
|
|
|
166
564
|
def __str__(self) -> str:# NOQA
|
|
@@ -175,311 +573,327 @@ class CacheMode(object):
|
|
|
175
573
|
|
|
176
574
|
@property
|
|
177
575
|
def is_off(self) -> bool:
|
|
178
|
-
""" Whether this cache mode is OFF """
|
|
576
|
+
""" Whether this cache mode is OFF. """
|
|
179
577
|
return self.mode == self.OFF
|
|
180
578
|
|
|
181
579
|
@property
|
|
182
580
|
def is_on(self) -> bool:
|
|
183
|
-
""" Whether this cache mode is ON """
|
|
581
|
+
""" Whether this cache mode is ON. """
|
|
184
582
|
return self.mode == self.ON
|
|
185
583
|
|
|
186
584
|
@property
|
|
187
585
|
def is_gen(self) -> bool:
|
|
188
|
-
""" Whether this cache mode is GEN """
|
|
586
|
+
""" Whether this cache mode is GEN. """
|
|
189
587
|
return self.mode == self.GEN
|
|
190
588
|
|
|
191
589
|
@property
|
|
192
590
|
def is_update(self) -> bool:
|
|
193
|
-
""" Whether this cache mode is UPDATE """
|
|
591
|
+
""" Whether this cache mode is UPDATE. """
|
|
194
592
|
return self.mode == self.UPDATE
|
|
195
593
|
|
|
196
594
|
@property
|
|
197
595
|
def is_clear(self) -> bool:
|
|
198
|
-
""" Whether this cache mode is CLEAR """
|
|
596
|
+
""" Whether this cache mode is CLEAR. """
|
|
199
597
|
return self.mode == self.CLEAR
|
|
200
598
|
|
|
201
599
|
@property
|
|
202
600
|
def is_readonly(self) -> bool:
|
|
203
|
-
""" Whether this cache mode is READONLY """
|
|
601
|
+
""" Whether this cache mode is READONLY. """
|
|
204
602
|
return self.mode == self.READONLY
|
|
205
603
|
|
|
206
604
|
class CacheController( object ):
|
|
207
|
-
"""
|
|
208
|
-
Central control for
|
|
209
|
-
|
|
605
|
+
r"""
|
|
606
|
+
Central control parameters for caching.
|
|
607
|
+
|
|
608
|
+
When a parameter object of this type
|
|
609
|
+
is assigned to a :class:`cdxcore.subdir.SubDir`,
|
|
610
|
+
then it is passed on when sub-directories are
|
|
611
|
+
created. This way all ``SubDir`` have the same
|
|
612
|
+
caching behaviour.
|
|
613
|
+
|
|
614
|
+
See :class:`cdxcore.subdir.CacheController` for
|
|
615
|
+
a list of control parameters.
|
|
616
|
+
|
|
617
|
+
Parameters
|
|
618
|
+
----------
|
|
619
|
+
exclude_arg_types : list[type], optional
|
|
620
|
+
List of types to exclude from producing unique ids from function arguments.
|
|
621
|
+
|
|
622
|
+
Defaults to ``[Context]``.
|
|
623
|
+
|
|
624
|
+
cache_mode : CacheMode, optional
|
|
625
|
+
Top level cache control.
|
|
626
|
+
Set to "OFF" to turn off all caching.
|
|
627
|
+
Default is "ON".
|
|
628
|
+
|
|
629
|
+
max_filename_length : int, optional
|
|
630
|
+
Maximum filename length. If a unique id exceeds this length, a hash of length
|
|
631
|
+
``hash_length`` will be integrated into the file name.
|
|
632
|
+
See :class:`cdxcore.uniquehash.NamedUniqueHash`.
|
|
633
|
+
Default is ``48``.
|
|
634
|
+
|
|
635
|
+
hash_length : int, optional
|
|
636
|
+
Length of the hash used to make sure each filename is unique
|
|
637
|
+
See :class:`cdxcore.uniquehash.NamedUniqueHash`.
|
|
638
|
+
Default is ``8``.
|
|
639
|
+
|
|
640
|
+
debug_verbose : :class:`cdxcore.verbose.Context`, optional
|
|
641
|
+
If not ``None`` print caching process messages to this object.
|
|
642
|
+
|
|
643
|
+
Default is ``None``.
|
|
644
|
+
|
|
645
|
+
keep_last_arguments : bool, optional
|
|
646
|
+
Keep a dictionary of all parameters as string representations after each function call.
|
|
647
|
+
If the function ``F`` was decorated using :meth:`cdxcore.subdir.SubDir.cache`,
|
|
648
|
+
you can access this information via ``F.cache_info.last_arguments``.
|
|
649
|
+
|
|
650
|
+
Note that strings are limited to 100 characters per argument to avoid memory
|
|
651
|
+
overload when large objects are passed.
|
|
652
|
+
|
|
653
|
+
Default is ``False``.
|
|
210
654
|
"""
|
|
211
655
|
|
|
212
656
|
def __init__(self, *,
|
|
213
657
|
exclude_arg_types : list[type] = [Context],
|
|
214
658
|
cache_mode : CacheMode = CacheMode.ON,
|
|
215
659
|
max_filename_length: int = 48,
|
|
216
|
-
hash_length : int =
|
|
660
|
+
hash_length : int = 8,
|
|
217
661
|
debug_verbose : Context = None,
|
|
218
662
|
keep_last_arguments: bool = False
|
|
219
663
|
):
|
|
220
664
|
"""
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
exclude_arg_types :
|
|
226
|
-
List of types to exclude from producing unique ids from function arguments. Defaults to [SubDir, Context]
|
|
227
|
-
cache_mode :
|
|
228
|
-
Top level cache control. Set to "OFF" to turn off all caching. Default is "ON"
|
|
229
|
-
max_filename_length :
|
|
230
|
-
Maximum filename length. If unique id's exceed the file name a hash of length 'hash_length' will be intergated into the file name.
|
|
231
|
-
See cdxbasics.util.namedUniqueHashExt and cdxbasics.util.uniqueLabelExt
|
|
232
|
-
hash_length :
|
|
233
|
-
Length of the hash used to make sure each filename is unique
|
|
234
|
-
See cdxbasics.util.namedUniqueHashExt and cdxbasics.util.uniqueLabelExt
|
|
235
|
-
debug_verbose :
|
|
236
|
-
If non-None print caching process messages to this object.
|
|
237
|
-
keep_last_arguments :
|
|
238
|
-
keep a dictionary of all parameters as string representations after each function call.
|
|
239
|
-
If the function F was decorated using SubDir.cache(), you can access this information via
|
|
240
|
-
F.cache_info.last_arguments
|
|
241
|
-
Note that strings are limited to 100 characters per argument to avoid memory
|
|
242
|
-
overload when large objects are passed.
|
|
243
|
-
"""
|
|
244
|
-
max_filename_length = int(max_filename_length)
|
|
245
|
-
hash_length = int(hash_length)
|
|
665
|
+
:meta private:
|
|
666
|
+
"""
|
|
667
|
+
max_filename_length = int(max_filename_length)
|
|
668
|
+
hash_length = int(hash_length)
|
|
246
669
|
assert max_filename_length>0, ("'max_filename_length' must be positive")
|
|
247
670
|
assert hash_length>0 and hash_length<=max_filename_length, ("'hash_length' must be positive and at most 'max_filename_length'")
|
|
248
671
|
assert max_filename_length>=hash_length, ("'hash_length' must not exceed 'max_filename_length")
|
|
249
672
|
self.cache_mode = CacheMode(cache_mode if not cache_mode is None else CacheMode.ON)
|
|
250
|
-
self.debug_verbose = debug_verbose
|
|
673
|
+
self.debug_verbose = Context(debug_verbose) if isinstance(debug_verbose, (int,str)) else debug_verbose
|
|
251
674
|
self.exclude_arg_types = set(exclude_arg_types) if not exclude_arg_types is None else None
|
|
252
|
-
self.versioned =
|
|
253
|
-
self.
|
|
254
|
-
self.
|
|
675
|
+
self.versioned = PrettyObject() # list
|
|
676
|
+
self.labelledFileName = NamedUniqueHash(max_length=max_filename_length,id_length=hash_length,filename_by=DEF_FILE_NAME_MAP)
|
|
677
|
+
self.uniqueFileName = UniqueLabel(max_length=max_filename_length,id_length=hash_length,filename_by=None)
|
|
255
678
|
self.keep_last_arguments = keep_last_arguments
|
|
256
679
|
|
|
257
680
|
default_cacheController = CacheController()
|
|
681
|
+
#
|
|
258
682
|
|
|
683
|
+
# ========================================================================
|
|
684
|
+
# SubDir
|
|
685
|
+
# ========================================================================
|
|
259
686
|
|
|
260
|
-
class
|
|
261
|
-
"""
|
|
262
|
-
|
|
687
|
+
class SubDir(object):
|
|
688
|
+
r"""
|
|
689
|
+
``SubDir`` implements a transparent i/o
|
|
690
|
+
interface for storing data in files.
|
|
263
691
|
|
|
264
|
-
|
|
265
|
-
def __init__(self):
|
|
266
|
-
""" track cache files """
|
|
267
|
-
self._files = []
|
|
268
|
-
def __iadd__(self, new_file):
|
|
269
|
-
""" Add a new file to the tracker """
|
|
270
|
-
self._files.append( new_file )
|
|
271
|
-
def delete_cache_files(self):
|
|
272
|
-
""" Delete all tracked files """
|
|
273
|
-
for file in self._files:
|
|
274
|
-
if os.path.exists(file):
|
|
275
|
-
os.remove(file)
|
|
276
|
-
self._files = []
|
|
277
|
-
def __str__(self) -> str:#NOQA
|
|
278
|
-
return f"Tracked: {self._files}"
|
|
279
|
-
def __repr__(self) -> str:#NOQA
|
|
280
|
-
return f"Tracked: {self._files}"
|
|
692
|
+
**Directories**
|
|
281
693
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
class CacheInfo(object):
|
|
286
|
-
pass
|
|
694
|
+
Instantiate a ``SubDir`` with a directory name. There are some
|
|
695
|
+
pre-defined relative system paths the name can refer to::
|
|
287
696
|
|
|
288
|
-
|
|
289
|
-
#
|
|
697
|
+
from cdxcore.subdir import SubDir
|
|
698
|
+
parent = SubDir("!/subdir") # relative to system temp directory
|
|
699
|
+
parent = SubDir("~/subdir") # relative to user home directory
|
|
700
|
+
parent = SubDir("./subdir") # relative to current working directory (explicit)
|
|
701
|
+
parent = SubDir("subdir") # relative to current working directory (implicit)
|
|
702
|
+
parent = SubDir("/tmp/subdir") # absolute path (linux)
|
|
703
|
+
parent = SubDir("C:/temp/subdir") # absolute path (windows)
|
|
704
|
+
parent = SubDir("") # current working directory
|
|
705
|
+
|
|
706
|
+
Sub-directories can be generated in a number of ways::
|
|
290
707
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
708
|
+
subDir = parent('subdir') # using __call__
|
|
709
|
+
subDir = SubDir('subdir', parent) # explicit constructor
|
|
710
|
+
subDir = SubDir('subdir', parent="!/") # explicit constructor with parent being a string
|
|
711
|
+
|
|
712
|
+
Files managed by ``SubDir`` will usually have the same extension.
|
|
713
|
+
This extension can be specified with ``ext``, or as part of the directory string::
|
|
714
|
+
|
|
715
|
+
subDir = SubDir("~/subdir", ext="bin") # set extension to 'bin'
|
|
716
|
+
subDir = SubDir("~/subdir;*.bin") # set extension to 'bin'
|
|
717
|
+
|
|
718
|
+
Leaving the extension as default ``None`` allows ``SubDir`` to automatically use
|
|
719
|
+
the extension associated with any specified format.
|
|
295
720
|
|
|
296
|
-
|
|
297
|
-
Absolute: parentDir = SubDir("C:/temp/root")
|
|
298
|
-
In system temp directory: parentDir = SubDir("!/root")
|
|
299
|
-
In user directory: parentDir = SubDir("~/root")
|
|
300
|
-
Relative to current directory: parentDir = SubDir("./root")
|
|
721
|
+
**Copy Constructor**
|
|
301
722
|
|
|
302
|
-
|
|
303
|
-
assume f() will want to store some data:
|
|
723
|
+
The constructor is shallow.
|
|
304
724
|
|
|
305
|
-
|
|
725
|
+
**File I/O**
|
|
306
726
|
|
|
307
|
-
|
|
308
|
-
or
|
|
309
|
-
subDir = SubDir('subdir', parentDir)
|
|
310
|
-
:
|
|
311
|
-
:
|
|
312
|
-
Write data:
|
|
727
|
+
Write data with :meth:`cdxcore.subdir.SubDir.write`::
|
|
313
728
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
subDir.write('item3',item3) <-- explicit
|
|
729
|
+
subDir.write('item3',item3) # explicit
|
|
730
|
+
subDir['item1'] = item1 # dictionary style
|
|
317
731
|
|
|
318
|
-
|
|
732
|
+
Note that :meth:`cdxcore.subdir.SubDir.write` can write to multiple files at the same time.
|
|
319
733
|
|
|
320
|
-
|
|
734
|
+
Read data with :meth:`cdxcore.subdir.SubDir.read`::
|
|
321
735
|
|
|
322
|
-
|
|
736
|
+
item = subDir('item', 'i1') # returns 'i1' if not found.
|
|
737
|
+
item = subDir.read('item') # returns None if not found
|
|
738
|
+
item = subDir.read('item','i2') # returns 'i2' if not found
|
|
739
|
+
item = subDir['item'] # raises a KeyError if not found
|
|
323
740
|
|
|
324
|
-
|
|
741
|
+
Treat files in a directory like dictionaries::
|
|
325
742
|
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
item = subDir['item'] <-- throws a KeyError if not found
|
|
330
|
-
item = subDir.item <-- throws an AttributeError if not found
|
|
743
|
+
for file in subDir:
|
|
744
|
+
data = subDir[file]
|
|
745
|
+
f(file, data)
|
|
331
746
|
|
|
332
|
-
|
|
747
|
+
for file, data in subDir.items():
|
|
748
|
+
f(file, data)
|
|
333
749
|
|
|
334
|
-
|
|
750
|
+
Delete items::
|
|
335
751
|
|
|
336
|
-
|
|
752
|
+
del subDir['item'] # silently fails if 'item' does not exist
|
|
753
|
+
subDir.delete('item') # silently fails if 'item' does not exist
|
|
754
|
+
subDir.delete('item', True) # raises a KeyError if 'item' does not exist
|
|
337
755
|
|
|
338
|
-
|
|
339
|
-
data = subDir[item]
|
|
756
|
+
Cleaning up::
|
|
340
757
|
|
|
341
|
-
|
|
758
|
+
parent.delete_all_content() # silently deletes all files with matching extensions, and sub directories.
|
|
342
759
|
|
|
343
|
-
|
|
344
|
-
del subDir.item <-- silently fails if 'item' does not exist
|
|
345
|
-
subDir.delete('item') <-- silently fails if 'item' does not exist
|
|
346
|
-
subDir.delete('item', True) <-- throw a KeyError if 'item' does not exit
|
|
760
|
+
**File Format**
|
|
347
761
|
|
|
348
|
-
|
|
762
|
+
``SubDir`` supports a number of file formats via :class:`cdxcore.subdir.Format`.
|
|
763
|
+
Those can be controlled with the ``fmt`` keyword in various functions not least
|
|
764
|
+
:class:`cdxcore.subdir.SubDir`::
|
|
349
765
|
|
|
350
|
-
|
|
766
|
+
subdir = SubDir("!/.test", fmt=SubDir.JSON_PICKLE)
|
|
351
767
|
|
|
352
|
-
|
|
353
|
-
The most straightfoward way is to specify the format of the directory itself:
|
|
768
|
+
See :class:`cdxcore.subdir.Format` for supported formats.
|
|
354
769
|
|
|
355
|
-
|
|
770
|
+
Parameters
|
|
771
|
+
----------
|
|
772
|
+
name : str
|
|
773
|
+
Name of the directory.
|
|
774
|
+
|
|
775
|
+
The name may start with any of the following special characters:
|
|
776
|
+
|
|
777
|
+
* ``'.'`` for current directory
|
|
778
|
+
* ``'~'`` for home directory
|
|
779
|
+
* ``'!'`` for system default temp directory
|
|
780
|
+
|
|
781
|
+
The directory name may also contain a formatting string for defining ``ext`` on the fly:
|
|
782
|
+
for example use ``"!/test;*.bin"`` to specify a directory ``"test"`` in the user's
|
|
783
|
+
temp directory with extension ``"bin"``.
|
|
784
|
+
|
|
785
|
+
The directory name can be set to ``None`` in which case it is always empty
|
|
786
|
+
and attempts to write to it fail with :class:`EOFError`.
|
|
787
|
+
|
|
788
|
+
parent : str | SubDir, optional
|
|
789
|
+
Parent directory.
|
|
790
|
+
|
|
791
|
+
If ``parent`` is a :class:`cdxcore.subdir.SubDir` then its parameters are used
|
|
792
|
+
as default values here.
|
|
356
793
|
|
|
357
|
-
|
|
794
|
+
Default is ``None``.
|
|
795
|
+
|
|
796
|
+
ext : str, optional
|
|
797
|
+
Extension for files managed by this ``SubDir``. All files will share the same extension.
|
|
358
798
|
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
using, essentially, GZIP.
|
|
799
|
+
If set to ``""`` no extension is assigned to this directory. That means, for example, that
|
|
800
|
+
:meth:`cdxcore.subdir.SubDir.files` returns all files contained in the directory, not
|
|
801
|
+
just files with a specific extension.
|
|
802
|
+
|
|
803
|
+
If ``None``, use an extension depending on ``fmt``:
|
|
804
|
+
|
|
805
|
+
* 'pck' for the default PICKLE format.
|
|
806
|
+
* 'json' for JSON_PLAIN.
|
|
807
|
+
* 'jpck' for JSON_PICKLE.
|
|
808
|
+
* 'zbsc' for BLOSC.
|
|
809
|
+
* 'pgz' for GZIP.
|
|
810
|
+
|
|
811
|
+
Default is ``None``.
|
|
812
|
+
|
|
813
|
+
fmt : :class:`cdxcore.subdir.Format`, optional
|
|
375
814
|
|
|
376
|
-
|
|
815
|
+
One of the :class:`cdxcore.subdir.Format` codes.
|
|
816
|
+
If ``ext`` is left as ``None`` then setting a format will also set the corresponding ``ext``.
|
|
817
|
+
|
|
818
|
+
Default is ``Format.PICKLE``.
|
|
377
819
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
820
|
+
create_directory : bool | None, optional
|
|
821
|
+
|
|
822
|
+
Whether to create the directory upon creation of the ``SubDir`` object; otherwise it will be created upon first
|
|
823
|
+
:meth:`cdxcore.subdir.SubDir.write`.
|
|
824
|
+
|
|
825
|
+
Set to ``None`` to use the setting of the parent directory, or ``False`` if no parent
|
|
826
|
+
is specified.
|
|
827
|
+
|
|
828
|
+
Default is ``None``.
|
|
384
829
|
|
|
385
|
-
|
|
830
|
+
delete_everything : bool, optional
|
|
831
|
+
|
|
832
|
+
Delete all contents in the newly defined sub directory upon creation.
|
|
386
833
|
|
|
387
|
-
|
|
834
|
+
Default is ``False``.
|
|
835
|
+
|
|
836
|
+
cache_controller : :class:`cdxcore.subdir.CacheController`, optional
|
|
837
|
+
|
|
838
|
+
An object which fine-tunes the behaviour of :meth:`cdxcore.subdir.SubDir.cache`.
|
|
839
|
+
See that function's documentation for further details. Default is ``None``.
|
|
388
840
|
"""
|
|
389
841
|
|
|
390
842
|
class __RETURN_SUB_DIRECTORY(object):
|
|
391
843
|
pass
|
|
844
|
+
""" :meta private: """
|
|
392
845
|
|
|
393
|
-
Format = Format
|
|
394
|
-
|
|
846
|
+
Format = Format # :meta private
|
|
847
|
+
""" :meta private: """
|
|
848
|
+
|
|
849
|
+
PICKLE = Format.PICKLE
|
|
850
|
+
""" :meta private: """
|
|
851
|
+
|
|
395
852
|
JSON_PICKLE = Format.JSON_PICKLE
|
|
853
|
+
""" :meta private: """
|
|
854
|
+
|
|
396
855
|
JSON_PLAIN = Format.JSON_PLAIN
|
|
856
|
+
""" :meta private: """
|
|
857
|
+
|
|
397
858
|
BLOSC = Format.BLOSC
|
|
859
|
+
""" :meta private: """
|
|
860
|
+
|
|
398
861
|
GZIP = Format.GZIP
|
|
399
|
-
|
|
400
|
-
|
|
862
|
+
""" :meta private: """
|
|
863
|
+
|
|
401
864
|
RETURN_SUB_DIRECTORY = __RETURN_SUB_DIRECTORY
|
|
865
|
+
""" :meta private: """
|
|
866
|
+
|
|
402
867
|
DEFAULT_FORMAT = Format.PICKLE
|
|
403
|
-
|
|
868
|
+
""" Default :class:`cdxcore.subdir.Format`: ``Format.PICKLE`` """
|
|
869
|
+
|
|
404
870
|
EXT_FMT_AUTO = "*"
|
|
871
|
+
""" :meta private: """
|
|
405
872
|
|
|
406
873
|
MAX_VERSION_BINARY_LEN = 128
|
|
407
|
-
|
|
874
|
+
""" :meta private: """
|
|
875
|
+
|
|
408
876
|
VER_NORMAL = 0
|
|
877
|
+
""" :meta private: """
|
|
409
878
|
VER_CHECK = 1
|
|
879
|
+
""" :meta private: """
|
|
410
880
|
VER_RETURN = 2
|
|
881
|
+
""" :meta private: """
|
|
882
|
+
|
|
411
883
|
|
|
412
884
|
def __init__(self, name : str,
|
|
413
|
-
parent = None, *,
|
|
885
|
+
parent : str|type = None, *,
|
|
414
886
|
ext : str = None,
|
|
415
887
|
fmt : Format = None,
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
888
|
+
create_directory : bool = None,
|
|
889
|
+
delete_everything : bool = False,
|
|
890
|
+
cache_controller : CacheController = None
|
|
419
891
|
):
|
|
420
892
|
"""
|
|
421
|
-
Instantiates a sub directory which contains
|
|
422
|
-
By default the directory is created.
|
|
423
|
-
|
|
424
|
-
Absolute directories
|
|
425
|
-
sd = SubDir("!/subdir") - relative to system temp directory
|
|
426
|
-
sd = SubDir("~/subdir") - relative to user home directory
|
|
427
|
-
sd = SubDir("./subdir") - relative to current working directory (explicit)
|
|
428
|
-
sd = SubDir("subdir") - relative to current working directory (implicit)
|
|
429
|
-
sd = SubDir("/tmp/subdir") - absolute path (linux)
|
|
430
|
-
sd = SubDir("C:/temp/subdir") - absolute path (windows)
|
|
431
|
-
Short-cut
|
|
432
|
-
sd = SubDir("") - current working directory
|
|
433
|
-
|
|
434
|
-
It is often desired that the user specifies a sub-directory name under some common parent directory.
|
|
435
|
-
You can create sub directories if you provide a 'parent' directory:
|
|
436
|
-
sd2 = SubDir("subdir2", parent=sd) - relative to other sub directory
|
|
437
|
-
sd2 = sd("subdir2") - using call operator
|
|
438
|
-
Works with strings, too:
|
|
439
|
-
sd2 = SubDir("subdir2", parent="~/my_config") - relative to ~/my_config
|
|
440
|
-
|
|
441
|
-
All files managed by SubDir will have the same extension.
|
|
442
|
-
The extension can be specified with 'ext', or as part of the directory string:
|
|
443
|
-
sd = SubDir("~/subdir;*.bin") - set extension to 'bin'
|
|
444
|
-
|
|
445
|
-
COPY CONSTRUCTION
|
|
446
|
-
This function also allows copy construction and constrution from a repr() string.
|
|
447
|
-
|
|
448
|
-
HANDLING KEYS
|
|
449
|
-
SubDirs allows reading data using the item and attribute notation, i.e. we may use
|
|
450
|
-
sd = SubDir("~/subdir")
|
|
451
|
-
x = sd.x
|
|
452
|
-
y = sd['y']
|
|
453
|
-
If the respective keys are not found, exceptions are thrown.
|
|
454
|
-
|
|
455
|
-
NONE OBJECTS
|
|
456
|
-
It is possible to set the directory name to 'None'. In this case the directory will behave as if:
|
|
457
|
-
No files exist
|
|
458
|
-
Writing fails with a EOFError.
|
|
893
|
+
Instantiates a sub directory which contains files with a common extension.
|
|
459
894
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
name - Name of the directory.
|
|
463
|
-
'.' for current directory
|
|
464
|
-
'~' for home directory
|
|
465
|
-
'!' for system default temp directory
|
|
466
|
-
May contain a formatting string for defining 'ext' on the fly:
|
|
467
|
-
Use "!/test;*.bin" to specify 'test' in the system temp directory as root directory with extension 'bin'
|
|
468
|
-
Can be set to None, see above.
|
|
469
|
-
parent - Parent directory. If provided, will also set defaults for 'ext' and 'raiseOnError'
|
|
470
|
-
ext - standard file extenson for data files. All files will share the same extension.
|
|
471
|
-
If None, use the parent extension, or if that is not specified use an extension depending on 'fmt':
|
|
472
|
-
'pck' for the default PICKLE format
|
|
473
|
-
'json' for JSON_PLAIN
|
|
474
|
-
'jpck' for JSON_PICKLE
|
|
475
|
-
Set to "" to turn off managing extensions.
|
|
476
|
-
fmt - format, current pickle or json
|
|
477
|
-
eraseEverything - delete all contents in the newly defined subdir
|
|
478
|
-
createDirectory - whether to create the directory.
|
|
479
|
-
Otherwise it will be created upon first write().
|
|
480
|
-
Set to None to use the setting of the parent directory
|
|
481
|
-
"""
|
|
482
|
-
createDirectory = bool(createDirectory) if not createDirectory is None else None
|
|
895
|
+
"""
|
|
896
|
+
create_directory = bool(create_directory) if not create_directory is None else None
|
|
483
897
|
|
|
484
898
|
# copy constructor support
|
|
485
899
|
if isinstance(name, SubDir):
|
|
@@ -487,9 +901,9 @@ class SubDir(object):
|
|
|
487
901
|
self._path = name._path
|
|
488
902
|
self._ext = name._ext if ext is None else ext
|
|
489
903
|
self._fmt = name._fmt if fmt is None else fmt
|
|
490
|
-
self._crt = name._crt if
|
|
491
|
-
self._cctrl = name._cctrl if
|
|
492
|
-
if
|
|
904
|
+
self._crt = name._crt if create_directory is None else create_directory
|
|
905
|
+
self._cctrl = name._cctrl if cache_controller is None else cache_controller
|
|
906
|
+
if delete_everything: raise ValueError( "Cannot use 'delete_everything' when cloning a directory")
|
|
493
907
|
return
|
|
494
908
|
|
|
495
909
|
# reconstruction from a dictionary
|
|
@@ -498,14 +912,14 @@ class SubDir(object):
|
|
|
498
912
|
self._path = name['_path']
|
|
499
913
|
self._ext = name['_ext'] if ext is None else ext
|
|
500
914
|
self._fmt = name['_fmt'] if fmt is None else fmt
|
|
501
|
-
self._crt = name['_crt'] if
|
|
502
|
-
self._cctrl = name['_cctrl'] if
|
|
503
|
-
if
|
|
915
|
+
self._crt = name['_crt'] if create_directory is None else create_directory
|
|
916
|
+
self._cctrl = name['_cctrl'] if cache_controller is None else cache_controller
|
|
917
|
+
if delete_everything: raise ValueError( "Cannot use 'delete_everything' when cloning a directory")
|
|
504
918
|
return
|
|
505
919
|
|
|
506
920
|
# parent
|
|
507
921
|
if isinstance(parent, str):
|
|
508
|
-
parent = SubDir( parent, ext=ext, fmt=fmt,
|
|
922
|
+
parent = SubDir( parent, ext=ext, fmt=fmt, create_directory=create_directory, cache_controller=cache_controller )
|
|
509
923
|
if not parent is None and not isinstance(parent, SubDir):
|
|
510
924
|
raise ValueError( "'parent' must be SubDir, str, or None. Found object of type '{type(parent)}'")
|
|
511
925
|
|
|
@@ -543,15 +957,15 @@ class SubDir(object):
|
|
|
543
957
|
else:
|
|
544
958
|
self._ext = SubDir._extract_ext(ext)
|
|
545
959
|
|
|
546
|
-
#
|
|
547
|
-
if
|
|
548
|
-
self._crt =
|
|
960
|
+
# create_directory
|
|
961
|
+
if create_directory is None:
|
|
962
|
+
self._crt = False if parent is None else parent._crt
|
|
549
963
|
else:
|
|
550
|
-
self._crt = bool(
|
|
964
|
+
self._crt = bool(create_directory)
|
|
551
965
|
|
|
552
966
|
# cache controller
|
|
553
|
-
assert type(
|
|
554
|
-
self._cctrl =
|
|
967
|
+
assert cache_controller is None or type(cache_controller).__name__ == CacheController.__name__, ("'cache_controller' should be of type 'CacheController'", type(cache_controller))
|
|
968
|
+
self._cctrl = cache_controller
|
|
555
969
|
|
|
556
970
|
# name
|
|
557
971
|
if name is None:
|
|
@@ -566,12 +980,12 @@ class SubDir(object):
|
|
|
566
980
|
if len(name) > 1 and name[1] != '/':
|
|
567
981
|
raise ValueError( txtfmt("If 'name' starts with '%s', then the second character must be '/' (or '\\' on windows). Found 'name' set to '%s'", name[:1], _name ))
|
|
568
982
|
if name[0] == '!':
|
|
569
|
-
name = SubDir.
|
|
983
|
+
name = SubDir.temp_dir()[:-1] + name[1:]
|
|
570
984
|
elif name[0] == ".":
|
|
571
|
-
name = SubDir.
|
|
985
|
+
name = SubDir.working_dir()[:-1] + name[1:]
|
|
572
986
|
else:
|
|
573
987
|
assert name[0] == "~", ("Internal error", name[0] )
|
|
574
|
-
name = SubDir.
|
|
988
|
+
name = SubDir.user_dir()[:-1] + name[1:]
|
|
575
989
|
elif name == "..":
|
|
576
990
|
error("Cannot use name '..'")
|
|
577
991
|
elif not parent is None:
|
|
@@ -587,33 +1001,37 @@ class SubDir(object):
|
|
|
587
1001
|
self._path = os.path.abspath(name) + '/'
|
|
588
1002
|
self._path = self._path.replace('\\','/')
|
|
589
1003
|
|
|
590
|
-
if
|
|
591
|
-
self.
|
|
1004
|
+
if delete_everything:
|
|
1005
|
+
self.delete_everything(keep_directory=self._crt)
|
|
592
1006
|
if self._crt:
|
|
593
|
-
self.
|
|
1007
|
+
self.create_directory()
|
|
594
1008
|
|
|
595
1009
|
@staticmethod
|
|
596
|
-
def
|
|
1010
|
+
def expand_std_root( name ):
|
|
597
1011
|
"""
|
|
598
|
-
Expands
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
1012
|
+
Expands ``name`` by a standardized root directory if provided:
|
|
1013
|
+
|
|
1014
|
+
The first character of ``name`` can be either of:
|
|
1015
|
+
|
|
1016
|
+
* ``"!"`` returns :meth:`cdxcore.subdir.SubDir.temp_dir()`.
|
|
1017
|
+
* ``"."`` returns :meth:`cdxcore.subdir.SubDir.working_dir()`.
|
|
1018
|
+
* ``"~"`` returns :meth:`cdxcore.subdir.SubDir.user_dir()`.
|
|
1019
|
+
|
|
1020
|
+
If neither of these matches the first character, ``name``
|
|
1021
|
+
is returned as is.
|
|
603
1022
|
"""
|
|
604
1023
|
if len(name) < 2 or name[0] not in ['.','!','~'] or name[1] not in ["\\","/"]:
|
|
605
1024
|
return name
|
|
606
1025
|
if name[0] == '!':
|
|
607
|
-
return SubDir.
|
|
1026
|
+
return SubDir.temp_dir() + name[2:]
|
|
608
1027
|
elif name[0] == ".":
|
|
609
|
-
return SubDir.
|
|
1028
|
+
return SubDir.working_dir() + name[2:]
|
|
610
1029
|
else:
|
|
611
|
-
return SubDir.
|
|
1030
|
+
return SubDir.user_dir() + name[2:]
|
|
612
1031
|
|
|
613
|
-
def
|
|
1032
|
+
def create_directory( self ):
|
|
614
1033
|
"""
|
|
615
|
-
Creates the directory if it doesn't exist yet.
|
|
616
|
-
Does not do anything if is_none.
|
|
1034
|
+
Creates the current directory if it doesn't exist yet.
|
|
617
1035
|
"""
|
|
618
1036
|
# create directory/clean up
|
|
619
1037
|
if self._path is None:
|
|
@@ -628,8 +1046,8 @@ class SubDir(object):
|
|
|
628
1046
|
if not os.path.isdir(self._path[:-1]):
|
|
629
1047
|
raise NotADirectoryError(txtfmt( "Cannot use sub directory %s: object exists but is not a directory", self._path[:-1] ))
|
|
630
1048
|
|
|
631
|
-
def
|
|
632
|
-
"""
|
|
1049
|
+
def path_exists(self) -> bool:
|
|
1050
|
+
""" Whether the current directory exists """
|
|
633
1051
|
return os.path.exists( self._path[:-1] ) if not self._path is None else False
|
|
634
1052
|
|
|
635
1053
|
# -- a few basic properties --
|
|
@@ -659,60 +1077,79 @@ class SubDir(object):
|
|
|
659
1077
|
|
|
660
1078
|
@property
|
|
661
1079
|
def is_none(self) -> bool:
|
|
662
|
-
""" Whether this object is
|
|
1080
|
+
""" Whether this object is ``None`` or not. For such a ``SubDir`` object no files exist, and writing any file will fail. """
|
|
663
1081
|
return self._path is None
|
|
664
1082
|
|
|
665
1083
|
@property
|
|
666
1084
|
def path(self) -> str:
|
|
667
1085
|
"""
|
|
668
|
-
Return current path, including trailing '/'
|
|
669
|
-
|
|
1086
|
+
Return current path, including trailing ``'/'``.
|
|
1087
|
+
|
|
1088
|
+
Note that the path may not exist yet. If existence is required, consider using
|
|
1089
|
+
:meth:`cdxcore.subdir.SubDir.existing_path`.
|
|
670
1090
|
"""
|
|
671
1091
|
return self._path
|
|
672
1092
|
|
|
673
1093
|
@property
|
|
674
1094
|
def existing_path(self) -> str:
|
|
675
1095
|
"""
|
|
676
|
-
Return current path, including training '/'
|
|
677
|
-
|
|
1096
|
+
Return current path, including trailing ``'/'``.
|
|
1097
|
+
|
|
1098
|
+
``existing_path`` ensures that the directory structure exists (or raises an exception).
|
|
1099
|
+
Use :meth:`cdxcore.subdir.SubDir.path` if creation on the fly is not desired.
|
|
678
1100
|
"""
|
|
679
|
-
self.
|
|
1101
|
+
self.create_directory()
|
|
680
1102
|
return self.path
|
|
681
1103
|
|
|
682
1104
|
@property
|
|
683
1105
|
def fmt(self) -> Format:
|
|
684
|
-
""" Returns current
|
|
1106
|
+
""" Returns current :class:`cdxcore.subdir.Format`. """
|
|
685
1107
|
return self._fmt
|
|
686
1108
|
|
|
687
1109
|
@property
|
|
688
1110
|
def ext(self) -> str:
|
|
689
1111
|
"""
|
|
690
|
-
Returns the common extension of the files in this directory, including leading '.'
|
|
691
|
-
Resolves
|
|
1112
|
+
Returns the common extension of the files in this directory, including leading ``'.'``.
|
|
1113
|
+
Resolves ``"*"`` into the extension associated with the current :class:`cdxcore.subdir.Format`.
|
|
692
1114
|
"""
|
|
693
1115
|
return self._ext if self._ext != self.EXT_FMT_AUTO else self._auto_ext(self._fmt)
|
|
694
1116
|
|
|
695
|
-
def
|
|
1117
|
+
def auto_ext( self, ext_or_fmt : str|Format = None ) -> str:
|
|
696
1118
|
"""
|
|
697
|
-
Computes the effective extension based on inputs
|
|
698
|
-
|
|
699
|
-
|
|
1119
|
+
Computes the effective extension based on the input ``ext_or_fmt``,
|
|
1120
|
+
and the current settings for ``self``.
|
|
1121
|
+
|
|
1122
|
+
If ``ext_or_fmt`` is set to ``"*"`` then the extension associated to
|
|
1123
|
+
the format of ``self`` is returned.
|
|
700
1124
|
|
|
701
|
-
|
|
1125
|
+
Parameters
|
|
1126
|
+
----------
|
|
1127
|
+
ext_or_fmt : str or :class:`cdxcore.subdir.Format`
|
|
1128
|
+
An extension or a format.
|
|
1129
|
+
|
|
1130
|
+
Returns
|
|
1131
|
+
-------
|
|
1132
|
+
ext : str
|
|
1133
|
+
The extension with leading ``'.'``.
|
|
702
1134
|
"""
|
|
703
|
-
if isinstance(
|
|
704
|
-
return self._auto_ext(
|
|
1135
|
+
if isinstance(ext_or_fmt, Format):
|
|
1136
|
+
return self._auto_ext(ext_or_fmt)
|
|
705
1137
|
else:
|
|
706
|
-
ext = self._ext if
|
|
1138
|
+
ext = self._ext if ext_or_fmt is None else SubDir._extract_ext(ext_or_fmt)
|
|
707
1139
|
return ext if ext != self.EXT_FMT_AUTO else self._auto_ext(self._fmt)
|
|
708
1140
|
|
|
709
|
-
def
|
|
1141
|
+
def auto_ext_fmt( self, *, ext : str = None, fmt : Format = None ) -> tuple[str]:
|
|
710
1142
|
"""
|
|
711
|
-
Computes the effective extension and format based on inputs
|
|
712
|
-
|
|
713
|
-
|
|
1143
|
+
Computes the effective extension and format based on inputs ``ext`` and ``fmt``,
|
|
1144
|
+
each of which defaults to the respective values of ``self``.
|
|
1145
|
+
|
|
1146
|
+
Resolves an ``ext`` of ``"*"`` into the extension associated with ``fmt``.
|
|
714
1147
|
|
|
715
|
-
Returns
|
|
1148
|
+
Returns
|
|
1149
|
+
-------
|
|
1150
|
+
(ext, fmt) : tuple
|
|
1151
|
+
Here ``ext`` contains the leading ``'.'`` and ``fmt`` is
|
|
1152
|
+
of type :class:`cdxcore.subdir.Format`.
|
|
716
1153
|
"""
|
|
717
1154
|
if isinstance(ext, Format):
|
|
718
1155
|
verify( fmt is None or fmt == ext, "If 'ext' is a Format, then 'fmt' must match 'ext' or be None. Found '%s' and '%s', respectively.", ext, fmt, exception=ValueError )
|
|
@@ -724,8 +1161,8 @@ class SubDir(object):
|
|
|
724
1161
|
return ext, fmt
|
|
725
1162
|
|
|
726
1163
|
@property
|
|
727
|
-
def
|
|
728
|
-
""" Returns an assigned CacheController
|
|
1164
|
+
def cache_controller(self):
|
|
1165
|
+
""" Returns the assigned :class:`cdxcore.subdir.CacheController`, or the module-level default controller if none was assigned. """
|
|
729
1166
|
return self._cctrl if not self._cctrl is None else default_cacheController
|
|
730
1167
|
|
|
731
1168
|
# -- static helpers --
|
|
@@ -747,7 +1184,10 @@ class SubDir(object):
|
|
|
747
1184
|
|
|
748
1185
|
@staticmethod
|
|
749
1186
|
def _version_to_bytes( version : str ) -> bytearray:
|
|
750
|
-
"""
|
|
1187
|
+
"""
|
|
1188
|
+
Convert string version to byte string of at most size
|
|
1189
|
+
:data:`cdxcore.subdir.SubDir.MAX_VERSION_BINARY_LEN` + 1
|
|
1190
|
+
"""
|
|
751
1191
|
if version is None:
|
|
752
1192
|
return None
|
|
753
1193
|
version_ = bytearray(version,'utf-8')
|
|
@@ -790,69 +1230,67 @@ class SubDir(object):
|
|
|
790
1230
|
|
|
791
1231
|
# -- public utilities --
|
|
792
1232
|
|
|
793
|
-
def
|
|
1233
|
+
def full_file_name(self, file : str, *, ext : str = None) -> str:
|
|
794
1234
|
"""
|
|
795
1235
|
Returns fully qualified file name.
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
If 'self' is None, then this function returns None
|
|
799
|
-
If key is None then this function returns None
|
|
1236
|
+
|
|
1237
|
+
The function tests that ``file`` does not contain directory information.
|
|
800
1238
|
|
|
801
1239
|
Parameters
|
|
802
1240
|
----------
|
|
803
|
-
|
|
804
|
-
Core file name
|
|
1241
|
+
file : str
|
|
1242
|
+
Core file name without path or extension.
|
|
805
1243
|
ext : str
|
|
806
|
-
If not None
|
|
1244
|
+
If not ``None``, use this extension rather than :attr:`cdxcore.subdir.SubDir.ext`.
|
|
807
1245
|
|
|
808
1246
|
Returns
|
|
809
1247
|
-------
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
1248
|
+
Filename : str
|
|
1249
|
+
Fully qualified system file name.
|
|
1250
|
+
If ``self`` is ``None``, then this function returns ``None``; if ``file`` is ``None`` then this function also returns ``None``.
|
|
813
1251
|
"""
|
|
814
|
-
if self._path is None or
|
|
1252
|
+
if self._path is None or file is None:
|
|
815
1253
|
return None
|
|
816
|
-
|
|
817
|
-
verify( len(
|
|
1254
|
+
file = str(file)
|
|
1255
|
+
verify( len(file) > 0, "'file' cannot be empty")
|
|
818
1256
|
|
|
819
|
-
sub, _ = os.path.split(
|
|
820
|
-
verify( len(sub) == 0, "Key '%s' contains directory information",
|
|
1257
|
+
sub, _ = os.path.split(file)
|
|
1258
|
+
verify( len(sub) == 0, "Key '%s' contains directory information", file)
|
|
821
1259
|
|
|
822
|
-
verify(
|
|
823
|
-
verify(
|
|
1260
|
+
verify( file[0] != "!", "Key '%s' cannot start with '!' (this symbol indicates the temp directory)", file, exception=ValueError )
|
|
1261
|
+
verify( file[0] != "~", "Key '%s' cannot start with '~' (this symbol indicates the user's directory)", file, exception=ValueError )
|
|
824
1262
|
|
|
825
|
-
ext = self.
|
|
826
|
-
if len(ext) > 0 and
|
|
827
|
-
return self._path +
|
|
828
|
-
return self._path +
|
|
829
|
-
|
|
1263
|
+
ext = self.auto_ext( ext )
|
|
1264
|
+
if len(ext) > 0 and file[-len(ext):] != ext:
|
|
1265
|
+
return self._path + file + ext
|
|
1266
|
+
return self._path + file
|
|
1267
|
+
full_file_name = full_file_name # backwards compatibility
|
|
830
1268
|
|
|
831
1269
|
@staticmethod
|
|
832
|
-
def
|
|
1270
|
+
def temp_dir() -> str:
|
|
833
1271
|
"""
|
|
834
|
-
Return system temp directory. Short
|
|
835
|
-
Result contains trailing '/'
|
|
1272
|
+
Return system temp directory. Short-cut to :func:`tempfile.gettempdir`.
|
|
1273
|
+
Result contains trailing ``'/'``.
|
|
836
1274
|
"""
|
|
837
1275
|
d = tempfile.gettempdir()
|
|
838
1276
|
assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-1", d)
|
|
839
1277
|
return d + "/"
|
|
840
1278
|
|
|
841
1279
|
@staticmethod
|
|
842
|
-
def
|
|
1280
|
+
def working_dir() -> str:
|
|
843
1281
|
"""
|
|
844
|
-
Return current working directory. Short
|
|
845
|
-
Result contains trailing '/'
|
|
1282
|
+
Return current working directory. Short-cut for :func:`os.getcwd`.
|
|
1283
|
+
Result contains trailing ``'/'``.
|
|
846
1284
|
"""
|
|
847
1285
|
d = os.getcwd()
|
|
848
1286
|
assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-2", d)
|
|
849
1287
|
return d + "/"
|
|
850
1288
|
|
|
851
1289
|
@staticmethod
|
|
852
|
-
def
|
|
1290
|
+
def user_dir() -> str:
|
|
853
1291
|
"""
|
|
854
|
-
Return current working directory. Short
|
|
855
|
-
Result contains trailing '/'
|
|
1292
|
+
Return the current user's home directory. Short-cut for :func:`os.path.expanduser` with parameter ``'~'``.
|
|
1293
|
+
Result contains trailing ``'/'``.
|
|
856
1294
|
"""
|
|
857
1295
|
d = os.path.expanduser('~')
|
|
858
1296
|
assert len(d) == 0 or not (d[-1] == '/' or d[-1] == '\\'), ("*** Internal error 13123212-3", d)
|
|
@@ -860,95 +1298,101 @@ class SubDir(object):
|
|
|
860
1298
|
|
|
861
1299
|
# -- read --
|
|
862
1300
|
|
|
863
|
-
def _read_reader( self, reader,
|
|
1301
|
+
def _read_reader( self, reader, file : str, default, raise_on_error : bool, *, ext : str = None ):
|
|
864
1302
|
"""
|
|
865
1303
|
Utility function for read() and read_string()
|
|
866
1304
|
|
|
867
1305
|
Parameters
|
|
868
1306
|
----------
|
|
869
|
-
reader(
|
|
1307
|
+
reader( file, full_file_name, default )
|
|
870
1308
|
A function which is called to read the file once the correct directory is identified
|
|
871
|
-
|
|
872
|
-
|
|
1309
|
+
file : file (for error messages, might include '/')
|
|
1310
|
+
full_file_name : full file name
|
|
873
1311
|
default value
|
|
874
|
-
|
|
875
|
-
str: fully qualified
|
|
1312
|
+
file : str or list
|
|
1313
|
+
str: fully qualified file
|
|
876
1314
|
list: list of fully qualified names
|
|
877
1315
|
default :
|
|
878
1316
|
default value. None is a valid default value
|
|
879
1317
|
list : list of defaults for a list of keys
|
|
880
|
-
|
|
1318
|
+
raise_on_error : bool
|
|
881
1319
|
If True, and the file does not exist, throw exception
|
|
882
1320
|
ext :
|
|
883
1321
|
Extension or None for current extension.
|
|
884
1322
|
list : list of extensions for a list of keys
|
|
885
1323
|
"""
|
|
886
1324
|
# vector version
|
|
887
|
-
if not isinstance(
|
|
888
|
-
if not isinstance(
|
|
889
|
-
l = len(
|
|
1325
|
+
if not isinstance(file,str):
|
|
1326
|
+
if not isinstance(file, Collection): raise ValueError(txtfmt( "'file' must be a string, or an interable object. Found type %s", type(file)))
|
|
1327
|
+
l = len(file)
|
|
890
1328
|
if default is None or isinstance(default,str) or not isinstance(default, Collection):
|
|
891
1329
|
default = [ default ] * l
|
|
892
1330
|
else:
|
|
893
|
-
if len(default) != l: raise ValueError(txtfmt("'default' must have same lengths as '
|
|
1331
|
+
if len(default) != l: raise ValueError(txtfmt("'default' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(default), l ))
|
|
894
1332
|
if ext is None or isinstance(ext, str) or not isinstance(ext, Collection):
|
|
895
1333
|
ext = [ ext ] * l
|
|
896
1334
|
else:
|
|
897
|
-
if len(ext) != l: raise ValueError(txtfmt("'ext' must have same lengths as '
|
|
898
|
-
return [ self._read_reader(reader=reader,
|
|
1335
|
+
if len(ext) != l: raise ValueError(txtfmt("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l ))
|
|
1336
|
+
return [ self._read_reader(reader=reader,file=k,default=d,raise_on_error=raise_on_error,ext=e) for k, d, e in zip(file,default,ext) ]
|
|
899
1337
|
|
|
900
1338
|
# deleted directory?
|
|
901
1339
|
if self._path is None:
|
|
902
|
-
verify( not
|
|
1340
|
+
verify( not raise_on_error, "Trying to read '%s' from an empty directory object", file, exception=NotADirectoryError)
|
|
903
1341
|
return default
|
|
904
1342
|
|
|
905
|
-
# single
|
|
906
|
-
if len(
|
|
907
|
-
sub, key_ = os.path.split(
|
|
1343
|
+
# single file
|
|
1344
|
+
if len(file) == 0: raise ValueError(txtfmt("'file' missing (the filename)" ))
|
|
1345
|
+
sub, key_ = os.path.split(file)
|
|
908
1346
|
if len(sub) > 0:
|
|
909
|
-
return self(sub)._read_reader(reader=reader,
|
|
910
|
-
if len(key_) == 0: ValueError(txtfmt("'
|
|
1347
|
+
return self(sub)._read_reader(reader=reader,file=key_,default=default,raise_on_error=raise_on_error,ext=ext)
|
|
1348
|
+
if len(key_) == 0: ValueError(txtfmt("'file' %s indicates a directory, not a file", file))
|
|
911
1349
|
|
|
912
1350
|
# don't try if directory doesn't exist
|
|
913
|
-
|
|
914
|
-
if not self.
|
|
915
|
-
if
|
|
916
|
-
raise KeyError(
|
|
1351
|
+
full_file_name = self.full_file_name(file,ext=ext)
|
|
1352
|
+
if not self.path_exists():
|
|
1353
|
+
if raise_on_error:
|
|
1354
|
+
raise KeyError(file, full_file_name)
|
|
917
1355
|
return default
|
|
918
1356
|
|
|
919
1357
|
# does file exist?
|
|
920
|
-
if not os.path.exists(
|
|
921
|
-
if
|
|
922
|
-
raise KeyError(
|
|
1358
|
+
if not os.path.exists(full_file_name):
|
|
1359
|
+
if raise_on_error:
|
|
1360
|
+
raise KeyError(file,full_file_name)
|
|
923
1361
|
return default
|
|
924
|
-
if not os.path.isfile(
|
|
925
|
-
raise IOError(txtfmt( "Cannot read %s: object exists, but is not a file (full path %s)",
|
|
1362
|
+
if not os.path.isfile(full_file_name):
|
|
1363
|
+
raise IOError(txtfmt( "Cannot read '%s': object exists, but is not a file (full path %s)", file, full_file_name ))
|
|
926
1364
|
|
|
927
1365
|
# read content
|
|
928
1366
|
# delete existing files upon read error
|
|
929
1367
|
try:
|
|
930
|
-
return reader(
|
|
1368
|
+
return reader( file, full_file_name, default )
|
|
931
1369
|
except EOFError as e:
|
|
932
1370
|
try:
|
|
933
|
-
os.remove(
|
|
934
|
-
warn("Cannot read %s; file deleted (full path %s).\nError: %s",
|
|
1371
|
+
os.remove(full_file_name)
|
|
1372
|
+
warn("Cannot read '%s'; file deleted (full path '%s').\nError: %s",file,full_file_name, str(e))
|
|
935
1373
|
except Exception as e:
|
|
936
|
-
warn("Cannot read %s; attempt to delete file failed (full path %s): %s",
|
|
1374
|
+
warn("Cannot read '%s'; subsequent attempt to delete file failed (full path '%s''): %s",file,full_file_name,str(e))
|
|
937
1375
|
except FileNotFoundError as e:
|
|
938
|
-
if
|
|
939
|
-
raise KeyError(
|
|
1376
|
+
if raise_on_error:
|
|
1377
|
+
raise KeyError(file, full_file_name, str(e)) from e
|
|
1378
|
+
except VersionError as e:
|
|
1379
|
+
if raise_on_error:
|
|
1380
|
+
raise e
|
|
1381
|
+
except VersionPresentError as e:
|
|
1382
|
+
if raise_on_error:
|
|
1383
|
+
raise e
|
|
940
1384
|
except Exception as e:
|
|
941
|
-
if
|
|
942
|
-
raise KeyError(
|
|
1385
|
+
if raise_on_error:
|
|
1386
|
+
raise KeyError(file, full_file_name, str(e)) from e
|
|
943
1387
|
except (ImportError, BaseException) as e:
|
|
944
|
-
e.add_note(
|
|
945
|
-
e.add_note(
|
|
1388
|
+
e.add_note( file )
|
|
1389
|
+
e.add_note( full_file_name )
|
|
946
1390
|
raise e
|
|
947
1391
|
return default
|
|
948
1392
|
|
|
949
|
-
def _read( self,
|
|
1393
|
+
def _read( self, file : str,
|
|
950
1394
|
default = None,
|
|
951
|
-
|
|
1395
|
+
raise_on_error : bool = False,
|
|
952
1396
|
*,
|
|
953
1397
|
version : str = None,
|
|
954
1398
|
ext : str = None,
|
|
@@ -957,18 +1401,34 @@ class SubDir(object):
|
|
|
957
1401
|
handle_version : int = 0
|
|
958
1402
|
):
|
|
959
1403
|
""" See read() """
|
|
960
|
-
ext, fmt = self.
|
|
1404
|
+
ext, fmt = self.auto_ext_fmt(ext=ext, fmt=fmt)
|
|
961
1405
|
version = str(version) if not version is None else None
|
|
962
1406
|
version = version if handle_version != SubDir.VER_RETURN else ""
|
|
963
1407
|
assert not fmt == self.EXT_FMT_AUTO, ("'fmt' is '*' ...?")
|
|
964
1408
|
|
|
965
1409
|
if version is None and fmt in [Format.BLOSC, Format.GZIP]:
|
|
966
|
-
|
|
1410
|
+
# blosc and gzip have unexpected side effects
|
|
1411
|
+
# if a version is attempted to be read but is not present
|
|
1412
|
+
# (e.g. blosc causes a MemoryError)
|
|
1413
|
+
version = ""
|
|
967
1414
|
|
|
968
|
-
def reader(
|
|
1415
|
+
def reader( file, full_file_name, default ):
|
|
969
1416
|
test_version = "(unknown)"
|
|
970
|
-
|
|
971
|
-
|
|
1417
|
+
|
|
1418
|
+
def handle_pickle_error(e):
|
|
1419
|
+
err = "invalid load key, '\\x03'."
|
|
1420
|
+
if not version is None or e.args[0] != err:
|
|
1421
|
+
print("####", e.args)
|
|
1422
|
+
raise e
|
|
1423
|
+
raise VersionPresentError(
|
|
1424
|
+
f"Error reading '{full_file_name}': encountered an unpickling error '{err}' "+\
|
|
1425
|
+
f"while attempting to read file using {str(fmt)}. "+\
|
|
1426
|
+
"This is likely caused by attempting to read a file which was written with "+\
|
|
1427
|
+
"version information without providing a test version during read(). If the version is of the file "+\
|
|
1428
|
+
"is not important, use `version=\"*\"'", e) from e
|
|
1429
|
+
if fmt == Format.PICKLE:
|
|
1430
|
+
# we do not read any version information if not requested
|
|
1431
|
+
with open(full_file_name,"rb") as f:
|
|
972
1432
|
# handle version as byte string
|
|
973
1433
|
ok = True
|
|
974
1434
|
if not version is None:
|
|
@@ -981,37 +1441,55 @@ class SubDir(object):
|
|
|
981
1441
|
if ok:
|
|
982
1442
|
if handle_version == SubDir.VER_CHECK:
|
|
983
1443
|
return True
|
|
984
|
-
|
|
1444
|
+
try:
|
|
985
1445
|
data = pickle.load(f)
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
1446
|
+
except pickle.UnpicklingError as e:
|
|
1447
|
+
handle_pickle_error(e)
|
|
1448
|
+
return data
|
|
1449
|
+
|
|
1450
|
+
elif fmt == Format.BLOSC:
|
|
1451
|
+
# we do not write
|
|
1452
|
+
# any version information if not requested
|
|
1453
|
+
with open(full_file_name,"rb") as f:
|
|
1454
|
+
# handle version as byte string
|
|
1455
|
+
ok = True
|
|
1456
|
+
if not version is None: # it's never None
|
|
1457
|
+
test_len = int( f.read( 1 )[0] )
|
|
1458
|
+
test_version = f.read(test_len)
|
|
1459
|
+
test_version = test_version.decode("utf-8")
|
|
1460
|
+
if handle_version == SubDir.VER_RETURN:
|
|
1461
|
+
return test_version
|
|
1462
|
+
ok = (version == "*" or test_version == version)
|
|
1463
|
+
if ok:
|
|
1464
|
+
if handle_version == SubDir.VER_CHECK:
|
|
1465
|
+
return True
|
|
1466
|
+
nnbb = f.read(2)
|
|
1467
|
+
num_blocks = int.from_bytes( nnbb, 'big', signed=False )
|
|
1468
|
+
data = bytearray()
|
|
1469
|
+
for i in range(num_blocks):
|
|
1470
|
+
blockl = int.from_bytes( f.read(6), 'big', signed=False )
|
|
1471
|
+
if blockl>0:
|
|
1472
|
+
bdata = blosc.decompress( f.read(blockl) )
|
|
1473
|
+
data += bdata
|
|
1474
|
+
del bdata
|
|
1475
|
+
try:
|
|
998
1476
|
data = pickle.loads(data)
|
|
999
|
-
|
|
1000
|
-
|
|
1477
|
+
except pickle.UnpicklingError as e:
|
|
1478
|
+
handle_pickle_error(e)
|
|
1001
1479
|
return data
|
|
1002
1480
|
|
|
1003
1481
|
elif fmt == Format.GZIP:
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
with gzip.open(fullFileName,"rb") as f:
|
|
1482
|
+
# always read version information
|
|
1483
|
+
with gzip.open(full_file_name,"rb") as f:
|
|
1007
1484
|
# handle version as byte string
|
|
1008
|
-
ok
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1485
|
+
ok = True
|
|
1486
|
+
if not version is None: # it's never None
|
|
1487
|
+
test_len = int( f.read( 1 )[0] )
|
|
1488
|
+
test_version = f.read(test_len)
|
|
1489
|
+
test_version = test_version.decode("utf-8")
|
|
1490
|
+
if handle_version == SubDir.VER_RETURN:
|
|
1491
|
+
return test_version
|
|
1492
|
+
ok = (version == "*" or test_version == version)
|
|
1015
1493
|
if ok:
|
|
1016
1494
|
if handle_version == SubDir.VER_CHECK:
|
|
1017
1495
|
return True
|
|
@@ -1019,13 +1497,16 @@ class SubDir(object):
|
|
|
1019
1497
|
return data
|
|
1020
1498
|
|
|
1021
1499
|
elif fmt in [Format.JSON_PLAIN, Format.JSON_PICKLE]:
|
|
1022
|
-
|
|
1500
|
+
# only read version information if requested
|
|
1501
|
+
with open(full_file_name,"rt",encoding="utf-8") as f:
|
|
1023
1502
|
# handle versioning
|
|
1024
1503
|
ok = True
|
|
1025
1504
|
if not version is None:
|
|
1026
1505
|
test_version = f.readline()
|
|
1027
1506
|
if test_version[:2] != "# ":
|
|
1028
|
-
raise
|
|
1507
|
+
raise VersionError("Error reading '{full_file_name}' using {fmt}: file does not appear to contain a version (it should start with '# ')",
|
|
1508
|
+
version_found="",
|
|
1509
|
+
version_expected=version)
|
|
1029
1510
|
test_version = test_version[2:]
|
|
1030
1511
|
if test_version[-1:] == "\n":
|
|
1031
1512
|
test_version = test_version[:-1]
|
|
@@ -1037,8 +1518,6 @@ class SubDir(object):
|
|
|
1037
1518
|
return ok
|
|
1038
1519
|
# read
|
|
1039
1520
|
if fmt == Format.JSON_PICKLE:
|
|
1040
|
-
if jsonpickle is None:
|
|
1041
|
-
raise ModuleNotFoundError("jsonpickle", "'jsonpickle' not found'")
|
|
1042
1521
|
return jsonpickle.decode( f.read() )
|
|
1043
1522
|
else:
|
|
1044
1523
|
assert fmt == Format.JSON_PLAIN, ("Internal error: unknown Format", fmt)
|
|
@@ -1048,25 +1527,33 @@ class SubDir(object):
|
|
|
1048
1527
|
|
|
1049
1528
|
# arrive here if version is wrong
|
|
1050
1529
|
# delete a wrong version
|
|
1530
|
+
|
|
1531
|
+
if version == "":
|
|
1532
|
+
raise VersionPresentError(f"Error reading '{full_file_name}' using {fmt}: the file has version '{test_version}', but was attempted to be read without "+\
|
|
1533
|
+
"a test version. If you intended to accept any version, use 'version=\"*\"' instead.")
|
|
1534
|
+
|
|
1051
1535
|
deleted = ""
|
|
1052
1536
|
if delete_wrong_version:
|
|
1053
1537
|
try:
|
|
1054
|
-
os.remove(
|
|
1538
|
+
os.remove(full_file_name)
|
|
1055
1539
|
e = None
|
|
1056
1540
|
except Exception as e_:
|
|
1057
1541
|
e = str(e_)
|
|
1058
1542
|
if handle_version == SubDir.VER_CHECK:
|
|
1059
1543
|
return False
|
|
1060
|
-
if not
|
|
1544
|
+
if not raise_on_error:
|
|
1061
1545
|
return default
|
|
1062
1546
|
deleted = " (file was deleted)" if e is None else " (attempt to delete file failed: %s)" % e
|
|
1063
|
-
raise
|
|
1547
|
+
raise VersionError( f"Error reading '{full_file_name}' using {fmt}: found version '{test_version}' not '{version}'{deleted}",
|
|
1548
|
+
version_found=test_version,
|
|
1549
|
+
version_expected=version
|
|
1550
|
+
)
|
|
1064
1551
|
|
|
1065
|
-
return self._read_reader( reader=reader,
|
|
1552
|
+
return self._read_reader( reader=reader, file=file, default=default, raise_on_error=raise_on_error, ext=ext )
|
|
1066
1553
|
|
|
1067
|
-
def read( self,
|
|
1554
|
+
def read( self, file : str,
|
|
1068
1555
|
default = None,
|
|
1069
|
-
|
|
1556
|
+
raise_on_error : bool = False,
|
|
1070
1557
|
*,
|
|
1071
1558
|
version : str = None,
|
|
1072
1559
|
delete_wrong_version : bool = True,
|
|
@@ -1074,296 +1561,323 @@ class SubDir(object):
|
|
|
1074
1561
|
fmt : Format = None
|
|
1075
1562
|
):
|
|
1076
1563
|
"""
|
|
1077
|
-
Read
|
|
1078
|
-
-- Supports 'key' containing directories
|
|
1079
|
-
-- Supports 'key' (and default, ext) being iterable.
|
|
1080
|
-
In this case any any iterable 'default' except strings are considered accordingly.
|
|
1081
|
-
In order to have a unit default which is an iterable, you will have to wrap it in another iterable, e.g.
|
|
1082
|
-
E.g.:
|
|
1083
|
-
keys = ['file1', 'file2']
|
|
1084
|
-
|
|
1085
|
-
sd.read( keys )
|
|
1086
|
-
--> works, both are using default None
|
|
1087
|
-
|
|
1088
|
-
sd.read( keys, 1 )
|
|
1089
|
-
--> works, both are using default '1'
|
|
1564
|
+
Read data from a file if the file exists, or return ``default``.
|
|
1090
1565
|
|
|
1091
|
-
|
|
1092
|
-
|
|
1566
|
+
* Supports ``file`` containing directory information.
|
|
1567
|
+
* Supports ``file`` (and ``default`` as well as ``ext``) being iterable.
|
|
1568
|
+
Examples::
|
|
1569
|
+
|
|
1570
|
+
from cdxcore.subdir import SubDir
|
|
1571
|
+
files = ['file1', 'file2']
|
|
1572
|
+
sd = SubDir("!/test")
|
|
1093
1573
|
|
|
1094
|
-
|
|
1095
|
-
|
|
1574
|
+
sd.read( files ) # both files are using default None
|
|
1575
|
+
sd.read( files, 1 ) # both files are using default '1'
|
|
1576
|
+
sd.read( files, [1,2] ) # files use defaults 1 and 2, respectively
|
|
1096
1577
|
|
|
1097
|
-
|
|
1098
|
-
Therefore
|
|
1099
|
-
sd.read( keys, '12' )
|
|
1100
|
-
means the default value '12' is used for both files.
|
|
1101
|
-
Use
|
|
1102
|
-
sd.read( keys, ['1','2'] )
|
|
1103
|
-
in case the intention was using '1' and '2', respectively.
|
|
1578
|
+
sd.read( files, [1] ) # produces error as len(files) != len([1])
|
|
1104
1579
|
|
|
1105
|
-
|
|
1106
|
-
|
|
1580
|
+
Strings are iterable but are treated as single value.
|
|
1581
|
+
Therefore::
|
|
1582
|
+
|
|
1583
|
+
sd.read( files, '12' ) # the default value '12' is used for both files
|
|
1584
|
+
sd.read( files, ['1','2'] ) # use defaults '1' and '2', respectively
|
|
1107
1585
|
|
|
1108
1586
|
Parameters
|
|
1109
1587
|
----------
|
|
1110
|
-
|
|
1111
|
-
A
|
|
1588
|
+
file : str
|
|
1589
|
+
A file name or a list thereof. ``file`` may contain subdirectories.
|
|
1590
|
+
|
|
1112
1591
|
default :
|
|
1113
|
-
Default value, or default values if
|
|
1114
|
-
|
|
1592
|
+
Default value, or default values if ``file`` is a list.
|
|
1593
|
+
|
|
1594
|
+
raise_on_error : bool
|
|
1115
1595
|
Whether to raise an exception if reading an existing file failed.
|
|
1116
1596
|
By default this function fails silently and returns the default.
|
|
1597
|
+
|
|
1117
1598
|
version : str
|
|
1118
|
-
If not None
|
|
1599
|
+
If not ``None``, specifies the version of the current code base.
|
|
1600
|
+
|
|
1119
1601
|
In this case, this version will be compared to the version of the file being read.
|
|
1120
|
-
If they do not match, read fails (either by returning default or throwing
|
|
1121
|
-
|
|
1602
|
+
If they do not match, read fails (either by returning default or throwing a :class:`cdxcore.version.VersionError` exception).
|
|
1603
|
+
|
|
1604
|
+
You can specify version ``"*"`` to accept any version.
|
|
1605
|
+
Note that this is distinct
|
|
1606
|
+
from using ``None``, which stipulates that the file should not
|
|
1607
|
+
have version information.
|
|
1608
|
+
|
|
1122
1609
|
delete_wrong_version : bool
|
|
1123
|
-
If True
|
|
1610
|
+
If ``True``, and if a wrong version was found, delete the file.
|
|
1611
|
+
|
|
1124
1612
|
ext : str
|
|
1125
|
-
Extension overwrite, or a list thereof if
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1613
|
+
Extension overwrite, or a list thereof if ``file`` is a list.
|
|
1614
|
+
|
|
1615
|
+
Use:
|
|
1616
|
+
|
|
1617
|
+
* ``None`` to use directory's default.
|
|
1618
|
+
* ``'*'`` to use the extension implied by ``fmt``.
|
|
1619
|
+
* ``""`` to turn of extension management.
|
|
1620
|
+
|
|
1621
|
+
fmt : :class:`cdxcore.subdir.Format`
|
|
1622
|
+
File :class:`cdxcore.subdir.Format` or ``None`` to use the directory's default.
|
|
1623
|
+
|
|
1624
|
+
Note:
|
|
1625
|
+
|
|
1626
|
+
* ``fmt`` cannot be a list even if ``file`` is.
|
|
1627
|
+
* Unless ``ext`` or the SubDir's extension is ``'*'``, changing the format does not automatically change the extension.
|
|
1134
1628
|
|
|
1135
1629
|
Returns
|
|
1136
1630
|
-------
|
|
1137
|
-
|
|
1138
|
-
|
|
1631
|
+
Content
|
|
1632
|
+
For a single ``file`` returns the content of the file if successfully read, or ``default`` otherwise.
|
|
1633
|
+
If ``file`` is a list: a list of contents.
|
|
1634
|
+
|
|
1635
|
+
Raises
|
|
1636
|
+
------
|
|
1637
|
+
:class:`cdxcore.version.VersionError`:
|
|
1638
|
+
If the file's version did not match the ``version`` provided.
|
|
1639
|
+
|
|
1139
1640
|
"""
|
|
1140
|
-
return self._read(
|
|
1641
|
+
return self._read( file=file,
|
|
1141
1642
|
default=default,
|
|
1142
|
-
|
|
1643
|
+
raise_on_error=raise_on_error,
|
|
1143
1644
|
version=version,
|
|
1144
1645
|
ext=ext,
|
|
1145
1646
|
fmt=fmt,
|
|
1146
1647
|
delete_wrong_version=delete_wrong_version,
|
|
1147
1648
|
handle_version=SubDir.VER_NORMAL )
|
|
1148
1649
|
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
def is_version( self, key : str, version : str = None, raiseOnError : bool = False, *, ext : str = None, fmt : Format = None, delete_wrong_version : bool = True ):
|
|
1650
|
+
def is_version( self, file : str, version : str = None, raise_on_error : bool = False, *, ext : str = None, fmt : Format = None, delete_wrong_version : bool = True ):
|
|
1152
1651
|
"""
|
|
1153
|
-
|
|
1652
|
+
Tests the version of a file.
|
|
1154
1653
|
|
|
1155
1654
|
Parameters
|
|
1156
1655
|
----------
|
|
1157
|
-
|
|
1158
|
-
A
|
|
1656
|
+
file : str
|
|
1657
|
+
A filename, or a list thereof.
|
|
1658
|
+
|
|
1159
1659
|
version : str
|
|
1160
|
-
Specifies the version
|
|
1161
|
-
|
|
1660
|
+
Specifies the version to compare the file's version with.
|
|
1661
|
+
|
|
1662
|
+
You can use ``"*"`` to match any version.
|
|
1162
1663
|
|
|
1163
|
-
|
|
1664
|
+
raise_on_error : bool
|
|
1164
1665
|
Whether to raise an exception if accessing an existing file failed (e.g. if it is a directory).
|
|
1165
1666
|
By default this function fails silently and returns the default.
|
|
1667
|
+
|
|
1166
1668
|
delete_wrong_version : bool
|
|
1167
|
-
If True, and if a wrong version was found, delete
|
|
1669
|
+
If True, and if a wrong version was found, delete ``file``.
|
|
1670
|
+
|
|
1168
1671
|
ext : str
|
|
1169
|
-
Extension overwrite, or a list thereof if
|
|
1672
|
+
Extension overwrite, or a list thereof if file is a list.
|
|
1673
|
+
|
|
1170
1674
|
Set to:
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1675
|
+
|
|
1676
|
+
* ``None`` to use directory's default.
|
|
1677
|
+
* ``"*"`` to use the extension implied by ``fmt``.
|
|
1678
|
+
* ``""`` for no extension.
|
|
1679
|
+
|
|
1680
|
+
fmt : :class:`cdxcore.subdir.Format`
|
|
1681
|
+
File format or ``None`` to use the directory's default.
|
|
1682
|
+
Note that ``fmt`` cannot be a list even if ``file`` is.
|
|
1178
1683
|
|
|
1179
1684
|
Returns
|
|
1180
1685
|
-------
|
|
1181
|
-
|
|
1686
|
+
Status : bool
|
|
1687
|
+
Returns ``True`` only if the file exists, has version information, and its version is equal to ``version``.
|
|
1182
1688
|
"""
|
|
1183
|
-
return self._read(
|
|
1689
|
+
return self._read( file=file,default=False,raise_on_error=raise_on_error,version=version,ext=ext,fmt=fmt,delete_wrong_version=delete_wrong_version,handle_version=SubDir.VER_CHECK )
|
|
1184
1690
|
|
|
1185
|
-
def get_version( self,
|
|
1691
|
+
def get_version( self, file : str, raise_on_error : bool = False, *, ext : str = None, fmt : Format = None ):
|
|
1186
1692
|
"""
|
|
1187
|
-
Returns
|
|
1693
|
+
Returns a version stored in a file.
|
|
1694
|
+
|
|
1188
1695
|
This requires that the file has previously been saved with a version.
|
|
1189
|
-
Otherwise this function will
|
|
1696
|
+
Otherwise this function will have unpredictable results.
|
|
1190
1697
|
|
|
1191
1698
|
Parameters
|
|
1192
1699
|
----------
|
|
1193
|
-
|
|
1194
|
-
A
|
|
1195
|
-
|
|
1700
|
+
file : str
|
|
1701
|
+
A filename, or a list thereof.
|
|
1702
|
+
|
|
1703
|
+
raise_on_error : bool
|
|
1196
1704
|
Whether to raise an exception if accessing an existing file failed (e.g. if it is a directory).
|
|
1197
1705
|
By default this function fails silently and returns the default.
|
|
1706
|
+
|
|
1707
|
+
delete_wrong_version : bool
|
|
1708
|
+
If ``True``, and if a wrong version was found, delete ``file``.
|
|
1709
|
+
|
|
1198
1710
|
ext : str
|
|
1199
|
-
Extension overwrite, or a list thereof if
|
|
1711
|
+
Extension overwrite, or a list thereof if ``file`` is a list.
|
|
1712
|
+
|
|
1200
1713
|
Set to:
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1714
|
+
|
|
1715
|
+
* ``None`` to use directory's default.
|
|
1716
|
+
* ``"*"`` to use the extension implied by ``fmt``.
|
|
1717
|
+
* ``""`` for no extension.
|
|
1718
|
+
|
|
1719
|
+
fmt : :class:`cdxcore.subdir.Format`
|
|
1720
|
+
File format or ``None`` to use the directory's default.
|
|
1721
|
+
Note that ``fmt`` cannot be a list even if ``file`` is.
|
|
1208
1722
|
|
|
1209
1723
|
Returns
|
|
1210
1724
|
-------
|
|
1211
|
-
|
|
1725
|
+
version : str
|
|
1212
1726
|
"""
|
|
1213
|
-
return self._read(
|
|
1727
|
+
return self._read( file=file,default=None,raise_on_error=raise_on_error,version="",ext=ext,fmt=fmt,delete_wrong_version=False,handle_version=SubDir.VER_RETURN )
|
|
1214
1728
|
|
|
1215
|
-
def
|
|
1729
|
+
def read_string( self, file : str, default = None, raise_on_error : bool = False, *, ext : str = None ) -> str:
|
|
1216
1730
|
"""
|
|
1217
|
-
Reads text from
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
Returns the read string, or a list of strings if 'key' was iterable.
|
|
1222
|
-
If the current directory is 'None', then behaviour is as if the file did not exist.
|
|
1223
|
-
|
|
1224
|
-
Use 'ext' to specify the extension.
|
|
1225
|
-
You cannot use 'ext' to specify a format as the format is plain text.
|
|
1226
|
-
If 'ext' is '*' or if self._ext is '*' then the default extension is 'txt'.
|
|
1731
|
+
Reads text from a file. Removes trailing EOLs.
|
|
1732
|
+
|
|
1733
|
+
Returns the read string, or a list of strings if ``file`` was iterable.
|
|
1227
1734
|
"""
|
|
1228
1735
|
verify( not isinstance(ext, Format), "Cannot change format when writing strings. Found extension '%s'", ext)
|
|
1229
1736
|
ext = ext if not ext is None else self._ext
|
|
1230
1737
|
ext = ext if ext != self.EXT_FMT_AUTO else ".txt"
|
|
1231
1738
|
|
|
1232
|
-
def reader(
|
|
1233
|
-
with open(
|
|
1739
|
+
def reader( file, full_file_name, default ):
|
|
1740
|
+
with open(full_file_name,"rt",encoding="utf-8") as f:
|
|
1234
1741
|
line = f.readline()
|
|
1235
1742
|
if len(line) > 0 and line[-1] == '\n':
|
|
1236
1743
|
line = line[:-1]
|
|
1237
1744
|
return line
|
|
1238
|
-
return self._read_reader( reader=reader,
|
|
1745
|
+
return self._read_reader( reader=reader, file=file, default=default, raise_on_error=raise_on_error, ext=ext )
|
|
1239
1746
|
|
|
1240
1747
|
# -- write --
|
|
1241
1748
|
|
|
1242
|
-
def _write( self, writer,
|
|
1749
|
+
def _write( self, writer, file : str, obj, raise_on_error : bool, *, ext : str = None ) -> bool:
|
|
1243
1750
|
""" Utility function for write() and writeLine() """
|
|
1244
1751
|
if self._path is None:
|
|
1245
|
-
raise EOFError("Cannot write to '%s': current directory is not specified" %
|
|
1246
|
-
self.
|
|
1752
|
+
raise EOFError("Cannot write to '%s': current directory is not specified" % file)
|
|
1753
|
+
self.create_directory()
|
|
1247
1754
|
|
|
1248
1755
|
# vector version
|
|
1249
|
-
if not isinstance(
|
|
1250
|
-
if not isinstance(
|
|
1251
|
-
l = len(
|
|
1756
|
+
if not isinstance(file,str):
|
|
1757
|
+
if not isinstance(file, Collection): error( "'file' must be a string or an interable object. Found type %s", type(file), exception=ValueError)
|
|
1758
|
+
l = len(file)
|
|
1252
1759
|
if obj is None or isinstance(obj,str) or not isinstance(obj, Collection):
|
|
1253
1760
|
obj = [ obj ] * l
|
|
1254
1761
|
else:
|
|
1255
|
-
if len(obj) != l: error("'obj' must have same lengths as '
|
|
1762
|
+
if len(obj) != l: error("'obj' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(obj), l, exception=ValueError )
|
|
1256
1763
|
if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
|
|
1257
1764
|
ext = [ ext ] * l
|
|
1258
1765
|
else:
|
|
1259
|
-
if len(ext) != l: error("'ext' must have same lengths as '
|
|
1766
|
+
if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l, exception=ValueError )
|
|
1260
1767
|
ok = True
|
|
1261
|
-
for k,o,e in zip(
|
|
1262
|
-
ok |= self._write( writer, k, o,
|
|
1768
|
+
for k,o,e in zip(file,obj,ext):
|
|
1769
|
+
ok |= self._write( writer, k, o, raise_on_error=raise_on_error, ext=e )
|
|
1263
1770
|
return ok
|
|
1264
1771
|
|
|
1265
|
-
# single
|
|
1266
|
-
if not len(
|
|
1267
|
-
sub,
|
|
1268
|
-
if len(
|
|
1772
|
+
# single file
|
|
1773
|
+
if not len(file) > 0: error("'file is empty (the filename)" )
|
|
1774
|
+
sub, file = os.path.split(file)
|
|
1775
|
+
if len(file) == 0: error("'file '%s' refers to a directory, not a file", file)
|
|
1269
1776
|
if len(sub) > 0:
|
|
1270
|
-
return SubDir(sub,parent=self)._write(writer,
|
|
1777
|
+
return SubDir(sub,parent=self)._write(writer,file,obj, raise_on_error=raise_on_error,ext=ext )
|
|
1271
1778
|
|
|
1272
1779
|
# write to temp file, then rename into target file
|
|
1273
1780
|
# this reduces collision when i/o operations are slow
|
|
1274
|
-
|
|
1275
|
-
tmp_file =
|
|
1781
|
+
full_file_name = self.full_file_name(file,ext=ext)
|
|
1782
|
+
tmp_file = unique_hash48( [ file, uuid.getnode(), os.getpid(), threading.get_ident(), datetime.datetime.now() ] )
|
|
1276
1783
|
tmp_i = 0
|
|
1277
|
-
fullTmpFile = self.
|
|
1784
|
+
fullTmpFile = self.full_file_name(tmp_file,ext="tmp" if not ext=="tmp" else "_tmp")
|
|
1278
1785
|
while os.path.exists(fullTmpFile):
|
|
1279
|
-
fullTmpFile = self.
|
|
1786
|
+
fullTmpFile = self.full_file_name(tmp_file) + "." + str(tmp_i) + ".tmp"
|
|
1280
1787
|
tmp_i += 1
|
|
1281
1788
|
if tmp_i >= 10:
|
|
1282
|
-
raise RuntimeError("Failed to generate temporary file for writing '%s': too many temporary files found. For example, this file already exists: '%s'" % (
|
|
1789
|
+
raise RuntimeError("Failed to generate temporary file for writing '%s': too many temporary files found. For example, this file already exists: '%s'" % ( full_file_name, fullTmpFile ) )
|
|
1283
1790
|
|
|
1284
1791
|
# write
|
|
1285
|
-
if not writer(
|
|
1792
|
+
if not writer( file, fullTmpFile, obj ):
|
|
1286
1793
|
return False
|
|
1287
|
-
assert os.path.exists(fullTmpFile), ("Internal error: file does not exist ...?", fullTmpFile,
|
|
1794
|
+
assert os.path.exists(fullTmpFile), ("Internal error: file does not exist ...?", fullTmpFile, full_file_name)
|
|
1288
1795
|
try:
|
|
1289
|
-
if os.path.exists(
|
|
1290
|
-
os.remove(
|
|
1291
|
-
os.rename(fullTmpFile,
|
|
1796
|
+
if os.path.exists(full_file_name):
|
|
1797
|
+
os.remove(full_file_name)
|
|
1798
|
+
os.rename(fullTmpFile, full_file_name)
|
|
1292
1799
|
except Exception as e:
|
|
1293
1800
|
os.remove(fullTmpFile)
|
|
1294
|
-
if
|
|
1801
|
+
if raise_on_error:
|
|
1295
1802
|
raise e
|
|
1296
1803
|
return False
|
|
1297
1804
|
return True
|
|
1298
1805
|
|
|
1299
|
-
def write( self,
|
|
1806
|
+
def write( self, file : str,
|
|
1300
1807
|
obj,
|
|
1301
|
-
|
|
1808
|
+
raise_on_error : bool = True,
|
|
1302
1809
|
*,
|
|
1303
1810
|
version : str = None,
|
|
1304
1811
|
ext : str = None,
|
|
1305
1812
|
fmt : Format = None ) -> bool:
|
|
1306
1813
|
"""
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
keys = ['file1', 'file2']
|
|
1314
|
-
|
|
1315
|
-
sd.write( keys, 1 )
|
|
1316
|
-
--> works, writes '1' in both files.
|
|
1317
|
-
|
|
1318
|
-
sd.read( keys, [1,2] )
|
|
1319
|
-
--> works, writes 1 and 2, respectively
|
|
1814
|
+
Writes an object to file.
|
|
1815
|
+
|
|
1816
|
+
* Supports ``file`` containing directories.
|
|
1817
|
+
* Supports ``file`` being a list.
|
|
1818
|
+
In this case, if ``obj`` is an iterable it is considered the list of values for the elements of ``file``.
|
|
1819
|
+
If ``obj`` is not iterable, it will be written into all files from ``file``::
|
|
1320
1820
|
|
|
1321
|
-
|
|
1322
|
-
--> works, writes '12' in both files
|
|
1821
|
+
from cdxcore.subdir import SubDir
|
|
1323
1822
|
|
|
1324
|
-
|
|
1325
|
-
|
|
1823
|
+
keys = ['file1', 'file2']
|
|
1824
|
+
sd = SubDir("!/test")
|
|
1825
|
+
sd.write( keys, 1 ) # works, writes '1' in both files.
|
|
1826
|
+
sd.write( keys, [1,2] ) # works, writes 1 and 2, respectively
|
|
1827
|
+
sd.write( keys, "12" ) # works, writes '12' in both files
|
|
1828
|
+
sd.write( keys, [1] ) # produces error as len(keys) != len(obj)
|
|
1326
1829
|
|
|
1327
|
-
If the current directory is
|
|
1830
|
+
If the current directory is ``None``, then the function raises an :class:`EOFError` exception.
|
|
1328
1831
|
|
|
1329
1832
|
Parameters
|
|
1330
1833
|
----------
|
|
1331
|
-
|
|
1332
|
-
Core filename
|
|
1834
|
+
file : str
|
|
1835
|
+
Core filename, or list thereof.
|
|
1836
|
+
|
|
1333
1837
|
obj :
|
|
1334
|
-
Object to write, or list thereof if
|
|
1335
|
-
|
|
1336
|
-
|
|
1838
|
+
Object to write, or list thereof if ``file`` is a list.
|
|
1839
|
+
|
|
1840
|
+
raise_on_error : bool
|
|
1841
|
+
If ``False``, this function will return ``False`` upon failure.
|
|
1842
|
+
|
|
1337
1843
|
version : str
|
|
1338
|
-
If not None
|
|
1844
|
+
If not ``None``, specifies the version of the code which generated ``obj``.
|
|
1339
1845
|
This version will be written to the beginning of the file.
|
|
1846
|
+
|
|
1340
1847
|
ext : str
|
|
1341
|
-
Extension, or list thereof if
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
fmt : Format
|
|
1347
|
-
File format or None to use the directory's default.
|
|
1348
|
-
Note that
|
|
1349
|
-
Note that unless
|
|
1848
|
+
Extension, or list thereof if ``file`` is a list.
|
|
1849
|
+
|
|
1850
|
+
* Use ``None`` to use directory's default extension.
|
|
1851
|
+
* Use ``"*"`` to use the extension implied by ``fmt``.
|
|
1852
|
+
|
|
1853
|
+
fmt : :class:`cdxcore.subdir.Format`
|
|
1854
|
+
File format or ``None`` to use the directory's default.
|
|
1855
|
+
Note that ``fmt`` cannot be a list even if ``file`` is.
|
|
1856
|
+
Note that unless ``ext`` or the SubDir's extension is '*',
|
|
1857
|
+
changing the format does not automatically change the extension used.
|
|
1350
1858
|
|
|
1351
1859
|
Returns
|
|
1352
1860
|
-------
|
|
1353
|
-
|
|
1861
|
+
Success : bool
|
|
1862
|
+
Boolean to indicate success if ``raise_on_error`` is ``False``.
|
|
1354
1863
|
"""
|
|
1355
|
-
ext, fmt = self.
|
|
1864
|
+
ext, fmt = self.auto_ext_fmt(ext=ext, fmt=fmt)
|
|
1356
1865
|
version = str(version) if not version is None else None
|
|
1357
1866
|
assert ext != self.EXT_FMT_AUTO, ("'ext' is '*'...?")
|
|
1358
1867
|
|
|
1359
1868
|
if version=='*': error("You cannot write version '*'. Use None to write a file without version.")
|
|
1869
|
+
|
|
1360
1870
|
if version is None and fmt in [Format.BLOSC, Format.GZIP]:
|
|
1361
|
-
|
|
1871
|
+
# blosc and gzip have unexpected side effects
|
|
1872
|
+
# when a version is attempted to be read but is not present
|
|
1873
|
+
# (e.g. blosc causes a MemoryError)
|
|
1874
|
+
version = ""
|
|
1362
1875
|
|
|
1363
|
-
def writer(
|
|
1876
|
+
def writer( file, full_file_name, obj ):
|
|
1364
1877
|
try:
|
|
1365
|
-
if fmt == Format.PICKLE
|
|
1366
|
-
|
|
1878
|
+
if fmt == Format.PICKLE:
|
|
1879
|
+
# only if a version is provided write it into the file
|
|
1880
|
+
with open(full_file_name,"wb") as f:
|
|
1367
1881
|
# handle version as byte string
|
|
1368
1882
|
if not version is None:
|
|
1369
1883
|
version_ = bytearray(version, "utf-8")
|
|
@@ -1372,35 +1886,41 @@ class SubDir(object):
|
|
|
1372
1886
|
len8[0] = len(version_)
|
|
1373
1887
|
f.write(len8)
|
|
1374
1888
|
f.write(version_)
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1889
|
+
pickle.dump(obj,f,-1)
|
|
1890
|
+
|
|
1891
|
+
elif fmt == Format.BLOSC:
|
|
1892
|
+
# only if a version is provided write it into the file
|
|
1893
|
+
with open(full_file_name,"wb") as f:
|
|
1894
|
+
# handle version as byte string
|
|
1895
|
+
if not version is None: # it's never None
|
|
1896
|
+
version_ = bytearray(version, "utf-8")
|
|
1897
|
+
if len(version_) > 255: error("Version '%s' is way too long: its byte encoding has length %ld which does not fit into a byte", version, len(version_))
|
|
1898
|
+
len8 = bytearray(1)
|
|
1899
|
+
len8[0] = len(version_)
|
|
1900
|
+
f.write(len8)
|
|
1901
|
+
f.write(version_)
|
|
1902
|
+
pdata = pickle.dumps(obj) # returns data as a bytes object
|
|
1903
|
+
del obj
|
|
1904
|
+
len_data = len(pdata)
|
|
1905
|
+
num_blocks = max(0,len_data-1) // BLOSC_MAX_USE + 1
|
|
1906
|
+
f.write(num_blocks.to_bytes(2, 'big', signed=False))
|
|
1907
|
+
for i in range(num_blocks):
|
|
1908
|
+
start = i*BLOSC_MAX_USE
|
|
1909
|
+
end = min(len_data,start+BLOSC_MAX_USE)
|
|
1910
|
+
assert end>start, ("Internal error; nothing to write")
|
|
1911
|
+
block = blosc.compress( pdata[start:end] )
|
|
1912
|
+
blockl = len(block)
|
|
1913
|
+
f.write( blockl.to_bytes(6, 'big', signed=False) )
|
|
1914
|
+
if blockl > 0:
|
|
1915
|
+
f.write( block )
|
|
1916
|
+
del block
|
|
1917
|
+
del pdata
|
|
1397
1918
|
|
|
1398
1919
|
elif fmt == Format.GZIP:
|
|
1399
|
-
if
|
|
1400
|
-
|
|
1401
|
-
with gzip.open(fullFileName,"wb") as f:
|
|
1920
|
+
# only if a version is provided write it into the file
|
|
1921
|
+
with gzip.open(full_file_name,"wb") as f:
|
|
1402
1922
|
# handle version as byte string
|
|
1403
|
-
if not version is None:
|
|
1923
|
+
if not version is None: # it's never None
|
|
1404
1924
|
version_ = bytearray(version, "utf-8")
|
|
1405
1925
|
if len(version_) > 255: error("Version '%s' is way too long: its byte encoding has length %ld which does not fit into a byte", version, len(version_))
|
|
1406
1926
|
len8 = bytearray(1)
|
|
@@ -1410,12 +1930,11 @@ class SubDir(object):
|
|
|
1410
1930
|
pickle.dump(obj,f,-1)
|
|
1411
1931
|
|
|
1412
1932
|
elif fmt in [Format.JSON_PLAIN, Format.JSON_PICKLE]:
|
|
1413
|
-
|
|
1933
|
+
# only if a version is provided write it into the file
|
|
1934
|
+
with open(full_file_name,"wt",encoding="utf-8") as f:
|
|
1414
1935
|
if not version is None:
|
|
1415
1936
|
f.write("# " + version + "\n")
|
|
1416
1937
|
if fmt == Format.JSON_PICKLE:
|
|
1417
|
-
if jsonpickle is None:
|
|
1418
|
-
raise ModuleNotFoundError("jsonpickle", "'jsonpickle' not found")
|
|
1419
1938
|
f.write( jsonpickle.encode(obj) )
|
|
1420
1939
|
else:
|
|
1421
1940
|
assert fmt == Format.JSON_PLAIN, ("Internal error: invalid Format", fmt)
|
|
@@ -1424,27 +1943,21 @@ class SubDir(object):
|
|
|
1424
1943
|
else:
|
|
1425
1944
|
raise NotImplementedError(fmt, txtfmt("Internal error: invalid format '%s'", fmt))
|
|
1426
1945
|
except Exception as e:
|
|
1427
|
-
if
|
|
1946
|
+
if raise_on_error:
|
|
1428
1947
|
raise e
|
|
1429
1948
|
return False
|
|
1430
1949
|
return True
|
|
1431
|
-
return self._write( writer=writer,
|
|
1950
|
+
return self._write( writer=writer, file=file, obj=obj, raise_on_error=raise_on_error, ext=ext )
|
|
1432
1951
|
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
def writeString( self, key : str, line : str, raiseOnError : bool = True, *, ext : str = None ) -> bool:
|
|
1952
|
+
def write_string( self, file : str, line : str, raise_on_error : bool = True, *, ext : str = None ) -> bool:
|
|
1436
1953
|
"""
|
|
1437
|
-
Writes
|
|
1438
|
-
-- Supports 'key' containing directories
|
|
1439
|
-
-- Supports 'key' being a list.
|
|
1440
|
-
In this case, line can either be the same value for all key's or a list, too.
|
|
1441
|
-
|
|
1442
|
-
If the current directory is 'None', then the function throws an EOFError exception
|
|
1443
|
-
See additional comments for write()
|
|
1954
|
+
Writes a line of text into a file.
|
|
1444
1955
|
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1956
|
+
* Supports ``file`` containing directories.
|
|
1957
|
+
* Supports ``file`` being a list.
|
|
1958
|
+
In this case, ``line`` can either be the same value for all files or a list, too.
|
|
1959
|
+
|
|
1960
|
+
If the current directory is ``None``, then the function raises an :class:`EOFError` exception.
|
|
1448
1961
|
"""
|
|
1449
1962
|
verify( not isinstance(ext, Format), "Cannot change format when writing strings. Found extension '%s'", ext, exception=ValueError )
|
|
1450
1963
|
ext = ext if not ext is None else self._ext
|
|
@@ -1452,38 +1965,37 @@ class SubDir(object):
|
|
|
1452
1965
|
|
|
1453
1966
|
if len(line) == 0 or line[-1] != '\n':
|
|
1454
1967
|
line += '\n'
|
|
1455
|
-
def writer(
|
|
1968
|
+
def writer( file, full_file_name, obj ):
|
|
1456
1969
|
try:
|
|
1457
|
-
with open(
|
|
1970
|
+
with open(full_file_name,"wt",encoding="utf-8") as f:
|
|
1458
1971
|
f.write(obj)
|
|
1459
1972
|
except Exception as e:
|
|
1460
|
-
if
|
|
1973
|
+
if raise_on_error:
|
|
1461
1974
|
raise e
|
|
1462
1975
|
return False
|
|
1463
1976
|
return True
|
|
1464
|
-
return self._write( writer=writer,
|
|
1977
|
+
return self._write( writer=writer, file=file, obj=line, raise_on_error=raise_on_error, ext=ext )
|
|
1465
1978
|
|
|
1466
1979
|
# -- iterate --
|
|
1467
1980
|
|
|
1468
1981
|
def files(self, *, ext : str = None) -> list:
|
|
1469
1982
|
"""
|
|
1470
|
-
Returns a list of
|
|
1983
|
+
Returns a list of files in this subdirectory with the current extension, or the specified extension.
|
|
1471
1984
|
|
|
1472
1985
|
In other words, if the extension is ".pck", and the files are "file1.pck", "file2.pck", "file3.bin"
|
|
1473
1986
|
then this function will return [ "file1", "file2" ]
|
|
1474
1987
|
|
|
1475
|
-
If
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
This function ignores directories. Use subDirs() to retrieve those.
|
|
1988
|
+
If ``ext`` is:
|
|
1989
|
+
|
|
1990
|
+
* ``None``, then the directory's default extension will be used.
|
|
1991
|
+
* ``""`` then this function will return all files in this directory.
|
|
1992
|
+
* ``"*"`` then the extension corresponding to the current format will be used.
|
|
1481
1993
|
|
|
1482
|
-
|
|
1994
|
+
This function ignores directories. Use :meth:`cdxcore.subdir.SubDir.sub_dirs` to retrieve those.
|
|
1483
1995
|
"""
|
|
1484
|
-
if not self.
|
|
1996
|
+
if not self.path_exists():
|
|
1485
1997
|
return []
|
|
1486
|
-
ext = self.
|
|
1998
|
+
ext = self.auto_ext( ext )
|
|
1487
1999
|
ext_l = len(ext)
|
|
1488
2000
|
keys = []
|
|
1489
2001
|
with os.scandir(self._path) as it:
|
|
@@ -1497,15 +2009,15 @@ class SubDir(object):
|
|
|
1497
2009
|
else:
|
|
1498
2010
|
keys.append( entry.name )
|
|
1499
2011
|
return keys
|
|
1500
|
-
keys = files
|
|
1501
2012
|
|
|
1502
|
-
def
|
|
2013
|
+
def sub_dirs(self) -> list:
|
|
1503
2014
|
"""
|
|
1504
|
-
|
|
1505
|
-
|
|
2015
|
+
Retrieve a list of all sub directories.
|
|
2016
|
+
|
|
2017
|
+
If ``self`` does not refer to an existing directory, then this function returns an empty list.
|
|
1506
2018
|
"""
|
|
1507
2019
|
# do not do anything if the object was deleted
|
|
1508
|
-
if not self.
|
|
2020
|
+
if not self.path_exists():
|
|
1509
2021
|
return []
|
|
1510
2022
|
subdirs = []
|
|
1511
2023
|
with os.scandir(self._path[:-1]) as it:
|
|
@@ -1517,322 +2029,345 @@ class SubDir(object):
|
|
|
1517
2029
|
|
|
1518
2030
|
# -- delete --
|
|
1519
2031
|
|
|
1520
|
-
def delete( self,
|
|
2032
|
+
def delete( self, file : str, raise_on_error: bool = False, *, ext : str = None ):
|
|
1521
2033
|
"""
|
|
1522
|
-
Deletes
|
|
2034
|
+
Deletes ``file``.
|
|
2035
|
+
|
|
2036
|
+
This function will quietly fail if ``file`` does not exist unless ``raise_on_error``
|
|
2037
|
+
is set to ``True``.
|
|
1523
2038
|
|
|
1524
2039
|
Parameters
|
|
1525
2040
|
----------
|
|
1526
|
-
|
|
2041
|
+
file :
|
|
1527
2042
|
filename, or list of filenames
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
2043
|
+
|
|
2044
|
+
raise_on_error : bool
|
|
2045
|
+
If ``False``, do not throw :class:`KeyError` if file does not exist
|
|
2046
|
+
or another error occurs.
|
|
2047
|
+
|
|
2048
|
+
ext : str
|
|
2049
|
+
Extension, or list thereof if ``file`` is a list.
|
|
2050
|
+
|
|
1532
2051
|
Use
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
2052
|
+
|
|
2053
|
+
* ``None`` for the directory default.
|
|
2054
|
+
* ``""`` to not use an automatic extension.
|
|
2055
|
+
* ``"*"`` to use the extension associated with the format of the directory.
|
|
1536
2056
|
"""
|
|
1537
2057
|
# do not do anything if the object was deleted
|
|
1538
2058
|
if self._path is None:
|
|
1539
|
-
if
|
|
2059
|
+
if raise_on_error: raise EOFError("Cannot delete '%s': current directory not specified" % file)
|
|
1540
2060
|
return
|
|
1541
2061
|
|
|
1542
2062
|
# vector version
|
|
1543
|
-
if not isinstance(
|
|
1544
|
-
if not isinstance(
|
|
1545
|
-
l = len(
|
|
2063
|
+
if not isinstance(file,str):
|
|
2064
|
+
if not isinstance(file, Collection): error( "'file' must be a string or an interable object. Found type %s", type(file))
|
|
2065
|
+
l = len(file)
|
|
1546
2066
|
if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
|
|
1547
2067
|
ext = [ ext ] * l
|
|
1548
2068
|
else:
|
|
1549
|
-
if len(ext) != l: error("'ext' must have same lengths as '
|
|
1550
|
-
for k, e in zip(
|
|
1551
|
-
self.delete(k,
|
|
2069
|
+
if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l )
|
|
2070
|
+
for k, e in zip(file,ext):
|
|
2071
|
+
self.delete(k, raise_on_error=raise_on_error, ext=e)
|
|
1552
2072
|
return
|
|
1553
2073
|
|
|
1554
|
-
# handle directories in '
|
|
1555
|
-
if len(
|
|
1556
|
-
sub, key_ = os.path.split(
|
|
1557
|
-
if len(key_) == 0: error("'
|
|
1558
|
-
if len(sub) > 0: return SubDir(sub,parent=self).delete(key_,
|
|
2074
|
+
# handle directories in 'file'
|
|
2075
|
+
if len(file) == 0: error( "'file' is empty" )
|
|
2076
|
+
sub, key_ = os.path.split(file)
|
|
2077
|
+
if len(key_) == 0: error("'file' %s indicates a directory, not a file", file)
|
|
2078
|
+
if len(sub) > 0: return SubDir(sub,parent=self).delete(key_,raise_on_error=raise_on_error,ext=ext)
|
|
1559
2079
|
# don't try if directory doesn't exist
|
|
1560
|
-
if not self.
|
|
1561
|
-
if
|
|
1562
|
-
raise KeyError(
|
|
2080
|
+
if not self.path_exists():
|
|
2081
|
+
if raise_on_error:
|
|
2082
|
+
raise KeyError(file)
|
|
1563
2083
|
return
|
|
1564
|
-
|
|
1565
|
-
if not os.path.exists(
|
|
1566
|
-
if
|
|
1567
|
-
raise KeyError(
|
|
2084
|
+
full_file_name = self.full_file_name(file, ext=ext)
|
|
2085
|
+
if not os.path.exists(full_file_name):
|
|
2086
|
+
if raise_on_error:
|
|
2087
|
+
raise KeyError(file)
|
|
1568
2088
|
else:
|
|
1569
|
-
os.remove(
|
|
2089
|
+
os.remove(full_file_name)
|
|
1570
2090
|
|
|
1571
|
-
def
|
|
2091
|
+
def delete_all_files( self, raise_on_error : bool = False, *, ext : str = None ):
|
|
1572
2092
|
"""
|
|
1573
2093
|
Deletes all valid keys in this sub directory with the correct extension.
|
|
1574
2094
|
|
|
1575
2095
|
Parameters
|
|
1576
2096
|
----------
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
-- A Format to specify the default extension for that format.
|
|
2097
|
+
raise_on_error : bool
|
|
2098
|
+
Set to ``False`` to quietly ignore errors.
|
|
2099
|
+
|
|
2100
|
+
ext : str
|
|
2101
|
+
Extension to be used:
|
|
2102
|
+
|
|
2103
|
+
* ``None`` for the directory default.
|
|
2104
|
+
* ``""`` to not use an automatic extension.
|
|
2105
|
+
* ``"*"`` to use the extension associated with the format of the directory.
|
|
1587
2106
|
"""
|
|
1588
2107
|
if self._path is None:
|
|
1589
|
-
if
|
|
2108
|
+
if raise_on_error: raise EOFError("Cannot delete all files: current directory not specified")
|
|
1590
2109
|
return
|
|
1591
|
-
if not self.
|
|
2110
|
+
if not self.path_exists():
|
|
1592
2111
|
return
|
|
1593
|
-
self.delete( self.
|
|
2112
|
+
self.delete( self.files(ext=ext), raise_on_error=raise_on_error, ext=ext )
|
|
1594
2113
|
|
|
1595
|
-
def
|
|
2114
|
+
def delete_all_content( self, delete_self : bool = False, raise_on_error : bool = False, *, ext : str = None ):
|
|
1596
2115
|
"""
|
|
1597
2116
|
Deletes all valid keys and subdirectories in this sub directory.
|
|
2117
|
+
|
|
1598
2118
|
Does not delete files with other extensions.
|
|
1599
|
-
Use
|
|
2119
|
+
Use :meth:`cdxcore.subdir.SubDir.delete_everything` if the aim is to delete, well, everything.
|
|
1600
2120
|
|
|
1601
2121
|
Parameters
|
|
1602
2122
|
----------
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
|
|
1606
|
-
False for silent failure
|
|
1607
|
-
ext:
|
|
1608
|
-
Extension for keys, or None for the directory's default.
|
|
1609
|
-
|
|
1610
|
-
Use "" to match all files regardless of extension.
|
|
2123
|
+
delete_self: bool
|
|
2124
|
+
Whether to delete the directory itself as well, or only its contents.
|
|
2125
|
+
raise_on_error: bool
|
|
2126
|
+
``False`` for silent failure
|
|
2127
|
+
ext: str
|
|
2128
|
+
Extension for keys, or ``None`` for the directory's default.
|
|
2129
|
+
Use ``""`` to match all files regardless of extension.
|
|
1611
2130
|
"""
|
|
1612
2131
|
# do not do anything if the object was deleted
|
|
1613
2132
|
if self._path is None:
|
|
1614
|
-
if
|
|
2133
|
+
if raise_on_error: raise EOFError("Cannot delete all contents: current directory not specified")
|
|
1615
2134
|
return
|
|
1616
|
-
if not self.
|
|
2135
|
+
if not self.path_exists():
|
|
1617
2136
|
return
|
|
1618
2137
|
# delete sub directories
|
|
1619
|
-
subdirs = self.
|
|
2138
|
+
subdirs = self.sub_dirs();
|
|
1620
2139
|
for subdir in subdirs:
|
|
1621
|
-
SubDir(subdir, parent=self).
|
|
2140
|
+
SubDir(subdir, parent=self).delete_all_content( delete_self=True, raise_on_error=raise_on_error, ext=ext )
|
|
1622
2141
|
# delete keys
|
|
1623
|
-
self.
|
|
2142
|
+
self.delete_all_files( raise_on_error=raise_on_error,ext=ext )
|
|
1624
2143
|
# delete myself
|
|
1625
|
-
if not
|
|
2144
|
+
if not delete_self:
|
|
1626
2145
|
return
|
|
1627
2146
|
rest = list( os.scandir(self._path[:-1]) )
|
|
1628
2147
|
txt = str(rest)
|
|
1629
2148
|
txt = txt if len(txt) < 50 else (txt[:47] + '...')
|
|
1630
2149
|
if len(rest) > 0:
|
|
1631
|
-
if
|
|
2150
|
+
if raise_on_error: error( "Cannot delete my own directory %s: directory not empty: found %ld object(s): %s", self._path,len(rest), txt)
|
|
1632
2151
|
return
|
|
1633
2152
|
os.rmdir(self._path[:-1]) ## does not work ????
|
|
1634
2153
|
self._path = None
|
|
1635
2154
|
|
|
1636
|
-
def
|
|
2155
|
+
def delete_everything( self, keep_directory : bool = True ):
|
|
1637
2156
|
"""
|
|
1638
|
-
Deletes the entire sub directory will all contents
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
If
|
|
1642
|
-
|
|
1643
|
-
If self is None, do nothing. That means you can call this function several times.
|
|
2157
|
+
Deletes the entire sub directory with all its contents.
|
|
2158
|
+
|
|
2159
|
+
*WARNING:* deletes *all* files and sub-directories, not just those with the present extension.
|
|
2160
|
+
If ``keep_directory`` is ``False``, the directory referred to by this object will also be deleted.
|
|
2161
|
+
In this case, ``self`` will be set to ``None``.
|
|
1644
2162
|
"""
|
|
1645
2163
|
if self._path is None:
|
|
1646
2164
|
return
|
|
1647
|
-
if not self.
|
|
2165
|
+
if not self.path_exists():
|
|
1648
2166
|
return
|
|
1649
2167
|
shutil.rmtree(self._path[:-1], ignore_errors=True)
|
|
1650
|
-
if not
|
|
2168
|
+
if not keep_directory and os.path.exists(self._path[:-1]):
|
|
1651
2169
|
os.rmdir(self._path[:-1])
|
|
1652
2170
|
self._path = None
|
|
1653
|
-
elif
|
|
2171
|
+
elif keep_directory and not os.path.exists(self._path[:-1]):
|
|
1654
2172
|
os.makedirs(self._path[:-1])
|
|
1655
2173
|
|
|
1656
2174
|
# -- file ops --
|
|
1657
2175
|
|
|
1658
|
-
def exists(self,
|
|
2176
|
+
def exists(self, file : str, *, ext : str = None ) -> bool:
|
|
1659
2177
|
"""
|
|
1660
|
-
Checks whether
|
|
2178
|
+
Checks whether a file exists.
|
|
1661
2179
|
|
|
1662
2180
|
Parameters
|
|
1663
2181
|
----------
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1667
|
-
|
|
2182
|
+
file :
|
|
2183
|
+
Filename, or list of filenames.
|
|
2184
|
+
|
|
2185
|
+
ext : str
|
|
2186
|
+
Extension, or list thereof if ``file`` is a list.
|
|
2187
|
+
|
|
1668
2188
|
Use
|
|
1669
|
-
|
|
1670
|
-
|
|
1671
|
-
|
|
2189
|
+
|
|
2190
|
+
* ``None`` for the directory default.
|
|
2191
|
+
* ``""`` to not use an automatic extension.
|
|
2192
|
+
* ``"*"`` to use the extension associated with the format of the directory.
|
|
1672
2193
|
|
|
1673
2194
|
Returns
|
|
1674
2195
|
-------
|
|
1675
|
-
|
|
2196
|
+
Status : bool
|
|
2197
|
+
If ``file`` is a string, returns ``True`` or ``False``, else it will return a list of ``bool`` values.
|
|
1676
2198
|
"""
|
|
1677
2199
|
# vector version
|
|
1678
|
-
if not isinstance(
|
|
1679
|
-
verify( isinstance(
|
|
1680
|
-
l = len(
|
|
2200
|
+
if not isinstance(file,str):
|
|
2201
|
+
verify( isinstance(file, Collection), "'file' must be a string or an interable object. Found type %s", type(file))
|
|
2202
|
+
l = len(file)
|
|
1681
2203
|
if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
|
|
1682
2204
|
ext = [ ext ] * l
|
|
1683
2205
|
else:
|
|
1684
|
-
if len(ext) != l: error("'ext' must have same lengths as '
|
|
1685
|
-
return [ self.exists(k,ext=e) for k,e in zip(
|
|
2206
|
+
if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l )
|
|
2207
|
+
return [ self.exists(k,ext=e) for k,e in zip(file,ext) ]
|
|
1686
2208
|
# empty directory
|
|
1687
2209
|
if self._path is None:
|
|
1688
2210
|
return False
|
|
1689
|
-
# handle directories in '
|
|
1690
|
-
if len(
|
|
1691
|
-
sub, key_ = os.path.split(
|
|
1692
|
-
if len(key_) == 0: raise IsADirectoryError(
|
|
2211
|
+
# handle directories in 'file'
|
|
2212
|
+
if len(file) == 0: raise ValueError("'file' missing (the filename)")
|
|
2213
|
+
sub, key_ = os.path.split(file)
|
|
2214
|
+
if len(key_) == 0: raise IsADirectoryError( file, txtfmt("'file' %s indicates a directory, not a file", file) )
|
|
1693
2215
|
if len(sub) > 0:
|
|
1694
|
-
return self(sub).exists(
|
|
2216
|
+
return self(sub).exists(file=key_,ext=ext)
|
|
1695
2217
|
# if directory doesn't exist
|
|
1696
|
-
if not self.
|
|
2218
|
+
if not self.path_exists():
|
|
1697
2219
|
return False
|
|
1698
|
-
# single
|
|
1699
|
-
|
|
1700
|
-
if not os.path.exists(
|
|
2220
|
+
# single file
|
|
2221
|
+
full_file_name = self.full_file_name(file, ext=ext)
|
|
2222
|
+
if not os.path.exists(full_file_name):
|
|
1701
2223
|
return False
|
|
1702
|
-
if not os.path.isfile(
|
|
1703
|
-
raise IsADirectoryError("Structural error:
|
|
2224
|
+
if not os.path.isfile(full_file_name):
|
|
2225
|
+
raise IsADirectoryError("Structural error: file %s: exists, but is not a file (full path %s)",file,full_file_name)
|
|
1704
2226
|
return True
|
|
1705
2227
|
|
|
1706
|
-
def _getFileProperty( self, *,
|
|
2228
|
+
def _getFileProperty( self, *, file : str, ext : str, func ):
|
|
1707
2229
|
# vector version
|
|
1708
|
-
if not isinstance(
|
|
1709
|
-
verify( isinstance(
|
|
1710
|
-
l = len(
|
|
2230
|
+
if not isinstance(file,str):
|
|
2231
|
+
verify( isinstance(file, Collection), "'file' must be a string or an interable object. Found type %s", type(file))
|
|
2232
|
+
l = len(file)
|
|
1711
2233
|
if ext is None or isinstance(ext,str) or not isinstance(ext, Collection):
|
|
1712
2234
|
ext = [ ext ] * l
|
|
1713
2235
|
else:
|
|
1714
|
-
if len(ext) != l: error("'ext' must have same lengths as '
|
|
1715
|
-
return [ self._getFileProperty(
|
|
2236
|
+
if len(ext) != l: error("'ext' must have same lengths as 'file' if the latter is a collection; found %ld and %ld", len(ext), l )
|
|
2237
|
+
return [ self._getFileProperty(file=k,ext=e,func=func) for k,e in zip(file,ext) ]
|
|
1716
2238
|
# empty directory
|
|
1717
2239
|
if self._path is None:
|
|
1718
2240
|
return None
|
|
1719
|
-
# handle directories in '
|
|
1720
|
-
if len(
|
|
1721
|
-
sub, key_ = os.path.split(
|
|
1722
|
-
if len(key_) == 0: raise IsADirectoryError(
|
|
1723
|
-
if len(sub) > 0: return self(sub)._getFileProperty(
|
|
2241
|
+
# handle directories in 'file'
|
|
2242
|
+
if len(file) == 0: raise ValueError("'file' missing (the filename)")
|
|
2243
|
+
sub, key_ = os.path.split(file)
|
|
2244
|
+
if len(key_) == 0: raise IsADirectoryError( file, txtfmt("'file' %s indicates a directory, not a file", file) )
|
|
2245
|
+
if len(sub) > 0: return self(sub)._getFileProperty(file=key_,ext=ext,func=func)
|
|
1724
2246
|
# if directory doesn't exist
|
|
1725
|
-
if not self.
|
|
2247
|
+
if not self.path_exists():
|
|
1726
2248
|
return None
|
|
1727
|
-
# single
|
|
1728
|
-
|
|
1729
|
-
if not os.path.exists(
|
|
2249
|
+
# single file
|
|
2250
|
+
full_file_name = self.full_file_name(file, ext=ext)
|
|
2251
|
+
if not os.path.exists(full_file_name):
|
|
1730
2252
|
return None
|
|
1731
|
-
return func(
|
|
2253
|
+
return func(full_file_name)
|
|
1732
2254
|
|
|
1733
|
-
def
|
|
2255
|
+
def get_creation_time( self, file : str, *, ext : str = None ) -> datetime.datetime:
|
|
1734
2256
|
"""
|
|
1735
|
-
Returns the creation time of
|
|
1736
|
-
|
|
2257
|
+
Returns the creation time of a file.
|
|
2258
|
+
|
|
2259
|
+
See comments on :func:`os.path.getctime` for system compatibility information.
|
|
1737
2260
|
|
|
1738
2261
|
Parameters
|
|
1739
2262
|
----------
|
|
1740
|
-
|
|
1741
|
-
filename, or list of filenames
|
|
2263
|
+
file :
|
|
2264
|
+
filename, or list of filenames.
|
|
1742
2265
|
ext :
|
|
1743
|
-
Extension, or list thereof if
|
|
1744
|
-
Use
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
2266
|
+
Extension, or list thereof if ``file`` is a list.
|
|
2267
|
+
Use:
|
|
2268
|
+
|
|
2269
|
+
* ``None`` for the directory default.
|
|
2270
|
+
* ``""`` for no automatic extension.
|
|
2271
|
+
* A :class:`cdxcore.subdir.Format` to use the default extension for that format.
|
|
1748
2272
|
|
|
1749
2273
|
Returns
|
|
1750
2274
|
-------
|
|
1751
|
-
datetime.datetime
|
|
2275
|
+
Datetime : :class:`datetime.datetime`
|
|
2276
|
+
A single ``datetime`` if ``file`` is a string, otherwise a list of ``datetime``'s.
|
|
2277
|
+
Returns ``None`` if the directory or the file does not exist.
|
|
1752
2278
|
"""
|
|
1753
|
-
return self._getFileProperty(
|
|
2279
|
+
return self._getFileProperty( file=file, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getctime(x)) )
|
|
1754
2280
|
|
|
1755
|
-
def
|
|
2281
|
+
def get_last_modification_time( self, file : str, *, ext : str = None ) -> datetime.datetime:
|
|
1756
2282
|
"""
|
|
1757
|
-
Returns the last modification time
|
|
1758
|
-
|
|
2283
|
+
Returns the last modification time of a file.
|
|
2284
|
+
|
|
2285
|
+
See comments on :func:`os.path.getmtime` for system compatibility information.
|
|
1759
2286
|
|
|
1760
2287
|
Parameters
|
|
1761
2288
|
----------
|
|
1762
|
-
|
|
1763
|
-
filename, or list of filenames
|
|
2289
|
+
file :
|
|
2290
|
+
filename, or list of filenames.
|
|
1764
2291
|
ext :
|
|
1765
|
-
Extension, or list thereof if
|
|
1766
|
-
Use
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
2292
|
+
Extension, or list thereof if ``file`` is a list.
|
|
2293
|
+
Use:
|
|
2294
|
+
|
|
2295
|
+
* ``None`` for the directory default.
|
|
2296
|
+
* ``""`` for no automatic extension.
|
|
2297
|
+
* A :class:`cdxcore.subdir.Format` to use the default extension for that format.
|
|
1770
2298
|
|
|
1771
2299
|
Returns
|
|
1772
2300
|
-------
|
|
1773
|
-
datetime.datetime
|
|
2301
|
+
Datetime : :class:`datetime.datetime`
|
|
2302
|
+
A single ``datetime`` if ``file`` is a string, otherwise a list of ``datetime``'s.
|
|
2303
|
+
Returns ``None`` if the directory or the file does not exist.
|
|
1774
2304
|
"""
|
|
1775
|
-
return self._getFileProperty(
|
|
2305
|
+
return self._getFileProperty( file=file, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getmtime(x)) )
|
|
1776
2306
|
|
|
1777
|
-
def
|
|
2307
|
+
def get_last_access_time( self, file : str, *, ext : str = None ) -> datetime.datetime:
|
|
1778
2308
|
"""
|
|
1779
|
-
Returns the last access time of
|
|
1780
|
-
|
|
2309
|
+
Returns the last access time of a file.
|
|
2310
|
+
|
|
2311
|
+
See comments on :func:`os.path.getatime` for system compatibility information.
|
|
1781
2312
|
|
|
1782
2313
|
Parameters
|
|
1783
2314
|
----------
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
2315
|
+
file : str
|
|
2316
|
+
Filename, or list of filenames.
|
|
2317
|
+
|
|
2318
|
+
ext : str
|
|
2319
|
+
Extension, or list thereof if ``file`` is a list.
|
|
2320
|
+
|
|
2321
|
+
* Use ``None`` for the directory default.
|
|
2322
|
+
* Use ``""`` for no automatic extension.
|
|
1792
2323
|
|
|
1793
2324
|
Returns
|
|
1794
2325
|
-------
|
|
1795
|
-
datetime.datetime
|
|
2326
|
+
Datetime : :class:`datetime.datetime`
|
|
2327
|
+
A single ``datetime`` if ``file`` is a string, otherwise a list of ``datetime``'s.
|
|
2328
|
+
Returns ``None`` if the directory or the file does not exist.
|
|
1796
2329
|
"""
|
|
1797
|
-
return self._getFileProperty(
|
|
2330
|
+
return self._getFileProperty( file=file, ext=ext, func=lambda x : datetime.datetime.fromtimestamp(os.path.getatime(x)) )
|
|
1798
2331
|
|
|
1799
|
-
def
|
|
2332
|
+
def file_size( self, file : str, *, ext : str = None ) -> int:
|
|
1800
2333
|
"""
|
|
1801
|
-
Returns the file size of
|
|
1802
|
-
|
|
2334
|
+
Returns the file size of a file.
|
|
2335
|
+
|
|
2336
|
+
See comments on :func:`os.path.getsize` for system compatibility information.
|
|
1803
2337
|
|
|
1804
2338
|
Parameters
|
|
1805
2339
|
----------
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
2340
|
+
file : str
|
|
2341
|
+
Filename, or list of filenames.
|
|
2342
|
+
|
|
2343
|
+
ext : str
|
|
2344
|
+
Extension, or list thereof if ``file`` is a list.
|
|
2345
|
+
|
|
2346
|
+
* Use ``None`` for the directory default.
|
|
2347
|
+
* Use ``""`` for no automatic extension.
|
|
1814
2348
|
|
|
1815
2349
|
Returns
|
|
1816
2350
|
-------
|
|
1817
|
-
File size if
|
|
2351
|
+
File size of ``file`` in bytes, or ``None`` if the directory or the file does not exist; a list thereof if ``file`` is a list.
|
|
1818
2352
|
"""
|
|
1819
|
-
return self._getFileProperty(
|
|
2353
|
+
return self._getFileProperty( file=file, ext=ext, func=lambda x : os.path.getsize(x) )
|
|
1820
2354
|
|
|
1821
2355
|
def rename( self, source : str, target : str, *, ext : str = None ):
|
|
1822
2356
|
"""
|
|
1823
|
-
Rename
|
|
1824
|
-
|
|
2357
|
+
Rename a file.
|
|
2358
|
+
|
|
2359
|
+
This function will raise an exception if not successful.
|
|
1825
2360
|
|
|
1826
2361
|
Parameters
|
|
1827
2362
|
----------
|
|
1828
|
-
source, target:
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
2363
|
+
source, target : str
|
|
2364
|
+
Filenames.
|
|
2365
|
+
|
|
2366
|
+
ext : str
|
|
2367
|
+
Extension.
|
|
2368
|
+
|
|
2369
|
+
* Use ``None`` for the directory default.
|
|
2370
|
+
* Use ``""`` for no automatic extension.
|
|
1836
2371
|
"""
|
|
1837
2372
|
# empty directory
|
|
1838
2373
|
if self._path is None:
|
|
@@ -1843,9 +2378,9 @@ class SubDir(object):
|
|
|
1843
2378
|
sub, source_ = os.path.split(source)
|
|
1844
2379
|
if len(source_) == 0: raise IsADirectoryError( source, txtfmt("'source' %s indicates a directory, not a file", source ))
|
|
1845
2380
|
if len(sub) > 0:
|
|
1846
|
-
src_full = self(sub).
|
|
2381
|
+
src_full = self(sub).full_file_name(file=source_,ext=ext)
|
|
1847
2382
|
else:
|
|
1848
|
-
src_full = self.
|
|
2383
|
+
src_full = self.full_file_name( source, ext=ext )
|
|
1849
2384
|
|
|
1850
2385
|
# handle directories in 'target'
|
|
1851
2386
|
if len(target) == 0: raise ValueError("'target' missing (the filename)" )
|
|
@@ -1853,191 +2388,257 @@ class SubDir(object):
|
|
|
1853
2388
|
if len(target_) == 0: raise IsADirectoryError( target, txtfmt("'target' %s indicates a directory, not a file", target))
|
|
1854
2389
|
if len(sub) > 0:
|
|
1855
2390
|
tar_dir = self(sub)
|
|
1856
|
-
tar_dir.
|
|
1857
|
-
tar_full = tar_dir.
|
|
2391
|
+
tar_dir.create_directory()
|
|
2392
|
+
tar_full = tar_dir.full_file_name(file=target_,ext=ext)
|
|
1858
2393
|
else:
|
|
1859
|
-
tar_full = self.
|
|
1860
|
-
self.
|
|
2394
|
+
tar_full = self.full_file_name( target, ext=ext )
|
|
2395
|
+
self.create_directory()
|
|
1861
2396
|
|
|
1862
2397
|
os.rename(src_full, tar_full)
|
|
1863
2398
|
|
|
1864
2399
|
# utilities
|
|
1865
2400
|
|
|
1866
2401
|
@staticmethod
|
|
1867
|
-
def
|
|
2402
|
+
def remove_bad_file_characters( file : str, by : str="default" ) -> str:
|
|
1868
2403
|
"""
|
|
1869
|
-
Replaces invalid characters in a filename
|
|
1870
|
-
|
|
2404
|
+
Replaces invalid characters in a filename using the map ``by``.
|
|
2405
|
+
|
|
2406
|
+
See :func:`cdxcore.util.fmt_filename` for documentation and further options.
|
|
1871
2407
|
"""
|
|
1872
|
-
return fmt_filename(
|
|
2408
|
+
return fmt_filename( file, by=by )
|
|
1873
2409
|
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1884
|
-
|
|
1885
|
-
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
2410
|
+
if False:
|
|
2411
|
+
def unqiueLabelToKey( self, unique_label:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
|
|
2412
|
+
"""
|
|
2413
|
+
Converts a unique label which might contain invalid characters into a unique file name, such that the full file name does not exceed 'max_length' bytes.
|
|
2414
|
+
The returned file has the format
|
|
2415
|
+
name + separator + ID
|
|
2416
|
+
where ID has length id_length.
|
|
2417
|
+
If unique_label is already guaranteed to be a valid filename, use unqiueLongFileNameToKey() instead.
|
|
2418
|
+
"""
|
|
2419
|
+
len_ext = len(self.ext)
|
|
2420
|
+
assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
|
|
2421
|
+
uqf = UniqueLabel( max_length=max_length-len_ext, id_length=id_length, separator=separator, filename_by="default" )
|
|
2422
|
+
return uqf( unique_label )
|
|
2423
|
+
|
|
2424
|
+
def unqiueLongFileNameToKey( self, unique_filename:str, id_length:int=8, separator:str='-', max_length:int=64 ) -> str:
|
|
2425
|
+
"""
|
|
2426
|
+
Converts a unique filename which might be too long to a unique filename such that the total length plus 'ext' does not exceed 'max_length' bytes.
|
|
2427
|
+
If the filename is already short enough, no change is made.
|
|
2428
|
+
|
|
2429
|
+
If 'unique_filename' is not guaranteed to be a valid filename, use unqiueLabelToKey() instead.
|
|
2430
|
+
"""
|
|
2431
|
+
len_ext = len(self.ext)
|
|
2432
|
+
assert len_ext < max_length, ("'max_length' must exceed the length of the extension", max_length, self.ext)
|
|
2433
|
+
uqf = UniqueLabel( max_length=max_length-len_ext, id_length=id_length, separator=separator )
|
|
2434
|
+
return uqf( unique_filename )
|
|
1898
2435
|
|
|
1899
2436
|
# -- dict-like interface --
|
|
1900
2437
|
|
|
1901
|
-
def __call__(self,
|
|
2438
|
+
def __call__(self, element : str,
|
|
1902
2439
|
default = RETURN_SUB_DIRECTORY,
|
|
1903
|
-
|
|
2440
|
+
raise_on_error : bool = False,
|
|
1904
2441
|
*,
|
|
1905
2442
|
version : str = None,
|
|
1906
2443
|
ext : str = None,
|
|
1907
2444
|
fmt : Format = None,
|
|
1908
2445
|
delete_wrong_version : bool = True,
|
|
1909
|
-
|
|
2446
|
+
create_directory : bool = None ):
|
|
1910
2447
|
"""
|
|
1911
|
-
|
|
1912
|
-
|
|
1913
|
-
If
|
|
2448
|
+
Read either data from a file, or return a new sub directory.
|
|
2449
|
+
|
|
2450
|
+
If only the ``element`` argument is used, then this function returns a new sub directory
|
|
2451
|
+
named ``element``.
|
|
2452
|
+
|
|
2453
|
+
If both ``element`` and ``default`` arguments are used, then this function attempts to read the file ``element``
|
|
2454
|
+
from disk, returning ``default`` if it does not exist.
|
|
1914
2455
|
|
|
1915
|
-
sd
|
|
2456
|
+
Assume we have a subdirectory ``sd``::
|
|
2457
|
+
|
|
2458
|
+
from cdxcore.subdir import SubDir
|
|
2459
|
+
sd = SubDir("!/test")
|
|
1916
2460
|
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
x = sd('
|
|
1920
|
-
x = sd('
|
|
2461
|
+
Reading files::
|
|
2462
|
+
|
|
2463
|
+
x = sd('file', None) # reads 'file' with default value None
|
|
2464
|
+
x = sd('sd/file', default=1) # reads 'file' from sub directory 'sd' with default value 1
|
|
2465
|
+
x = sd('file', default=1, ext="tmp") # reads 'file.tmp' with default value 1
|
|
1921
2466
|
|
|
1922
|
-
Create sub directory
|
|
1923
|
-
|
|
1924
|
-
sd2 = sd("
|
|
1925
|
-
sd2 = sd("subdir1/subdir2"
|
|
1926
|
-
sd2 = sd(ext=".tmp")
|
|
2467
|
+
Create sub directory::
|
|
2468
|
+
|
|
2469
|
+
sd2 = sd("subdir") # creates and returns handle to subdirectory 'subdir'
|
|
2470
|
+
sd2 = sd("subdir1/subdir2") # creates and returns handle to subdirectory 'subdir1/subdir2'
|
|
2471
|
+
sd2 = sd("subdir1/subdir2", ext=".tmp") # creates and returns handle to subdirectory 'subdir1/subdir2' with extension "tmp"
|
|
2472
|
+
sd2 = sd(ext=".tmp") # returns handle to current subdirectory with extension "tmp"
|
|
1927
2473
|
|
|
1928
2474
|
Parameters
|
|
1929
2475
|
----------
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
If
|
|
2476
|
+
element : str
|
|
2477
|
+
File or directory name, or a list thereof.
|
|
2478
|
+
|
|
2479
|
+
default : optional
|
|
2480
|
+
If specified, this function reads ``element`` with
|
|
2481
|
+
``read( element, default, *args, **kwargs )``.
|
|
1935
2482
|
|
|
1936
|
-
|
|
1937
|
-
|
|
2483
|
+
If ``default`` is not specified, then this function returns a new sub-directory by calling
|
|
2484
|
+
``SubDir(element,parent=self,ext=ext,fmt=fmt)``.
|
|
1938
2485
|
|
|
1939
|
-
|
|
2486
|
+
create_directory : bool, optional
|
|
2487
|
+
*When creating sub-directories:*
|
|
2488
|
+
|
|
2489
|
+
Whether or not to instantly create the sub-directory. The default, ``None``, is to inherit the behaviour from ``self``.
|
|
2490
|
+
|
|
2491
|
+
raise_on_error : bool, optional
|
|
2492
|
+
*When reading files:*
|
|
2493
|
+
|
|
1940
2494
|
Whether to raise an exception if reading an existing file failed.
|
|
1941
|
-
By default this function fails silently and returns
|
|
1942
|
-
version : str
|
|
1943
|
-
If not None, specifies the version of the current code base.
|
|
1944
|
-
Use '*' to read any version (this is distrinct from reading a file without version).
|
|
1945
|
-
If version is not' '*', then this version will be compared to the version of the file being read.
|
|
1946
|
-
If they do not match, read fails (either by returning default or throwing an exception).
|
|
1947
|
-
delete_wrong_version : bool
|
|
1948
|
-
If True, and if a wrong version was found, delete the file.
|
|
1949
|
-
ext : str
|
|
1950
|
-
Extension overwrite, or a list thereof if key is a list
|
|
1951
|
-
Set to:
|
|
1952
|
-
-- None to use directory's default
|
|
1953
|
-
-- '*' to use the extension implied by 'fmt'
|
|
1954
|
-
-- for convenience 'ext' can also be a Format (in this case leave fmt to None)
|
|
1955
|
-
fmt : Format
|
|
1956
|
-
File format or None to use the directory's default.
|
|
1957
|
-
Note that 'fmt' cannot be a list even if 'key' is.
|
|
1958
|
-
Note that unless 'ext' or the SubDir's extension is '*', changing the format does not automatically change the extension.
|
|
2495
|
+
By default this function fails silently and returns ``default``.
|
|
1959
2496
|
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
2497
|
+
Default is ``False``.
|
|
2498
|
+
|
|
2499
|
+
version : str, optional
|
|
2500
|
+
*When reading files:*
|
|
2501
|
+
|
|
2502
|
+
If not ``None``, specifies the version of the current code base.
|
|
2503
|
+
|
|
2504
|
+
In this case, this version will be compared to the version of the file being read.
|
|
2505
|
+
If they do not match, read fails (either by returning default or throwing a :class:`cdxcore.version.VersionError` exception).
|
|
2506
|
+
|
|
2507
|
+
You can specify version ``"*"`` to accept any version.
|
|
2508
|
+
Note that this is distinct
|
|
2509
|
+
from using ``None`` which stipulates that the file should not
|
|
2510
|
+
have version information.
|
|
2511
|
+
|
|
2512
|
+
Default is ``None``.
|
|
2513
|
+
|
|
2514
|
+
delete_wrong_version : bool, optional
|
|
2515
|
+
*When reading files:*
|
|
2516
|
+
|
|
2517
|
+
If ``True``, and if a wrong version was found, delete the file.
|
|
2518
|
+
|
|
2519
|
+
Default is ``True``.
|
|
2520
|
+
|
|
2521
|
+
ext : str, optional
|
|
2522
|
+
*When reading files:*
|
|
2523
|
+
|
|
2524
|
+
Extension to be used, or a list thereof if ``element`` is a list. Defaults
|
|
2525
|
+
to the extension of ``self``.
|
|
2526
|
+
|
|
2527
|
+
Semantics:
|
|
2528
|
+
|
|
2529
|
+
* ``None`` to use the default extension of ``self``.
|
|
2530
|
+
* ``"*"`` to use the extension implied by ``fmt``.
|
|
2531
|
+
* ``""`` to turn off extension management.
|
|
2532
|
+
|
|
2533
|
+
*When creating sub-directories:*
|
|
2534
|
+
|
|
2535
|
+
Extension for the new subdirectory; set to ``None`` to inherit the parent's extension.
|
|
2536
|
+
|
|
2537
|
+
Default is ``None``.
|
|
2538
|
+
|
|
2539
|
+
|
|
2540
|
+
fmt : :class:`cdxcore.subdir.Format`, optional
|
|
2541
|
+
*When reading files:*
|
|
2542
|
+
|
|
2543
|
+
File format or ``None`` to use the directory's default.
|
|
2544
|
+
Note that ``fmt`` cannot be a list even if ``element`` is.
|
|
2545
|
+
Unless
|
|
2546
|
+
``ext`` or the SubDir's extension is ``"*"``, changing the
|
|
2547
|
+
format does not automatically change the extension.
|
|
2548
|
+
|
|
2549
|
+
*When creating sub-directories:*
|
|
1969
2550
|
|
|
2551
|
+
Format for the new sub-directory; set to ``None`` to inherit the parent's format.
|
|
2552
|
+
|
|
2553
|
+
Default is ``None``.
|
|
2554
|
+
|
|
1970
2555
|
Returns
|
|
1971
2556
|
-------
|
|
2557
|
+
Object : type|SubDir
|
|
1972
2558
|
Either the value in the file, a new sub directory, or lists thereof.
|
|
1973
|
-
Returns None if an element was not found.
|
|
1974
2559
|
"""
|
|
1975
2560
|
if default == SubDir.RETURN_SUB_DIRECTORY:
|
|
1976
|
-
if not isinstance(
|
|
1977
|
-
if not isinstance(
|
|
1978
|
-
raise ValueError(txtfmt("'
|
|
1979
|
-
return [ SubDir( k,parent=self,ext=ext,fmt=fmt,
|
|
1980
|
-
return SubDir(
|
|
1981
|
-
return self.read(
|
|
2561
|
+
if not isinstance(element, str):
|
|
2562
|
+
if not isinstance(element, Collection):
|
|
2563
|
+
raise ValueError(txtfmt("'element' must be a string or an iterable object. Found type '%s;", type(element)))
|
|
2564
|
+
return [ SubDir( k,parent=self,ext=ext,fmt=fmt,create_directory=create_directory) for k in element ]
|
|
2565
|
+
return SubDir(element,parent=self,ext=ext,fmt=fmt,create_directory=create_directory)
|
|
2566
|
+
return self.read( file=element,
|
|
1982
2567
|
default=default,
|
|
1983
|
-
|
|
2568
|
+
raise_on_error=raise_on_error,
|
|
1984
2569
|
version=version,
|
|
1985
2570
|
delete_wrong_version=delete_wrong_version,
|
|
1986
2571
|
ext=ext,
|
|
1987
2572
|
fmt=fmt )
|
|
1988
2573
|
|
|
1989
|
-
def __getitem__( self,
|
|
2574
|
+
def __getitem__( self, file ):
|
|
1990
2575
|
"""
|
|
1991
|
-
Reads
|
|
1992
|
-
If '
|
|
2576
|
+
Reads ``file`` using :meth:`cdxcore.subdir.SubDir.read`.
|
|
2577
|
+
If ``file`` does not exist, throw a :class:`KeyError`.
|
|
1993
2578
|
"""
|
|
1994
|
-
return self.read(
|
|
2579
|
+
return self.read( file=file, default=None, raise_on_error=True )
|
|
1995
2580
|
|
|
1996
|
-
def __setitem__( self,
|
|
1997
|
-
""" Writes
|
|
1998
|
-
self.write(
|
|
2581
|
+
def __setitem__( self, file, value):
|
|
2582
|
+
""" Writes ``value`` to ``file`` using :meth:`cdxcore.subdir.SubDir.write`. """
|
|
2583
|
+
self.write(file,value)
|
|
1999
2584
|
|
|
2000
|
-
def __delitem__(self,
|
|
2001
|
-
""" Silently delete
|
|
2002
|
-
self.delete(
|
|
2585
|
+
def __delitem__(self,file):
|
|
2586
|
+
""" Silently delete ``file`` using :meth:`cdxcore.subdir.SubDir.delete`. """
|
|
2587
|
+
self.delete(file, False )
|
|
2003
2588
|
|
|
2004
2589
|
def __len__(self) -> int:
|
|
2005
|
-
""" Return the number of files
|
|
2006
|
-
return len(self.
|
|
2590
|
+
""" Return the number of files in this directory with matching extension. """
|
|
2591
|
+
return len(self.files())
|
|
2007
2592
|
|
|
2008
2593
|
def __iter__(self):
|
|
2009
|
-
""" Returns an iterator which allows traversing through all
|
|
2010
|
-
return self.
|
|
2011
|
-
|
|
2012
|
-
def __contains__(self, key):
|
|
2013
|
-
""" Implements 'in' operator """
|
|
2014
|
-
return self.exists(key)
|
|
2015
|
-
|
|
2016
|
-
# -- object like interface --
|
|
2594
|
+
""" Returns an iterator which allows traversing through all files in this directory with matching extension. """
|
|
2595
|
+
return self.files().__iter__()
|
|
2017
2596
|
|
|
2018
|
-
def
|
|
2019
|
-
"""
|
|
2020
|
-
|
|
2021
|
-
|
|
2597
|
+
def __contains__(self, file):
|
|
2598
|
+
""" Tests whether ``file`` :meth:`cdxcore.subdir.SubDir.exists`. """
|
|
2599
|
+
return self.exists(file)
|
|
2600
|
+
|
|
2601
|
+
def items(self, *, ext : str = None, raise_on_error : bool = False) -> Iterable:
|
|
2022
2602
|
"""
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2603
|
+
Dictionary-style iterable of filenames and their content.
|
|
2604
|
+
|
|
2605
|
+
Usage::
|
|
2606
|
+
|
|
2607
|
+
subdir = SubDir("!")
|
|
2608
|
+
for file, data in subdir.items():
|
|
2609
|
+
print( file, str(data)[:100] )
|
|
2026
2610
|
|
|
2027
|
-
|
|
2611
|
+
Parameters
|
|
2612
|
+
----------
|
|
2613
|
+
ext : str
|
|
2614
|
+
Extension or ``None`` for the directory's current extension. Use ``""``
|
|
2615
|
+
for all file extensions.
|
|
2616
|
+
|
|
2617
|
+
Returns
|
|
2618
|
+
-------
|
|
2619
|
+
Iterable
|
|
2620
|
+
An iterable generator
|
|
2621
|
+
"""
|
|
2622
|
+
class ItemIterable(Iterable):
|
|
2623
|
+
def __init__(_):
|
|
2624
|
+
_._files = self.files(ext=ext)
|
|
2625
|
+
_._subdir = self
|
|
2626
|
+
def __len__(_):
|
|
2627
|
+
return len(_._files)
|
|
2628
|
+
def __iter__(_):
|
|
2629
|
+
for file in _._files:
|
|
2630
|
+
data = _._subdir.read(file, ext=ext, raise_on_error=raise_on_error)
|
|
2631
|
+
yield file, data
|
|
2632
|
+
return ItemIterable()
|
|
2633
|
+
|
|
2634
|
+
# convenient path ops
|
|
2635
|
+
# -------------------
|
|
2636
|
+
|
|
2637
|
+
def __add__(self, directory : str) -> str:
|
|
2028
2638
|
"""
|
|
2029
|
-
|
|
2030
|
-
Note: keys starting with '_' are /not/ written to disk
|
|
2639
|
+
Returns the subdirectory ``directory`` of ``self``.
|
|
2031
2640
|
"""
|
|
2032
|
-
|
|
2033
|
-
self.__dict__[key] = value
|
|
2034
|
-
else:
|
|
2035
|
-
self.write(key,value)
|
|
2036
|
-
|
|
2037
|
-
def __delattr__(self, key):
|
|
2038
|
-
""" Silently delete a key with member notation. """
|
|
2039
|
-
verify( key[:1] != "_", "Deleting protected or private members disabled. Fix __delattr__ to support this")
|
|
2040
|
-
return self.delete( key=key, raiseOnError=False )
|
|
2641
|
+
return SubDir(directory,parent=self)
|
|
2041
2642
|
|
|
2042
2643
|
# pickling
|
|
2043
2644
|
# --------
|
|
@@ -2052,7 +2653,22 @@ class SubDir(object):
|
|
|
2052
2653
|
self._ext = state['ext']
|
|
2053
2654
|
self._fmt = state['fmt']
|
|
2054
2655
|
self._crt = state['crt']
|
|
2656
|
+
|
|
2657
|
+
@staticmethod
|
|
2658
|
+
def as_format( format_name : str ) -> int:
|
|
2659
|
+
"""
|
|
2660
|
+
Converts a named format into the respective format code.
|
|
2661
|
+
|
|
2662
|
+
Example::
|
|
2055
2663
|
|
|
2664
|
+
format = SubDir.as_format( config("format", "pickle", SubDir.FORMAT_NAMES, "File format") )
|
|
2665
|
+
"""
|
|
2666
|
+
format_name = format_name.upper()
|
|
2667
|
+
if not format_name in SubDir.FORMAT_NAMES:
|
|
2668
|
+
raise LookupError(f"Unknown format name '{format_name}'. Must be one of: {fmt_list(SubDir.FORMAT_NAMES)}")
|
|
2669
|
+
return Format[format_name]
|
|
2670
|
+
|
|
2671
|
+
|
|
2056
2672
|
# caching
|
|
2057
2673
|
# -------
|
|
2058
2674
|
|
|
@@ -2066,129 +2682,249 @@ class SubDir(object):
|
|
|
2066
2682
|
exclude_arg_types : list[type] = None,
|
|
2067
2683
|
version_auto_class : bool = True):
|
|
2068
2684
|
"""
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
|
|
2098
|
-
|
|
2099
|
-
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
|
|
2103
|
-
|
|
2104
|
-
|
|
2105
|
-
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2114
|
-
|
|
2115
|
-
|
|
2685
|
+
Advanced versioned caching for callables.
|
|
2686
|
+
|
|
2687
|
+
Versioned caching is based on the following two simple principles:
|
|
2688
|
+
|
|
2689
|
+
1) **Unique Call IDs:**
|
|
2690
|
+
|
|
2691
|
+
When a function is called with some parameters, the wrapper identifies a unique ID based
|
|
2692
|
+
on the qualified name of the function and on its runtime functional parameters (ie those
|
|
2693
|
+
which alter the outcome of the function).
|
|
2694
|
+
When a function is called the first time with a given unique call ID, it will store
|
|
2695
|
+
the result of the call to disk. If the function is called with the same call ID again,
|
|
2696
|
+
the result is read from disk and returned.
|
|
2697
|
+
|
|
2698
|
+
To compute unique call IDs :class:`cdxcore.uniquehash.NamedUniqueHash` is used
|
|
2699
|
+
by default.
|
|
2700
|
+
|
|
2701
|
+
2) **Code Version:**
|
|
2702
|
+
|
|
2703
|
+
Each function has a version, which includes dependencies on other functions or classes.
|
|
2704
|
+
If the version of data on disk does not match the current version, it is deleted
|
|
2705
|
+
and the generating function is called again. This way you can use your code to drive updates
|
|
2706
|
+
to data generated with cached functions.
|
|
2707
|
+
|
|
2708
|
+
Behind the scenes this is implemented using :dec:`cdxcore.version.version` which means
|
|
2709
|
+
that the version of a cached function can also depend on versions of non-cached functions
|
|
2710
|
+
or other objects.
|
|
2711
|
+
|
|
2712
|
+
Caching Functions
|
|
2713
|
+
^^^^^^^^^^^^^^^^^
|
|
2714
|
+
|
|
2715
|
+
Caching a simple function ``f`` is straightforward:
|
|
2716
|
+
|
|
2717
|
+
.. code-block:: python
|
|
2718
|
+
|
|
2719
|
+
from cdxcore.subdir import SubDir
|
|
2720
|
+
cache = SubDir("!/.cache")
|
|
2721
|
+
cache.delete_all_content() # for illustration
|
|
2722
|
+
|
|
2723
|
+
@cache.cache("0.1")
|
|
2724
|
+
def f(x,y):
|
|
2725
|
+
return x*y
|
|
2726
|
+
|
|
2727
|
+
_ = f(1,2) # function gets computed and the result cached
|
|
2728
|
+
_ = f(1,2) # restore result from cache
|
|
2729
|
+
_ = f(2,2) # different parameters: compute and store result
|
|
2730
|
+
|
|
2731
|
+
Cache another function ``g`` which calls ``f``, and whose version therefore depends on ``f``'s version:
|
|
2732
|
+
|
|
2733
|
+
.. code-block:: python
|
|
2734
|
+
|
|
2735
|
+
@cache.cache("0.1", dependencies=[f])
|
|
2736
|
+
def g(x,y):
|
|
2737
|
+
return f(x,y)**2
|
|
2738
|
+
|
|
2739
|
+
**Debugging**
|
|
2740
|
+
|
|
2741
|
+
When using automated caching it
|
|
2742
|
+
is important to understand how changes in parameters and the version of a function
|
|
2743
|
+
affect caching. To this end, :dec:`cdxcore.subdir.SubDir.cache` supports
|
|
2744
|
+
a tracing mechanism via the use of a :class:`cdxcore.subdir.CacheController`:
|
|
2745
|
+
|
|
2746
|
+
.. code-block:: python
|
|
2747
|
+
|
|
2748
|
+
from cdxcore.subdir import SubDir, CacheController, Context
|
|
2749
|
+
|
|
2750
|
+
ctrl = CacheController( debug_verbose=Context("all") )
|
|
2751
|
+
cache = SubDir("!/.cache", cache_controller=ctrl )
|
|
2752
|
+
cache.delete_all_content() # <- delete previous cached files, for this example only
|
|
2753
|
+
|
|
2754
|
+
@cache.cache("0.1")
|
|
2755
|
+
def f(x,y):
|
|
2756
|
+
return x*y
|
|
2757
|
+
|
|
2758
|
+
_ = f(1,2) # function gets computed and the result cached
|
|
2759
|
+
_ = f(1,2) # restore result from cache
|
|
2760
|
+
_ = f(2,2) # different parameters: compute and store result
|
|
2761
|
+
|
|
2762
|
+
Returns:
|
|
2116
2763
|
|
|
2117
|
-
|
|
2764
|
+
.. code-block:: python
|
|
2765
|
+
|
|
2766
|
+
00: cache(f@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
|
|
2767
|
+
00: cache(f@__main__): called 'f@__main__' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f@__main__ 668a6b111549e288.pck'.
|
|
2768
|
+
00: cache(f@__main__): read 'f@__main__' version 'version 0.1' from cache 'C:/Users/hans/AppData/Local/Temp/.cache/f@__main__ 668a6b111549e288.pck'.
|
|
2769
|
+
00: cache(f@__main__): called 'f@__main__' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f@__main__ b5609542d7da0b04.pck'.
|
|
2118
2770
|
|
|
2119
|
-
|
|
2120
|
-
|
|
2771
|
+
**Non-Functional Parameters**
|
|
2772
|
+
|
|
2773
|
+
A function may have non-functional parameters which do not alter the function's outcome.
|
|
2774
|
+
An example are ``debug`` flags:
|
|
2775
|
+
|
|
2776
|
+
.. code-block:: python
|
|
2777
|
+
|
|
2778
|
+
from cdxcore.subdir import SubDir
|
|
2779
|
+
cache = SubDir("!/.cache")
|
|
2780
|
+
|
|
2781
|
+
@cache.cache("0.1", dependencies=[f], exclude_args='debug')
|
|
2782
|
+
def g(x,y,debug): # <-- 'debug' is a non-functional parameter
|
|
2783
|
+
if debug:
|
|
2784
|
+
print(f"h(x={x},y={y})")
|
|
2785
|
+
return f(x,y)**2
|
|
2786
|
+
|
|
2787
|
+
You can define certain types as non-functional for *all* functions wrapped
|
|
2788
|
+
by :meth:`cdxcore.subdir.SubDir.cache` when constructing
|
|
2789
|
+
the :class:`cdxcore.subdir.CacheController` parameter for :class:`cdxcore.subdir.SubDir`:
|
|
2790
|
+
|
|
2791
|
+
.. code-block:: python
|
|
2121
2792
|
|
|
2122
|
-
|
|
2123
|
-
|
|
2124
|
-
|
|
2793
|
+
from cdxcore.subdir import SubDir
|
|
2794
|
+
|
|
2795
|
+
class Debugger:
|
|
2796
|
+
def output( cond, message ):
|
|
2797
|
+
print(message)
|
|
2798
|
+
|
|
2799
|
+
ctrl = CacheController(exclude_arg_types=[Debugger]) # <- exclude 'Debugger' parameters from hashing
|
|
2800
|
+
cache = SubDir("!/.cache")
|
|
2125
2801
|
|
|
2126
|
-
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
2802
|
+
@cache.cache("0.1", dependencies=[f], exclude_args='debug')
|
|
2803
|
+
def g(x,y,debugger : Debugger): # <-- 'debugger' is a non-functional parameter
|
|
2804
|
+
debugger.output(f"h(x={x},y={y})")
|
|
2805
|
+
return f(x,y)**2
|
|
2806
|
+
|
|
2807
|
+
**Unique IDs and File Naming**
|
|
2808
|
+
|
|
2809
|
+
The unique call ID of a decorated functions is by default generated by its fully qualified name
|
|
2810
|
+
and a unique hash of its functional parameters.
|
|
2811
|
+
|
|
2812
|
+
Key default behaviours of :class:`cdxcore.uniquehash.NamedUniqueHash`:
|
|
2813
|
+
|
|
2814
|
+
* The ``NamedUniqueHash`` hashes objects via their ``__dict__`` or ``__slots__`` members.
|
|
2815
|
+
This can be overwritten for a class by implementing ``__unique_hash__``; see :class:`cdxcore.uniquehash.NamedUniqueHash`.
|
|
2816
|
+
|
|
2817
|
+
* Function members of objects or any members starting with '_' are not hashed
|
|
2818
|
+
unless this behaviour is changed using :class:`cdxcore.subdir.CacheController`.
|
|
2819
|
+
|
|
2820
|
+
* Numpy and panda frames are hashed using their byte representation.
|
|
2821
|
+
That is slow and not recommended. It is better to identify numpy/panda inputs
|
|
2822
|
+
via their generating characteristic ID.
|
|
2823
|
+
|
|
2824
|
+
Either way, hashes are not particularly human readable. It is often useful
|
|
2825
|
+
to have unique IDs and therefore filenames which carry some context information.
|
|
2826
|
+
|
|
2827
|
+
This can be achieved by using ``label``:
|
|
2131
2828
|
|
|
2132
|
-
|
|
2133
|
-
def g(x,y,debug): # <-- debug is a non-functional parameter
|
|
2134
|
-
if debug:
|
|
2135
|
-
print(f"h(x={x},y={y})")
|
|
2136
|
-
return g(x,y)**2
|
|
2137
|
-
|
|
2138
|
-
You can systematically define certain types as non-functional for *all* functions wrapped
|
|
2139
|
-
by this SubDir by specifying the respective parameter for the CacheController() in SubDir.__init__().
|
|
2140
|
-
|
|
2141
|
-
The Unique Call ID of a functions is by default generated by its fully qualified name
|
|
2142
|
-
and a unique hash of its functional parameters.
|
|
2143
|
-
This can be made more readable by using id=
|
|
2144
|
-
|
|
2145
|
-
from cdxbasics.subdir import SubDir
|
|
2146
|
-
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2147
|
-
|
|
2148
|
-
@cache.cache("0.1", id="f({x},{y}") # <- using a string to be passed to str.format()
|
|
2149
|
-
def f(x,y):
|
|
2150
|
-
return x*y
|
|
2829
|
+
.. code-block:: python
|
|
2151
2830
|
|
|
2152
|
-
|
|
2153
|
-
|
|
2154
|
-
|
|
2155
|
-
|
|
2156
|
-
|
|
2157
|
-
|
|
2158
|
-
|
|
2159
|
-
|
|
2160
|
-
|
|
2161
|
-
|
|
2162
|
-
|
|
2831
|
+
from cdxcore.subdir import SubDir, CacheController
|
|
2832
|
+
ctrl = CacheController( debug_verbose=Context("all") )
|
|
2833
|
+
cache = SubDir("!/.cache", cache_controller=ctrl )
|
|
2834
|
+
cache.delete_all_content() # for illustration
|
|
2835
|
+
|
|
2836
|
+
@cache.cache("0.1") # <- no ID
|
|
2837
|
+
def f1(x,y):
|
|
2838
|
+
return x*y
|
|
2839
|
+
|
|
2840
|
+
@cache.cache("0.1", label="f2({x},{y})") # <- label uses a string to be passed to str.format()
|
|
2841
|
+
def f2(x,y):
|
|
2842
|
+
return x*y
|
|
2843
|
+
|
|
2844
|
+
We can also use a function to generate a ``label``. In that case all parameters
|
|
2845
|
+
to the function including its ``name`` are passed to the function. In below example
|
|
2846
|
+
we eat any parameters we are not interested in with ``** _``:
|
|
2163
2847
|
|
|
2164
|
-
|
|
2165
|
-
and a hash generated from all pertinent arguments will be generated.
|
|
2166
|
-
That is why in the previous example we still need to exclude_args 'debug' here.
|
|
2848
|
+
.. code-block:: python
|
|
2167
2849
|
|
|
2168
|
-
|
|
2169
|
-
|
|
2850
|
+
@cache.cache("0.1", label=lambda x,y,**_: f"h({x},{y})", exclude_args='debug')
|
|
2851
|
+
def h(x,y,debug=False):
|
|
2852
|
+
if debug:
|
|
2853
|
+
print(f"h(x={x},y={y})")
|
|
2854
|
+
return x*y
|
|
2170
2855
|
|
|
2171
|
-
|
|
2172
|
-
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2856
|
+
We obtain:
|
|
2857
|
+
|
|
2858
|
+
.. code-block:: python
|
|
2859
|
+
|
|
2860
|
+
f1(1,1)
|
|
2861
|
+
f2(1,1)
|
|
2862
|
+
h(1,1)
|
|
2863
|
+
|
|
2864
|
+
00: cache(f1@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
|
|
2865
|
+
00: cache(f2@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
|
|
2866
|
+
00: cache(h@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
|
|
2867
|
+
00: cache(f1@__main__): called 'f1@__main__' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f1@__main__ ef197d80d6a0bbb0.pck'.
|
|
2868
|
+
00: cache(f2@__main__): called 'f2(1,1)' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/f2(1,1) bdc3cd99157c10f7.pck'.
|
|
2869
|
+
00: cache(h@__main__): called 'h(1,1)' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/h(1,1) d3fdafc9182070f4.pck'.
|
|
2870
|
+
|
|
2871
|
+
Note that the file names ``f2(1,1) bdc3cd99157c10f7.pck``
|
|
2872
|
+
and ``h(1,1) d3fdafc9182070f4.pck`` for the ``f2`` and ``h`` function calls are now easier to read as
|
|
2873
|
+
they are comprised of the label
|
|
2874
|
+
of the function and a terminal hash key.
|
|
2875
|
+
The trailing hash is appended because we do not assume that the label returned by ``label`` is unique.
|
|
2876
|
+
Therefore, a hash generated from the ``label`` itself and
|
|
2877
|
+
all pertinent arguments will be appended to the filename.
|
|
2878
|
+
|
|
2879
|
+
If we know how to generate truly unique IDs which are always valid filenames, then we can use ``uid``
|
|
2880
|
+
instead of ``label``:
|
|
2881
|
+
|
|
2882
|
+
.. code-block:: python
|
|
2883
|
+
|
|
2884
|
+
@cache.cache("0.1", uid=lambda x,y,**_: f"h2({x},{y})", exclude_args='debug')
|
|
2885
|
+
def h2(x,y,debug=False):
|
|
2886
|
+
if debug:
|
|
2887
|
+
print(f"h(x={x},y={y})")
|
|
2888
|
+
return x*y
|
|
2889
|
+
h2(1,1)
|
|
2890
|
+
|
|
2891
|
+
yields::
|
|
2892
|
+
|
|
2893
|
+
00: cache(h2@__main__): function registered for caching into 'C:/Users/hans/AppData/Local/Temp/.cache/'.
|
|
2894
|
+
00: cache(h2@__main__): called 'h2(1,1)' version 'version 0.1' and wrote result into 'C:/Users/hans/AppData/Local/Temp/.cache/h2(1,1).pck'.
|
|
2895
|
+
|
|
2896
|
+
In particular, the filename is now ``h2(1,1).pck`` without any hash.
|
|
2897
|
+
If ``uid`` is used, the parameters of the function are not hashed. Like ``label``
|
|
2898
|
+
the parameter ``uid`` can also be a :func:`str.format` string or a callable.
|
|
2180
2899
|
|
|
2181
|
-
|
|
2182
|
-
|
|
2900
|
+
**Controlling which Parameters to Hash**
|
|
2901
|
+
|
|
2902
|
+
To specify which parameters are pertinent for identifying a unique id, use:
|
|
2903
|
+
|
|
2904
|
+
* ``include_args``: list of function arguments to include. If ``None``, use all parameters as input in the next step.
|
|
2905
|
+
|
|
2906
|
+
* ``exclude_args``: list of function arguments to exclude, if not ``None``.
|
|
2907
|
+
|
|
2908
|
+
* ``exclude_arg_types``: a list of types to exclude.
|
|
2909
|
+
This is helpful if control flow is managed with dedicated data types.
|
|
2910
|
+
An example of such a type is :class:`cdxcore.verbose.Context` which is used to print hierarchical output messages.
|
|
2911
|
+
Types can be globally excluded using a :class:`cdxcore.subdir.CacheController`
|
|
2912
|
+
when calling
|
|
2913
|
+
:class:`cdxcore.subdir.SubDir`.
|
|
2914
|
+
|
|
2915
|
+
**Numpy/Pandas**
|
|
2916
|
+
|
|
2183
2917
|
Numpy/Panda data should not be hashed for identifying unique call IDs.
|
|
2184
2918
|
Instead, use the defining characteristics for generating the data frames.
|
|
2185
2919
|
|
|
2186
2920
|
For example:
|
|
2187
|
-
|
|
2188
|
-
from cdxbasics.subdir import SubDir
|
|
2189
|
-
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2190
2921
|
|
|
2191
|
-
|
|
2922
|
+
.. code-block:: python
|
|
2923
|
+
|
|
2924
|
+
from cdxcore.pretty import PrettyObject
|
|
2925
|
+
from cdxcore.subdir import SubDir
|
|
2926
|
+
cache = SubDir("!/.cache")
|
|
2927
|
+
cache.delete_all_content() # for illustration
|
|
2192
2928
|
|
|
2193
2929
|
@cache.cache("0.1")
|
|
2194
2930
|
def load_src( src_def ):
|
|
@@ -2201,22 +2937,24 @@ class SubDir(object):
|
|
|
2201
2937
|
stats = ... using data
|
|
2202
2938
|
return stats
|
|
2203
2939
|
|
|
2204
|
-
src_def =
|
|
2940
|
+
src_def = PrettyObject()
|
|
2205
2941
|
src_def.start = "2010-01-01"
|
|
2206
2942
|
src_def.end = "2025-01-01"
|
|
2207
2943
|
src_def.x = 0.1
|
|
2208
2944
|
|
|
2209
|
-
stats_def =
|
|
2945
|
+
stats_def = PrettyObject()
|
|
2210
2946
|
stats_def.lambda = 0.1
|
|
2211
2947
|
stats_def.window = 100
|
|
2212
2948
|
|
|
2213
2949
|
data = load_src( src_def )
|
|
2214
2950
|
stats = statistics( stats_def, src_def, data )
|
|
2215
2951
|
|
|
2216
|
-
While instructive, this case is not optimal: we do not really need to load
|
|
2217
|
-
if we can reconstruct
|
|
2952
|
+
While instructive, this case is not optimal: we do not really need to load ``data``
|
|
2953
|
+
if we can reconstruct ``stats`` from ``data`` (unless we need ``data`` further on).
|
|
2218
2954
|
|
|
2219
|
-
Consider therefore
|
|
2955
|
+
Consider therefore:
|
|
2956
|
+
|
|
2957
|
+
.. code-block:: python
|
|
2220
2958
|
|
|
2221
2959
|
@cache.cache("0.1")
|
|
2222
2960
|
def load_src( src_def ):
|
|
@@ -2232,14 +2970,18 @@ class SubDir(object):
|
|
|
2232
2970
|
|
|
2233
2971
|
stats = statistics_only( stats_def, src_def )
|
|
2234
2972
|
|
|
2235
|
-
Member
|
|
2236
|
-
|
|
2973
|
+
Caching Member Functions
|
|
2974
|
+
^^^^^^^^^^^^^^^^^^^^^^^^
|
|
2975
|
+
|
|
2237
2976
|
You can cache member functions like any other function.
|
|
2238
|
-
Note that version information are by default inherited, i.e. member functions will be dependent on the version of their
|
|
2239
|
-
defining class, and class versions will be dependent on their base classes' versions
|
|
2977
|
+
Note that :dec:`cdxcore.version.version` information are by default inherited, i.e. member functions will be dependent on the version of their
|
|
2978
|
+
defining class, and class versions will be dependent on their base classes' versions:
|
|
2240
2979
|
|
|
2241
|
-
|
|
2242
|
-
|
|
2980
|
+
.. code-block:: python
|
|
2981
|
+
|
|
2982
|
+
from cdxcore.subdir import SubDir, version
|
|
2983
|
+
cache = SubDir("!/.cache")
|
|
2984
|
+
cache.delete_all_content() # for illustration
|
|
2243
2985
|
|
|
2244
2986
|
@version("0.1")
|
|
2245
2987
|
class A(object):
|
|
@@ -2259,18 +3001,26 @@ class SubDir(object):
|
|
|
2259
3001
|
_ = b.f(y=1) # same unique call ID as previous call -> restore result from disk
|
|
2260
3002
|
|
|
2261
3003
|
**WARNING**
|
|
2262
|
-
|
|
2263
|
-
which start with a "_". This behaviour can be changed using CacheController
|
|
2264
|
-
For reasonably complex objects it is recommended to implement
|
|
2265
|
-
|
|
2266
|
-
|
|
3004
|
+
:class:`cdxcore.uniquehash.UniqueHash` does *not* by default process members of objects or dictionaries
|
|
3005
|
+
which start with a "_". This behaviour can be changed using :class:`cdxcore.subdir.CacheController`.
|
|
3006
|
+
For reasonably complex objects it is recommended to implement for your objects
|
|
3007
|
+
a custom hashing function::
|
|
3008
|
+
|
|
3009
|
+
__unique_hash__( self, uniqueHash : UniqueHash, debug_trace : DebugTrace )
|
|
3010
|
+
|
|
3011
|
+
This function is described at :class:`cdxcore.uniquehash.UniqueHash`.
|
|
3012
|
+
|
|
3013
|
+
Caching Bound Member Functions
|
|
3014
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
2267
3015
|
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
Note that above is functionally different to decorating a bound member function:
|
|
3016
|
+
Caching bound member functions is technically quite different to caching a function of a class in general,
|
|
3017
|
+
but also supported:
|
|
2271
3018
|
|
|
2272
|
-
|
|
2273
|
-
|
|
3019
|
+
.. code-block:: python
|
|
3020
|
+
|
|
3021
|
+
from cdxcore.subdir import SubDir, version
|
|
3022
|
+
cache = SubDir("!/.cache", cache_controller = CacheController(debug_verbose=Context("all")))
|
|
3023
|
+
cache.delete_all_content() # for illustration
|
|
2274
3024
|
|
|
2275
3025
|
class A(object):
|
|
2276
3026
|
def __init__(self,x):
|
|
@@ -2282,133 +3032,177 @@ class SubDir(object):
|
|
|
2282
3032
|
f = cache.cache("0.1", uid=lambda self, y : f"a.f({y})")(a.f) # <- decorate bound 'f'.
|
|
2283
3033
|
r = f(y=2)
|
|
2284
3034
|
|
|
2285
|
-
In this case the function
|
|
2286
|
-
parameter list even though the bound function parameter list does not include
|
|
2287
|
-
This, together with the comments on hashing objects above, ensures that (hashed) changes to
|
|
3035
|
+
In this case the function ``f`` is bound to ``a``. The object is added as ``self`` to the function
|
|
3036
|
+
parameter list even though the bound function parameter list does not include ``self``.
|
|
3037
|
+
This, together with the comments on hashing objects above, ensures that (hashed) changes to ``a`` will
|
|
2288
3038
|
be reflected in the unique call ID for the member function.
|
|
2289
3039
|
|
|
2290
|
-
Classes
|
|
2291
|
-
|
|
2292
|
-
Classes can also be cached.
|
|
2293
|
-
This is done in two steps: first, the class itself is decorated to provide version information at its own level.
|
|
2294
|
-
Secondly, decorate __init__ which also helps to define the unique call id. You do not need to specify a version
|
|
2295
|
-
for __init__ as its version usually coincides with the version of the class.
|
|
2296
|
-
|
|
2297
|
-
Simple example:
|
|
2298
|
-
|
|
2299
|
-
cache = SubDir("!/.cache", cacheController : CacheController(debug_verbose=Context("all")))
|
|
2300
|
-
|
|
2301
|
-
@cache.cache("0.1")
|
|
2302
|
-
class A(object):
|
|
2303
|
-
|
|
2304
|
-
@cache.cache(exclude_args=['debug'])
|
|
2305
|
-
def __init__(self, x, debug):
|
|
2306
|
-
if debug:
|
|
2307
|
-
print("__init__",x)
|
|
2308
|
-
self.x = x
|
|
3040
|
+
Caching Classes
|
|
3041
|
+
^^^^^^^^^^^^^^^
|
|
2309
3042
|
|
|
2310
|
-
|
|
2311
|
-
|
|
3043
|
+
Classes can also be cached. In this case the creation of a class is cached, i.e. a call to
|
|
3044
|
+
the class constructor restores the respective object from disk.
|
|
2312
3045
|
|
|
2313
|
-
|
|
2314
|
-
It is therefore automatically excluded from computing a unique call ID.
|
|
2315
|
-
Specifically, 'self' is not part of the arguments passed to 'id':
|
|
3046
|
+
This is done in two steps:
|
|
2316
3047
|
|
|
2317
|
-
|
|
2318
|
-
|
|
3048
|
+
1) first, the class itself is decorated using
|
|
3049
|
+
:dec:`cdxcore.subdir.SubDir.cache`
|
|
3050
|
+
to provide version information at class level. Only version information are provided here.
|
|
3051
|
+
|
|
3052
|
+
2) Secondly, decorate ``__init__``. You do not need to specify a version
|
|
3053
|
+
for ``__init__`` as its version usually coincides with the version of the class. At ``__init__``
|
|
3054
|
+
you define how unique IDs are generated from the parameters passed to object construction.
|
|
3055
|
+
|
|
3056
|
+
Simple example:
|
|
3057
|
+
|
|
3058
|
+
.. code-block:: python
|
|
3059
|
+
|
|
3060
|
+
from cdxcore.subdir import SubDir
|
|
3061
|
+
cache = SubDir("!/.cache")
|
|
3062
|
+
cache.delete_all_content() # for illustration
|
|
3063
|
+
|
|
3064
|
+
@cache.cache("0.1")
|
|
3065
|
+
class A(object):
|
|
3066
|
+
|
|
3067
|
+
@cache.cache(exclude_args=['debug'])
|
|
3068
|
+
def __init__(self, x, debug):
|
|
3069
|
+
if debug:
|
|
3070
|
+
print("__init__",x)
|
|
3071
|
+
self.x = x
|
|
2319
3072
|
|
|
2320
|
-
|
|
2321
|
-
|
|
2322
|
-
if debug:
|
|
2323
|
-
print("__init__",x)
|
|
2324
|
-
self.x = x
|
|
3073
|
+
a = A(1) # caches 'a'
|
|
3074
|
+
b = A(1) # reads the cached object into 'b'
|
|
2325
3075
|
|
|
2326
|
-
|
|
2327
|
-
|
|
2328
|
-
|
|
2329
|
-
|
|
2330
|
-
|
|
2331
|
-
|
|
2332
|
-
|
|
2333
|
-
|
|
2334
|
-
|
|
2335
|
-
|
|
2336
|
-
|
|
2337
|
-
|
|
2338
|
-
|
|
2339
|
-
|
|
2340
|
-
|
|
2341
|
-
|
|
2342
|
-
b) exclude_args: list of funciton arguments to exclude, if not None.
|
|
2343
|
-
c) exclude_arg_types: a list of types to exclude. This is helpful if control flow is managed with dedicated data types.
|
|
2344
|
-
An example of such a type is cdxbasics.verbose.Context which is used to print hierarchical output messages.
|
|
2345
|
-
Types can be globally excluded using the CacheController.
|
|
3076
|
+
**Technical Comments**
|
|
3077
|
+
|
|
3078
|
+
The function ``__init__`` does not actually return a value; for this reason
|
|
3079
|
+
behind the scenes it is actually ``__new__`` which is being decorated.
|
|
3080
|
+
Attempting to cache-decorate ``__new__`` manually will lead to an exception.
|
|
3081
|
+
|
|
3082
|
+
A nuance for ``__init__`` vs ordinary member function is that the
|
|
3083
|
+
``self`` parameter is non-functional
|
|
3084
|
+
(in the sense that it is an empty object when ``__init__`` is called).
|
|
3085
|
+
``self`` is therefore automatically excluded from computing a unique call ID.
|
|
3086
|
+
That also means ``self`` is not part of the arguments passed to ``uid``:
|
|
3087
|
+
|
|
3088
|
+
.. code-block:: python
|
|
3089
|
+
|
|
3090
|
+
@cache.cache("0.1")
|
|
3091
|
+
class A(object):
|
|
2346
3092
|
|
|
3093
|
+
@cache.cache("0.1", uid=lambda x, debug: f"A.__init__(x={x})") # <-- 'self' is not passed to the lambda function; no need to add **_
|
|
3094
|
+
def __init__(self, x, debug):
|
|
3095
|
+
if debug:
|
|
3096
|
+
print("__init__",x)
|
|
3097
|
+
self.x = x
|
|
3098
|
+
|
|
3099
|
+
Decorating classes with ``__slots__`` does not yet work.
|
|
3100
|
+
|
|
2347
3101
|
See also
|
|
2348
|
-
|
|
2349
|
-
|
|
2350
|
-
|
|
3102
|
+
^^^^^^^^
|
|
3103
|
+
|
|
3104
|
+
For project-wide use it is usually inconvenient to control caching at the level of a
|
|
3105
|
+
project-wide cache root directory.
|
|
3106
|
+
See :class:`cdxcore.subdir.VersionedCacheRoot` for a thin convenience wrapper around a :class:`cdxcore.subdir.SubDir`
|
|
3107
|
+
with a :class:`cdxcore.subdir.CacheController`.
|
|
2351
3108
|
|
|
2352
3109
|
Parameters
|
|
2353
3110
|
----------
|
|
2354
3111
|
version : str, optional
|
|
2355
3112
|
Version of the function.
|
|
2356
|
-
* If None then F must be decorated with cdxbasics.version.version().
|
|
2357
|
-
* If set, the function F is first decorated with cdxbasics.version.version().
|
|
2358
|
-
dependencies : list, optional
|
|
2359
|
-
List of version dependencies
|
|
2360
|
-
|
|
2361
|
-
id : str, Callable
|
|
2362
|
-
Create a call label for the function call and its parameters.
|
|
2363
|
-
See above for a description.
|
|
2364
|
-
* A plain string without {} formatting: this is the fully qualified id
|
|
2365
|
-
* A string with {} formatting: id.str( name=name, **parameters ) will be used to generate the fully qualified id
|
|
2366
|
-
* A Callable, in which case id( name=name, **parameters ) will be used to generate the fully qualified id
|
|
2367
|
-
|
|
2368
|
-
unique : bool
|
|
2369
|
-
Whether the 'id' generated by 'id' is unique for this function call with its parameters.
|
|
2370
|
-
If True, then the function will attempt to use 'id' as filename as long as it has no invalid characters and is short
|
|
2371
|
-
enough (see 'max_filename_length').
|
|
2372
|
-
If False, the function will append to the 'id' a unique hash of the qualified function name and all pertinent parameters
|
|
2373
|
-
|
|
2374
|
-
name : str
|
|
2375
|
-
The name of the function, or None for using the fully qualified function name.
|
|
2376
3113
|
|
|
3114
|
+
* If ``None`` then ``F`` must be decorated with :dec:`cdxcore.version.version`.
|
|
3115
|
+
* If set, the function ``F`` is first decorated with :dec:`cdxcore.version.version`.
|
|
3116
|
+
|
|
3117
|
+
dependencies : list[type], optional
|
|
3118
|
+
A list of version dependencies, either by reference or by name.
|
|
3119
|
+
See :dec:`cdxcore.version.version` for details on name lookup if strings are used.
|
|
3120
|
+
|
|
3121
|
+
label : str | Callable
|
|
3122
|
+
Specify a human-readable label for the function call given its parameters.
|
|
3123
|
+
This label is used to generate the cache file name, and is also printed when tracing
|
|
3124
|
+
hashing operations. Labels are not assumed to be unique, hence a unique hash of
|
|
3125
|
+
the label and the parameters to this function will be appended to generate
|
|
3126
|
+
the actual cache file name.
|
|
3127
|
+
|
|
3128
|
+
Use ``uid`` instead if ``label`` represents valid unique filenames.
|
|
3129
|
+
|
|
3130
|
+
|
|
3131
|
+
**Usage:**
|
|
3132
|
+
|
|
3133
|
+
* If ``label`` is a plain string without ``{}`` formatting: use this string as-is.
|
|
3134
|
+
|
|
3135
|
+
* If ``label`` is a string with ``{}`` formatting, then ``label.format( name=name, **parameters )``
|
|
3136
|
+
will be used to generate the actual label.
|
|
3137
|
+
|
|
3138
|
+
* If ``label`` is a ``Callable`` then ``label( name=name, **parameters )`` will be called
|
|
3139
|
+
to generate the actual label.
|
|
3140
|
+
|
|
3141
|
+
See above for examples.
|
|
3142
|
+
|
|
3143
|
+
``label`` cannot be used alongside ``uid``.
|
|
3144
|
+
|
|
3145
|
+
uid : str | Callable
|
|
3146
|
+
Alternative to ``label`` which is assumed to generate a unique cache file name. It has the same
|
|
3147
|
+
semantics as ``label``. When used, parameters to the decorated function are not hashed.
|
|
3148
|
+
|
|
3149
|
+
``uid`` cannot be used alongside ``label``.
|
|
3150
|
+
|
|
3151
|
+
name : str, optional
|
|
3152
|
+
Name of this function, which is used on its own if neither ``label`` nor ``uid`` is used.
|
|
3153
|
+
If either of them is used, ``name`` is passed as a parameter to either the callable or the
|
|
3154
|
+
formatting operator.
|
|
3155
|
+
|
|
3156
|
+
If ``name`` is not specified it defaults to ``__qualname__`` expanded
|
|
3157
|
+
by the module name the function is defined in.
|
|
3158
|
+
|
|
2377
3159
|
include_args : list[str]
|
|
2378
|
-
List of arguments to include in generating
|
|
3160
|
+
List of arguments to include in generating a unique ID, or ``None`` for all.
|
|
2379
3161
|
|
|
2380
3162
|
exclude_args : list[str]:
|
|
2381
|
-
List of
|
|
3163
|
+
List of arguments to exclude from generating a unique ID.
|
|
2382
3164
|
|
|
2383
3165
|
exclude_arg_types : list[type]
|
|
2384
|
-
List of types to exclude.
|
|
3166
|
+
List of parameter types to exclude from generating a unique ID.
|
|
2385
3167
|
|
|
2386
3168
|
version_auto_class : bool
|
|
2387
|
-
|
|
2388
|
-
|
|
3169
|
+
Whether to automatically add version dependencies on base classes or, for member functions, on containing
|
|
3170
|
+
classes. This is the ``auto_class`` parameter for :dec:`cdxcore.version.version`.
|
|
2389
3171
|
|
|
2390
3172
|
Returns
|
|
2391
3173
|
-------
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
|
|
2395
|
-
Information available at any time after decoration:
|
|
2396
|
-
F.cache_info.name : qualified name of the function
|
|
2397
|
-
F.cache_info.signature : signature of the function
|
|
3174
|
+
Decorated F: Callable
|
|
3175
|
+
|
|
3176
|
+
A decorator ``cache(F)`` whose ``__call__`` implements the cached call to ``F``.
|
|
2398
3177
|
|
|
2399
|
-
|
|
2400
|
-
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
|
-
|
|
3178
|
+
This callable has a member ``cache_info``
|
|
3179
|
+
of type :class:`cdxcore.subdir.CacheInfo`
|
|
3180
|
+
which can be used to access information on caching activity.
|
|
3181
|
+
|
|
3182
|
+
* Information available at any time after decoration:
|
|
3183
|
+
|
|
3184
|
+
* ``F.cache_info.name`` : qualified name of the function
|
|
3185
|
+
* ``F.cache_info.signature`` : signature of the function
|
|
3186
|
+
|
|
3187
|
+
* Additional information available during a call to a decorated function ``F``, and thereafter:
|
|
3188
|
+
|
|
3189
|
+
* ``F.cache_info.version`` : unique version string reflecting all dependencies.
|
|
3190
|
+
* ``F.cache_info.filename`` : unique filename used for caching logic during the last function call.
|
|
3191
|
+
* ``F.cache_info.label`` : last label generated, or ``None``.
|
|
3192
|
+
* ``F.cache_info.arguments`` : arguments parsed to create a unique call ID, or ``None``.
|
|
2404
3193
|
|
|
2405
|
-
|
|
2406
|
-
|
|
3194
|
+
* Additional information available after a call to ``F``:
|
|
3195
|
+
|
|
3196
|
+
* ``F.cache_info.last_cached`` : whether the last function call returned a cached object.
|
|
2407
3197
|
|
|
2408
|
-
The
|
|
2409
|
-
|
|
2410
|
-
|
|
2411
|
-
|
|
3198
|
+
The decorated ``F()`` has additional function parameters, namely:
|
|
3199
|
+
|
|
3200
|
+
* ``override_cache_mode`` : allows to override caching mode temporarily, in particular you can set it to ``"off"``.
|
|
3201
|
+
* ``track_cached_files`` : allows passing a :class:`cdxcore.subdir.CacheTracker`
|
|
3202
|
+
object to keep track of all
|
|
3203
|
+
files used (loaded from or saved to).
|
|
3204
|
+
The function :meth:`cdxcore.subdir.CacheTracker.delete_cache_files` can be used
|
|
3205
|
+
to delete all files involved in caching.
|
|
2412
3206
|
"""
|
|
2413
3207
|
return CacheCallable(subdir = self,
|
|
2414
3208
|
version = version,
|
|
@@ -2428,14 +3222,153 @@ class SubDir(object):
|
|
|
2428
3222
|
version_auto_class : bool = True
|
|
2429
3223
|
):
|
|
2430
3224
|
"""
|
|
2431
|
-
Short
|
|
2432
|
-
|
|
3225
|
+
Short-cut for :dec:`cdxcore.subdir.SubDir.cache` applied to classes
|
|
3226
|
+
with a reduced number of available parameters.
|
|
3227
|
+
|
|
3228
|
+
Example::
|
|
3229
|
+
|
|
3230
|
+
cache = SubDir("!/.cache")
|
|
3231
|
+
|
|
3232
|
+
@cache.cache_class("0.1")
|
|
3233
|
+
class A(object):
|
|
3234
|
+
|
|
3235
|
+
@cache.cache(exclude_args=['debug'])
|
|
3236
|
+
def __init__(self, x, debug):
|
|
3237
|
+
if debug:
|
|
3238
|
+
print("__init__",x)
|
|
3239
|
+
self.x = x
|
|
3240
|
+
|
|
2433
3241
|
"""
|
|
2434
3242
|
return self.cache( name=name,
|
|
2435
|
-
|
|
2436
|
-
|
|
2437
|
-
|
|
2438
|
-
|
|
3243
|
+
version=version,
|
|
3244
|
+
dependencies=dependencies,
|
|
3245
|
+
version_auto_class=version_auto_class)
|
|
3246
|
+
|
|
3247
|
+
# ========================================================================
|
|
3248
|
+
# Caching, convenience
|
|
3249
|
+
# ========================================================================
|
|
3250
|
+
|
|
3251
|
+
def VersionedCacheRoot( directory : str, *,
                        ext : str = None,
                        fmt : Format = None,
                        create_directory : bool = False,
                        **controller_kwargs
                        ):
    """
    Create a root directory for versioned caching on disk
    using :dec:`cdxcore.subdir.SubDir.cache`.

    This is a thin convenience wrapper: it builds a
    :class:`cdxcore.subdir.CacheController` from ``controller_kwargs``
    (only if any were supplied) and hands it to a new
    :class:`cdxcore.subdir.SubDir`.

    **Usage:**

    In a central file, define a root directory for all caching activity::

        from cdxcore.subdir import VersionedCacheRoot
        vroot = VersionedCacheRoot("!/cache")

    Create sub-directories as suitable, for example::

        vtest = vroot("test")

    Use these for caching::

        @vtest.cache("1.0")
        def f1( x=1, y=2 ):
            print(x,y)

        @vtest.cache("1.0", dependencies=[f1])
        def f2( x=1, y=2, z=3 ):
            f1( x,y )
            print(z)

    Parameters
    ----------
        directory : str
            Name of the root directory for caching.

            Using SubDir the following short-cuts are supported:

            * ``"!/dir"`` creates ``dir`` in the temporary directory.
            * ``"~/dir"`` creates ``dir`` in the home directory.
            * ``"./dir"`` creates ``dir`` relative to the current directory.

        ext : str
            Extension, which will automatically be appended to file names.
            The default value depends on ``fmt``; for ``Format.PICKLE`` it is "pck".

        fmt : :class:`cdxcore.subdir.Format`
            File format; if ``ext`` is not specified, the format drives the extension, too.
            Default is ``Format.PICKLE``.

        create_directory : bool
            Whether to create the directory upon creation. Default is ``False``.

        controller_kwargs: dict
            Parameters passed to :class:`cdxcore.subdir.CacheController`.
            If none are given, no controller is created and ``None`` is passed on.

            Common parameters used:

            * ``exclude_arg_types``: list of types or names of types to exclude when auto-generating function
              signatures from function arguments.
              An example is :class:`cdxcore.verbose.Context` which is used to print progress messages.

            * ``max_filename_length``: maximum filename length.

            * ``hash_length``: length used for hashes, see :class:`cdxcore.uniquehash.UniqueHash`.

    Returns
    -------
        Root : SubDir
            A root directory suitable for caching.
    """
    # Only build a dedicated controller when the caller actually configured one.
    if len(controller_kwargs) > 0:
        cache_ctrl = CacheController(**controller_kwargs)
    else:
        cache_ctrl = None
    return SubDir( directory=directory,
                   ext=ext,
                   fmt=fmt,
                   create_directory=create_directory,
                   controller=cache_ctrl )
|
|
3325
|
+
|
|
3326
|
+
version = version_decorator
|
|
3327
|
+
|
|
3328
|
+
class CacheTracker(object):
    """
    Utility class to track cache files and be able to delete all of them.

    Files are registered with the in-place addition operator
    (``tracker += filename``) and removed from disk with
    :meth:`delete_cache_files`.
    """
    def __init__(self):
        """ Create a tracker with an empty list of tracked files. """
        self._files = []

    def __iadd__(self, new_file):
        """
        Add a new file to the tracker.

        Returns ``self``. Python rebinds the left-hand name of ``x += y`` to
        whatever ``__iadd__`` returns, so returning nothing would leave the
        caller's variable set to ``None`` after the first ``+=``.
        """
        self._files.append( new_file )
        return self

    def delete_cache_files(self):
        """ Delete all tracked files which still exist, then forget them. """
        for file in self._files:
            if os.path.exists(file):
                os.remove(file)
        self._files = []

    def __str__(self) -> str:#NOQA
        return f"Tracked: {self._files}"

    def __repr__(self) -> str:#NOQA
        return f"Tracked: {self._files}"
|
|
3348
|
+
|
|
3349
|
+
class CacheInfo(object):
    """
    Information on functions decorated with :dec:`cdxcore.subdir.SubDir.cache`.

    Every function decorated with :dec:`cdxcore.subdir.SubDir.cache`
    carries a member ``cache_info`` of this type.
    """
    def __init__(self, name, F, keep_last_arguments):
        """
        :meta private:
        """
        #: Name of the function as passed in by the caching decorator.
        self.name = name

        #: :func:`inspect.signature` of the function.
        self.signature = inspect.signature(F)

        #: Unique filename of the last function call.
        self.filename = None
        #: Label of the last function call.
        self.label = None
        #: Last version used.
        self.version = None

        #: Whether the last function call restored data from disk.
        self.last_cached = None

        if keep_last_arguments:
            #: Last arguments used. This member is only present if ``keep_last_arguments`` was set to ``True`` for the relevant :class:`cdxcore.subdir.CacheController`.
            self.arguments = None
|
|
2439
3372
|
|
|
2440
3373
|
def _ensure_has_version( F,
|
|
2441
3374
|
version : str = None,
|
|
@@ -2486,8 +3419,9 @@ def _qualified_name( F, name ):
|
|
|
2486
3419
|
|
|
2487
3420
|
class CacheCallable(object):
|
|
2488
3421
|
"""
|
|
2489
|
-
|
|
2490
|
-
|
|
3422
|
+
Wrapper for a cached function.
|
|
3423
|
+
|
|
3424
|
+
This is the wrapper returned by :dec:`cdxcore.subdir.SubDir.cache`.
|
|
2491
3425
|
"""
|
|
2492
3426
|
|
|
2493
3427
|
def __init__(self,
|
|
@@ -2503,8 +3437,9 @@ class CacheCallable(object):
|
|
|
2503
3437
|
version_auto_class : bool = True,
|
|
2504
3438
|
name_of_name_arg : str = "name"):
|
|
2505
3439
|
"""
|
|
2506
|
-
Utility class for SubDir.
|
|
2507
|
-
|
|
3440
|
+
Utility class for :dec:`cdxcore.subdir.SubDir.cache`.
|
|
3441
|
+
|
|
3442
|
+
*Do not use directly.*
|
|
2508
3443
|
"""
|
|
2509
3444
|
if not label is None and not uid is None:
|
|
2510
3445
|
error("Cannot specify both 'label' and 'uid'.")
|
|
@@ -2523,35 +3458,41 @@ class CacheCallable(object):
|
|
|
2523
3458
|
|
|
2524
3459
|
@property
|
|
2525
3460
|
def uid_or_label(self) -> Callable:
|
|
3461
|
+
""" ID or label """
|
|
2526
3462
|
return self._uid if self._label is None else self._label
|
|
2527
3463
|
@property
|
|
2528
3464
|
def unique(self) -> bool:
|
|
3465
|
+
""" Whether the ID is unique """
|
|
2529
3466
|
return not self._uid is None
|
|
2530
|
-
|
|
2531
3467
|
@property
|
|
2532
|
-
def
|
|
2533
|
-
""" Returns the
|
|
2534
|
-
return self._subdir.
|
|
3468
|
+
def cache_controller(self) -> CacheController:
|
|
3469
|
+
""" Returns the :class:`cdxcore.subdir.CacheController` """
|
|
3470
|
+
return self._subdir.cache_controller
|
|
2535
3471
|
@property
|
|
2536
|
-
def cache_mode(self) ->
|
|
2537
|
-
|
|
3472
|
+
def cache_mode(self) -> CacheMode:
|
|
3473
|
+
""" Returns the :class:`cdxcore.subdir.CacheMode` of the underlying :class:`cdxcore.subdir.CacheController` """
|
|
3474
|
+
return self.cache_controller.cache_mode
|
|
2538
3475
|
@property
|
|
2539
3476
|
def debug_verbose(self) -> Context:
|
|
2540
|
-
|
|
3477
|
+
""" Returns the debug :class:`cdxcore.verbose.Context` used to print caching information, or ``None`` """
|
|
3478
|
+
return self.cache_controller.debug_verbose
|
|
2541
3479
|
@property
|
|
2542
|
-
def
|
|
2543
|
-
|
|
3480
|
+
def labelledFileName(self) -> Callable:
|
|
3481
|
+
""" Returns ``labelledFileName()`` of the underlying :class:`cdxcore.subdir.CacheController` """
|
|
3482
|
+
return self.cache_controller.labelledFileName
|
|
2544
3483
|
@property
|
|
2545
|
-
def
|
|
2546
|
-
|
|
3484
|
+
def uniqueFileName(self) -> Callable:
|
|
3485
|
+
""" Returns ``uniqueFileName()`` of the underlying :class:`cdxcore.subdir.CacheController` """
|
|
3486
|
+
return self.cache_controller.uniqueFileName
|
|
2547
3487
|
@property
|
|
2548
3488
|
def global_exclude_arg_types(self) -> list[type]:
|
|
2549
|
-
|
|
3489
|
+
""" Returns ``exclude_arg_types`` of the underlying :class:`cdxcore.subdir.CacheController` """
|
|
3490
|
+
return self.cache_controller.exclude_arg_types
|
|
2550
3491
|
|
|
2551
3492
|
def __call__(self, F : Callable):
|
|
2552
3493
|
"""
|
|
2553
|
-
Decorate
|
|
2554
|
-
See SubDir.cache
|
|
3494
|
+
Decorate ``F`` as cachable callable.
|
|
3495
|
+
See :dec:`cdxcore.subdir.SubDir.cache` for documentation.
|
|
2555
3496
|
"""
|
|
2556
3497
|
if inspect.isclass(F):
|
|
2557
3498
|
if not self._label is None: raise ValueError("'{F.__qualname__}': when decorating a class specify 'label' for __init__, not the class")
|
|
@@ -2566,11 +3507,13 @@ class CacheCallable(object):
|
|
|
2566
3507
|
def _wrap_class(self, C : type):
|
|
2567
3508
|
"""
|
|
2568
3509
|
Wrap class
|
|
3510
|
+
|
|
2569
3511
|
This wrapper:
|
|
2570
|
-
|
|
2571
|
-
|
|
3512
|
+
|
|
3513
|
+
* Assigns a :dec:`cdxcore.version.version` for the class (if not yet present).
|
|
3514
|
+
* Extracts from ``__init__`` the wrapper to decorate ``__new__``.
|
|
2572
3515
|
"""
|
|
2573
|
-
debug_verbose = self.
|
|
3516
|
+
debug_verbose = self.cache_controller.debug_verbose
|
|
2574
3517
|
|
|
2575
3518
|
assert not inspect.isclass(C), ("Not a class", C)
|
|
2576
3519
|
|
|
@@ -2609,8 +3552,7 @@ class CacheCallable(object):
|
|
|
2609
3552
|
"""
|
|
2610
3553
|
Decorate callable 'F'.
|
|
2611
3554
|
"""
|
|
2612
|
-
|
|
2613
|
-
debug_verbose = self.cacheController.debug_verbose
|
|
3555
|
+
debug_verbose = self.cache_controller.debug_verbose
|
|
2614
3556
|
assert not inspect.isclass(F), ("Internal error")
|
|
2615
3557
|
|
|
2616
3558
|
# check validity
|
|
@@ -2712,21 +3654,24 @@ class CacheCallable(object):
|
|
|
2712
3654
|
# determine unique id_ for this function call
|
|
2713
3655
|
# -------------------------------------------
|
|
2714
3656
|
|
|
2715
|
-
label = None
|
|
2716
|
-
uid = None
|
|
2717
3657
|
uid_or_label = self.uid_or_label
|
|
3658
|
+
filename = None
|
|
2718
3659
|
if isinstance(uid_or_label, str) and self.unique:
|
|
2719
|
-
# if 'id' does not contain formatting codes,
|
|
3660
|
+
# if 'id' does not contain formatting codes,
|
|
3661
|
+
# and the result is 'unique' then do not bother collecting
|
|
2720
3662
|
# function arguments
|
|
2721
3663
|
try:
|
|
2722
|
-
|
|
3664
|
+
filename = uid_or_label.format() # throws a KeyError if 'id' contains formatting information
|
|
2723
3665
|
except KeyError:
|
|
2724
3666
|
pass
|
|
2725
3667
|
|
|
2726
|
-
if not
|
|
3668
|
+
if not filename is None:
|
|
2727
3669
|
# generate name with the unique string provided by the user
|
|
2728
|
-
|
|
2729
|
-
|
|
3670
|
+
if not is_filename(filename):
|
|
3671
|
+
raise ValueError(f"The unique filename '{filename}' computed for '{name}' contains invalid characters for filename. When using `uid` make sure that "+\
|
|
3672
|
+
"the returned ID is a valid filename (and unique)")
|
|
3673
|
+
label = filename
|
|
3674
|
+
filename = self.uniqueFileName( filename )
|
|
2730
3675
|
arguments = None
|
|
2731
3676
|
|
|
2732
3677
|
else:
|
|
@@ -2767,9 +3712,9 @@ class CacheCallable(object):
|
|
|
2767
3712
|
if arg in arguments:
|
|
2768
3713
|
del arguments[arg]
|
|
2769
3714
|
|
|
2770
|
-
#
|
|
3715
|
+
# did the user provide a label or unique ID?
|
|
2771
3716
|
if uid_or_label is None:
|
|
2772
|
-
|
|
3717
|
+
uid_or_label = name
|
|
2773
3718
|
|
|
2774
3719
|
else:
|
|
2775
3720
|
if self._name_of_name_arg in arguments:
|
|
@@ -2789,24 +3734,30 @@ class CacheCallable(object):
|
|
|
2789
3734
|
# call format or function
|
|
2790
3735
|
if isinstance( uid_or_label, str ):
|
|
2791
3736
|
try:
|
|
2792
|
-
|
|
3737
|
+
uid_or_label = str.format( uid_or_label, **arguments )
|
|
2793
3738
|
except KeyError as e:
|
|
2794
3739
|
raise KeyError(e, f"Error while generating id for '{name}' using format string '{uid_or_label}': {e}. Available arguments: {list(arguments)}")
|
|
2795
3740
|
|
|
2796
3741
|
else:
|
|
2797
3742
|
which = 'uid' if not self._uid is None else 'label'
|
|
2798
3743
|
try:
|
|
2799
|
-
|
|
3744
|
+
uid_or_label = uid_or_label(**arguments)
|
|
2800
3745
|
except TypeError as e:
|
|
2801
3746
|
raise TypeError(e, f"Error while generating '{which}' for '{name}' using a function: {e}. Available arguments: {list(arguments)}")
|
|
2802
3747
|
except Exception as e:
|
|
2803
3748
|
raise type(e)(f"Error while generating '{which}' for '{name}': attempt to call '{which}' of type {type(uid_or_label)} failed: {e}")
|
|
2804
|
-
assert isinstance(
|
|
3749
|
+
assert isinstance(uid_or_label, str), ("Error:", which, "callable must return a string. Found",type(uid_or_label))
|
|
2805
3750
|
|
|
2806
3751
|
if self.unique:
|
|
2807
|
-
|
|
3752
|
+
if not is_filename(uid_or_label):
|
|
3753
|
+
raise ValueError(f"The unique filename '{uid_or_label}' computed for '{name}' contains invalid characters for filename. When using `uid` make sure that "+\
|
|
3754
|
+
"the returned filename is indeed a valid filename (and unique)")
|
|
3755
|
+
|
|
3756
|
+
label = uid_or_label
|
|
3757
|
+
filename = self.uniqueFileName( uid_or_label )
|
|
2808
3758
|
else:
|
|
2809
|
-
|
|
3759
|
+
label = uid_or_label
|
|
3760
|
+
filename = self.labelledFileName( uid_or_label, **arguments )
|
|
2810
3761
|
|
|
2811
3762
|
# determine version, cache mode
|
|
2812
3763
|
# ------------------
|
|
@@ -2818,11 +3769,11 @@ class CacheCallable(object):
|
|
|
2818
3769
|
# store process information
|
|
2819
3770
|
# -------------------------
|
|
2820
3771
|
|
|
2821
|
-
execute.cache_info.label
|
|
2822
|
-
execute.cache_info.
|
|
2823
|
-
execute.cache_info.version
|
|
3772
|
+
execute.cache_info.label = str(label) if not label is None else None
|
|
3773
|
+
execute.cache_info.filename = filename
|
|
3774
|
+
execute.cache_info.version = version_
|
|
2824
3775
|
|
|
2825
|
-
if self.
|
|
3776
|
+
if self.cache_controller.keep_last_arguments:
|
|
2826
3777
|
info_arguments = OrderedDict()
|
|
2827
3778
|
for argname, argvalue in arguments.items():
|
|
2828
3779
|
info_arguments[argname] = str(argvalue)[:100]
|
|
@@ -2833,26 +3784,26 @@ class CacheCallable(object):
|
|
|
2833
3784
|
# ---------------
|
|
2834
3785
|
|
|
2835
3786
|
if cache_mode.delete:
|
|
2836
|
-
self._subdir.delete(
|
|
3787
|
+
self._subdir.delete( filename )
|
|
2837
3788
|
elif cache_mode.read:
|
|
2838
3789
|
class Tag:
|
|
2839
3790
|
pass
|
|
2840
3791
|
tag = Tag()
|
|
2841
3792
|
if not is_new:
|
|
2842
|
-
r = self._subdir.read(
|
|
3793
|
+
r = self._subdir.read( filename, tag, version=version_ )
|
|
2843
3794
|
else:
|
|
2844
3795
|
try:
|
|
2845
3796
|
execute.__new_during_read = True
|
|
2846
|
-
r = self._subdir.read(
|
|
3797
|
+
r = self._subdir.read( filename, tag, version=version_ )
|
|
2847
3798
|
finally:
|
|
2848
3799
|
execute.__new_during_read = False
|
|
2849
3800
|
|
|
2850
3801
|
if not r is tag:
|
|
2851
3802
|
if not track_cached_files is None:
|
|
2852
|
-
track_cached_files += self._fullFileName(
|
|
3803
|
+
track_cached_files += self._fullFileName(filename)
|
|
2853
3804
|
execute.cache_info.last_cached = True
|
|
2854
3805
|
if not debug_verbose is None:
|
|
2855
|
-
debug_verbose.write(f"cache({name}): read '{label}' version 'version {version_}' from cache '{self._subdir.
|
|
3806
|
+
debug_verbose.write(f"cache({name}): read '{label}' version 'version {version_}' from cache '{self._subdir.full_file_name(filename)}'.")
|
|
2856
3807
|
if is_new:
|
|
2857
3808
|
assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__", F.__qualname__, label)
|
|
2858
3809
|
r.__magic_cache_call_init__ = False # since we called __new__, __init__ will be called next
|
|
@@ -2871,9 +3822,9 @@ class CacheCallable(object):
|
|
|
2871
3822
|
assert r.__magic_cache_call_init__ is None, ("**** Internal error. __init__ should reset __magic_cache_call_init__")
|
|
2872
3823
|
|
|
2873
3824
|
if cache_mode.write:
|
|
2874
|
-
self._subdir.write(
|
|
3825
|
+
self._subdir.write(filename,r,version=version_)
|
|
2875
3826
|
if not track_cached_files is None:
|
|
2876
|
-
track_cached_files += self._subdir.
|
|
3827
|
+
track_cached_files += self._subdir.full_file_name(filename)
|
|
2877
3828
|
execute.cache_info.last_cached = False
|
|
2878
3829
|
|
|
2879
3830
|
if is_new:
|
|
@@ -2883,81 +3834,21 @@ class CacheCallable(object):
|
|
|
2883
3834
|
|
|
2884
3835
|
if not debug_verbose is None:
|
|
2885
3836
|
if cache_mode.write:
|
|
2886
|
-
debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' and wrote result into '{self._subdir.
|
|
3837
|
+
debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' and wrote result into '{self._subdir.full_file_name(filename)}'.")
|
|
2887
3838
|
else:
|
|
2888
|
-
debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' but did *not* write into '{self._subdir.
|
|
3839
|
+
debug_verbose.write(f"cache({name}): called '{label}' version 'version {version_}' but did *not* write into '{self._subdir.full_file_name(filename)}'.")
|
|
2889
3840
|
return r
|
|
2890
3841
|
|
|
2891
3842
|
update_wrapper( wrapper=execute, wrapped=F )
|
|
2892
|
-
execute.cache_info = CacheInfo()
|
|
2893
|
-
|
|
2894
|
-
execute.cache_info.name = name # decoded name of the function
|
|
2895
|
-
execute.cache_info.signature = inspect.signature(F) # signature of the function
|
|
2896
|
-
|
|
2897
|
-
execute.cache_info.uid = None # last function call ID
|
|
2898
|
-
execute.cache_info.label = None # last unique file name cached to
|
|
2899
|
-
execute.cache_info.version = None # last version used
|
|
2900
|
-
|
|
2901
|
-
execute.cache_info.last_cached = None # last function call restored from disk?
|
|
2902
|
-
|
|
2903
|
-
if self.cacheController.keep_last_arguments:
|
|
2904
|
-
execute.cache_info.arguments = None # last function call arguments dictionary of strings
|
|
3843
|
+
execute.cache_info = CacheInfo(name, F, self.cache_controller.keep_last_arguments)
|
|
2905
3844
|
|
|
2906
3845
|
if is_new:
|
|
2907
3846
|
execute.__new_during_read = False
|
|
2908
3847
|
|
|
2909
3848
|
if not debug_verbose is None:
|
|
2910
3849
|
debug_verbose.write(f"cache({name}): {'function' if not is_new else 'class constructor function'} registered for caching into '{self._subdir.path}'.")
|
|
2911
|
-
self.
|
|
3850
|
+
self.cache_controller.versioned[name] = execute
|
|
2912
3851
|
return execute
|
|
2913
3852
|
|
|
2914
|
-
def VersionedCacheRoot( directory : str, *,
|
|
2915
|
-
ext : str = None,
|
|
2916
|
-
fmt : Format = None,
|
|
2917
|
-
createDirectory : bool = None,
|
|
2918
|
-
**controller_kwargs
|
|
2919
|
-
):
|
|
2920
|
-
"""
|
|
2921
|
-
Create a root directory for versioning caching on disk
|
|
2922
|
-
|
|
2923
|
-
Usage:
|
|
2924
|
-
In a central file, define a root directory
|
|
2925
|
-
vroot = VersionedCacheRoot("!/cache")
|
|
2926
3853
|
|
|
2927
|
-
and a sub-directory
|
|
2928
|
-
vtest = vroot("test")
|
|
2929
|
-
|
|
2930
|
-
@vtest.cache("1.0")
|
|
2931
|
-
def f1( x=1, y=2 ):
|
|
2932
|
-
print(x,y)
|
|
2933
|
-
|
|
2934
|
-
@vtest.cache("1.0", dps=[f1])
|
|
2935
|
-
def f2( x=1, y=2, z=3 ):
|
|
2936
|
-
f1( x,y )
|
|
2937
|
-
print(z)
|
|
2938
|
-
|
|
2939
|
-
Parameters
|
|
2940
|
-
----------
|
|
2941
|
-
directory : name of the directory. Using SubDir the following short cuts are supported:
|
|
2942
|
-
"!/dir" creates 'dir' in the temporary directory
|
|
2943
|
-
"~/dir" creates 'dir' in the home directory
|
|
2944
|
-
"./dir" created 'dir' relative to the current directory
|
|
2945
|
-
ext : extension, which will automatically be appended to file names (see SubDir). Default depends on format. For Format.PICKLE it is 'pck'
|
|
2946
|
-
fmt : format, see SubDir.Format. Default is Format.PICKLE
|
|
2947
|
-
createDirectory : whether to create the directory upon creation. Default is no.
|
|
2948
|
-
controller_kwargs: parameters passed to VersionController, for example:
|
|
2949
|
-
exclude_arg_types : list of types or names of types to exclude when auto-generating function signatures from function arguments.
|
|
2950
|
-
A standard example from cdxbasics is "Context" as it is used to print progress messages.
|
|
2951
|
-
max_filename_length : maximum filename length
|
|
2952
|
-
hash_length: length used for hashes, see cdxbasics.util.uniqueHash()
|
|
2953
|
-
|
|
2954
|
-
Returns
|
|
2955
|
-
-------
|
|
2956
|
-
A root cache directory
|
|
2957
|
-
"""
|
|
2958
|
-
controller = CacheController(**controller_kwargs) if len(controller_kwargs) > 0 else None
|
|
2959
|
-
return SubDir( directory=directory, ext=ext, fmt=fmt, createDirectory=createDirectory, controller=controller )
|
|
2960
3854
|
|
|
2961
|
-
version = version_decorator
|
|
2962
|
-
|
|
2963
|
-
|