rbx.cp 0.5.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rbx/__init__.py +0 -0
- rbx/annotations.py +127 -0
- rbx/autoenum.py +333 -0
- rbx/box/__init__.py +0 -0
- rbx/box/builder.py +77 -0
- rbx/box/cd.py +37 -0
- rbx/box/checkers.py +134 -0
- rbx/box/code.py +185 -0
- rbx/box/compile.py +56 -0
- rbx/box/conftest.py +42 -0
- rbx/box/contest/__init__.py +0 -0
- rbx/box/contest/build_contest_statements.py +347 -0
- rbx/box/contest/contest_package.py +76 -0
- rbx/box/contest/contest_utils.py +20 -0
- rbx/box/contest/main.py +179 -0
- rbx/box/contest/schema.py +155 -0
- rbx/box/contest/statements.py +82 -0
- rbx/box/creation.py +72 -0
- rbx/box/download.py +64 -0
- rbx/box/environment.py +345 -0
- rbx/box/extensions.py +26 -0
- rbx/box/generators.py +478 -0
- rbx/box/generators_test.py +63 -0
- rbx/box/main.py +449 -0
- rbx/box/package.py +316 -0
- rbx/box/packaging/boca/extension.py +27 -0
- rbx/box/packaging/boca/packager.py +245 -0
- rbx/box/packaging/contest_main.py +82 -0
- rbx/box/packaging/main.py +68 -0
- rbx/box/packaging/packager.py +117 -0
- rbx/box/packaging/polygon/packager.py +320 -0
- rbx/box/packaging/polygon/test.py +81 -0
- rbx/box/packaging/polygon/xml_schema.py +106 -0
- rbx/box/presets/__init__.py +503 -0
- rbx/box/presets/fetch.py +70 -0
- rbx/box/presets/lock_schema.py +20 -0
- rbx/box/presets/schema.py +59 -0
- rbx/box/schema.py +394 -0
- rbx/box/solutions.py +792 -0
- rbx/box/solutions_test.py +41 -0
- rbx/box/statements/__init__.py +0 -0
- rbx/box/statements/build_statements.py +359 -0
- rbx/box/statements/builders.py +375 -0
- rbx/box/statements/joiners.py +113 -0
- rbx/box/statements/latex.py +47 -0
- rbx/box/statements/latex_jinja.py +214 -0
- rbx/box/statements/schema.py +138 -0
- rbx/box/stresses.py +292 -0
- rbx/box/stressing/__init__.py +0 -0
- rbx/box/stressing/finder_parser.py +359 -0
- rbx/box/stressing/generator_parser.py +258 -0
- rbx/box/testcases.py +54 -0
- rbx/box/ui/__init__.py +0 -0
- rbx/box/ui/captured_log.py +372 -0
- rbx/box/ui/css/app.tcss +48 -0
- rbx/box/ui/main.py +38 -0
- rbx/box/ui/run.py +209 -0
- rbx/box/validators.py +245 -0
- rbx/box/validators_test.py +15 -0
- rbx/checker.py +128 -0
- rbx/clone.py +197 -0
- rbx/config.py +271 -0
- rbx/conftest.py +38 -0
- rbx/console.py +27 -0
- rbx/create.py +37 -0
- rbx/edit.py +24 -0
- rbx/grading/__init__.py +0 -0
- rbx/grading/caching.py +356 -0
- rbx/grading/conftest.py +33 -0
- rbx/grading/judge/__init__.py +0 -0
- rbx/grading/judge/cacher.py +503 -0
- rbx/grading/judge/digester.py +35 -0
- rbx/grading/judge/sandbox.py +748 -0
- rbx/grading/judge/sandboxes/__init__.py +0 -0
- rbx/grading/judge/sandboxes/isolate.py +683 -0
- rbx/grading/judge/sandboxes/stupid_sandbox.py +310 -0
- rbx/grading/judge/sandboxes/timeit.py +217 -0
- rbx/grading/judge/storage.py +284 -0
- rbx/grading/judge/test.py +38 -0
- rbx/grading/judge/testiso.py +54 -0
- rbx/grading/steps.py +522 -0
- rbx/grading/steps_with_caching.py +59 -0
- rbx/grading/steps_with_caching_run_test.py +429 -0
- rbx/grading_utils.py +148 -0
- rbx/hydration.py +101 -0
- rbx/main.py +122 -0
- rbx/metadata.py +105 -0
- rbx/providers/__init__.py +43 -0
- rbx/providers/codeforces.py +73 -0
- rbx/providers/provider.py +26 -0
- rbx/resources/checkers/boilerplate.cpp +20 -0
- rbx/resources/default_config.json +48 -0
- rbx/resources/envs/default.rbx.yml +37 -0
- rbx/resources/envs/isolate.rbx.yml +37 -0
- rbx/resources/packagers/boca/checker.sh +43 -0
- rbx/resources/packagers/boca/compare +53 -0
- rbx/resources/packagers/boca/compile/c +172 -0
- rbx/resources/packagers/boca/compile/cc +173 -0
- rbx/resources/packagers/boca/compile/cpp +172 -0
- rbx/resources/packagers/boca/compile/java +194 -0
- rbx/resources/packagers/boca/compile/kt +155 -0
- rbx/resources/packagers/boca/compile/pas +172 -0
- rbx/resources/packagers/boca/compile/py2 +173 -0
- rbx/resources/packagers/boca/compile/py3 +173 -0
- rbx/resources/packagers/boca/run/c +128 -0
- rbx/resources/packagers/boca/run/cc +128 -0
- rbx/resources/packagers/boca/run/cpp +128 -0
- rbx/resources/packagers/boca/run/java +194 -0
- rbx/resources/packagers/boca/run/kt +159 -0
- rbx/resources/packagers/boca/run/py2 +166 -0
- rbx/resources/packagers/boca/run/py3 +166 -0
- rbx/resources/presets/default/contest/contest.rbx.yml +14 -0
- rbx/resources/presets/default/contest/statement/contest.rbx.tex +97 -0
- rbx/resources/presets/default/contest/statement/olymp.sty +250 -0
- rbx/resources/presets/default/contest/statement/template.rbx.tex +42 -0
- rbx/resources/presets/default/preset.rbx.yml +12 -0
- rbx/resources/presets/default/problem/.gitignore +6 -0
- rbx/resources/presets/default/problem/gen.cpp +9 -0
- rbx/resources/presets/default/problem/problem.rbx.yml +44 -0
- rbx/resources/presets/default/problem/random.py +3 -0
- rbx/resources/presets/default/problem/random.txt +2 -0
- rbx/resources/presets/default/problem/sols/main.cpp +9 -0
- rbx/resources/presets/default/problem/sols/slow.cpp +15 -0
- rbx/resources/presets/default/problem/sols/wa.cpp +9 -0
- rbx/resources/presets/default/problem/statement/olymp.sty +250 -0
- rbx/resources/presets/default/problem/statement/projecao.png +0 -0
- rbx/resources/presets/default/problem/statement/statement.rbx.tex +18 -0
- rbx/resources/presets/default/problem/statement/template.rbx.tex +89 -0
- rbx/resources/presets/default/problem/tests/samples/000.in +1 -0
- rbx/resources/presets/default/problem/tests/samples/001.in +1 -0
- rbx/resources/presets/default/problem/validator.cpp +16 -0
- rbx/resources/presets/default/problem/wcmp.cpp +34 -0
- rbx/resources/templates/template.cpp +19 -0
- rbx/run.py +45 -0
- rbx/schema.py +64 -0
- rbx/submit.py +61 -0
- rbx/submitors/__init__.py +18 -0
- rbx/submitors/codeforces.py +120 -0
- rbx/submitors/submitor.py +25 -0
- rbx/test.py +347 -0
- rbx/testcase.py +70 -0
- rbx/testcase_rendering.py +79 -0
- rbx/testdata/box1/gen1.cpp +7 -0
- rbx/testdata/box1/gen2.cpp +9 -0
- rbx/testdata/box1/genScript.py +2 -0
- rbx/testdata/box1/hard-tle.sol.cpp +26 -0
- rbx/testdata/box1/ole.cpp +17 -0
- rbx/testdata/box1/problem.rbx.yml +39 -0
- rbx/testdata/box1/re.sol.cpp +23 -0
- rbx/testdata/box1/sol.cpp +22 -0
- rbx/testdata/box1/tests/1.in +1 -0
- rbx/testdata/box1/tle-and-incorrect.sol.cpp +33 -0
- rbx/testdata/box1/tle.sol.cpp +35 -0
- rbx/testdata/box1/validator.cpp +11 -0
- rbx/testdata/box1/wa.sol.cpp +22 -0
- rbx/testdata/caching/executable.py +1 -0
- rbx/testdata/compatible +0 -0
- rbx/testing_utils.py +65 -0
- rbx/utils.py +162 -0
- rbx_cp-0.5.0.dist-info/LICENSE +201 -0
- rbx_cp-0.5.0.dist-info/METADATA +89 -0
- rbx_cp-0.5.0.dist-info/RECORD +164 -0
- rbx_cp-0.5.0.dist-info/WHEEL +4 -0
- rbx_cp-0.5.0.dist-info/entry_points.txt +4 -0
rbx/grading/judge/cacher.py
@@ -0,0 +1,503 @@
+import atexit
+import fcntl
+import io
+import logging
+import os
+import pathlib
+import shutil
+import tempfile
+import typing
+from typing import IO, List, Optional
+
+import gevent
+
+from rbx.grading.judge import digester, storage
+
+logger = logging.getLogger(__name__)
+
+
+class TombstoneError(RuntimeError):
+    """An error that represents the file cacher trying to read
+    files that have been deleted from the database.
+
+    """
+
+    pass
+
+
+class FileCacher:
+    """This class implements a local cache for files stored as FSObject
+    in the database.
+
+    """
+
+    # This value is very arbitrary, and in this case we want it to be a
+    # one-size-fits-all, since we use it for many conversions. It has
+    # been chosen arbitrarily based on performance tests on my machine.
+    # A few considerations on the value it could assume follow:
+    # - The page size of large objects is LOBLKSIZE, which is BLCKSZ/4
+    #   (BLCKSZ is the block size of the PostgreSQL database, which is
+    #   set during pre-build configuration). BLCKSZ is by default 8192,
+    #   therefore LOBLKSIZE is 2048. See:
+    #   http://www.postgresql.org/docs/9.0/static/catalog-pg-largeobject.html
+    # - The `io' module defines a DEFAULT_BUFFER_SIZE constant, whose
+    #   value is 8192.
+    # CHUNK_SIZE should be a multiple of these values.
+    CHUNK_SIZE = 1024 * 1024  # 1 MiB
+
+    backend: storage.Storage
+    shared: bool
+    file_dir: pathlib.Path
+    temp_dir: pathlib.Path
+    folder: Optional[pathlib.Path]
+
+    def __init__(
+        self,
+        backend: storage.Storage,
+        shared: bool = False,
+        folder: Optional[pathlib.Path] = None,
+    ):
+        """Initialize."""
+
+        self.backend = backend
+        self.shared = shared
+        self.folder = folder
+
+        # First we create the config directories.
+        if folder:
+            self._create_directory_or_die(folder)
+
+        if not self.is_shared():
+            self.file_dir = pathlib.Path(tempfile.mkdtemp())
+            # Delete this directory on exit since it has a random name and
+            # won't be used again.
+            atexit.register(lambda: shutil.rmtree(str(self.file_dir)))
+        else:
+            assert folder is not None
+            self.file_dir = folder / 'fs-cache-shared'
+            self._create_directory_or_die(self.file_dir)
+
+        # Temp dir must be a subdirectory of file_dir to avoid cross-filesystem
+        # moves.
+        self.temp_dir = pathlib.Path(
+            tempfile.mkdtemp(dir=self.file_dir, prefix='_temp')
+        )
+        atexit.register(lambda: shutil.rmtree(str(self.temp_dir)))
+        # Just to make sure it was created.
+
+    def is_shared(self) -> bool:
+        """Return whether the cache directory is shared with other services."""
+        return self.shared
+
+    @staticmethod
+    def _create_directory_or_die(directory: pathlib.Path):
+        """Create directory and ensure it exists, or raise a RuntimeError."""
+        directory.mkdir(parents=True, exist_ok=True)
+
+    def precache_lock(self) -> Optional[IO[bytes]]:
+        """Lock the (shared) cache for precaching if it is currently unlocked.
+
+        Locking is optional: Any process can perform normal cache operations
+        at any time whether the cache is locked or not.
+
+        The locking mechanism's only purpose is to avoid wasting resources by
+        ensuring that on each machine, only one worker precaches at any time.
+
+        return (fileobj|None): The lock file if the cache was previously
+            unlocked. Closing the file object will release the lock.
+            None if the cache was already locked.
+
+        """
+        lock_file = self.file_dir / 'cache_lock'
+        fobj = lock_file.open('wb')
+        returned = False
+        try:
+            fcntl.flock(fobj, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        except BlockingIOError:
+            # This exception is raised only if the errno is EWOULDBLOCK,
+            # which means that the file is already locked.
+            return None
+        else:
+            returned = True
+            return fobj
+        finally:
+            if not returned:
+                fobj.close()
+
+    def _load(self, digest: str, cache_only: bool) -> Optional[IO[bytes]]:
+        """Load a file into the cache and open it for reading.
+
+        cache_only (bool): don't open the file for reading.
+
+        return (fileobj): a readable binary file-like object from which
+            to read the contents of the file (None if cache_only is True).
+
+        raise (KeyError): if the file cannot be found.
+
+        """
+        cache_file_path = self.file_dir / digest
+
+        if cache_only:
+            if cache_file_path.exists():
+                return None
+        else:
+            try:
+                return cache_file_path.open('rb')
+            except FileNotFoundError:
+                pass
+
+        logger.debug('File %s not in cache, downloading from database.', digest)
+
+        ftmp_handle, temp_file_path = tempfile.mkstemp(dir=self.temp_dir, text=False)
+        temp_file_path = pathlib.Path(temp_file_path)
+        with open(ftmp_handle, 'wb') as ftmp, self.backend.get_file(digest) as fobj:
+            storage.copyfileobj(fobj, ftmp, self.CHUNK_SIZE)
+
+        if not cache_only:
+            # We allow anyone to delete files from the cache directory
+            # self.file_dir at any time. Hence, cache_file_path might no
+            # longer exist an instant after we create it. Opening the
+            # temporary file before renaming it circumvents this issue.
+            # (Note that the temporary file may not be manually deleted!)
+            fd = temp_file_path.open('rb')
+
+        # Then move it to its real location (this operation is atomic
+        # by POSIX requirement)
+        temp_file_path.rename(cache_file_path)
+
+        logger.debug('File %s downloaded.', digest)
+
+        if not cache_only:
+            return fd
+
+    def cache_file(self, digest: str):
+        """Load a file into the cache.
+
+        Ask the backend to provide the file and store it in the cache for the
+        benefit of future accesses, unless the file is already cached.
+        Note that the cached file might still be deleted at any time, so it
+        cannot be assumed to actually exist after this function completes.
+        Always use the get_file* functions to access a file.
+
+        digest (unicode): the digest of the file to get.
+
+        raise (KeyError): if the file cannot be found.
+        raise (TombstoneError): if the digest is the tombstone
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+
+        self._load(digest, True)
+
+    def get_file(self, digest: str) -> IO[bytes]:
+        """Retrieve a file from the storage.
+
+        If it's available in the cache use that copy, without querying
+        the backend. Otherwise ask the backend to provide it, and store
+        it in the cache for the benefit of future accesses.
+
+        The file is returned as a file-object. Other interfaces are
+        available as `get_file_content', `get_file_to_fobj' and `get_
+        file_to_path'.
+
+        digest (unicode): the digest of the file to get.
+
+        return (fileobj): a readable binary file-like object from which
+            to read the contents of the file.
+
+        raise (KeyError): if the file cannot be found.
+        raise (TombstoneError): if the digest is the tombstone
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+
+        logger.debug('Getting file %s.', digest)
+
+        return typing.cast(IO[bytes], self._load(digest, False))
+
+    def path_for_symlink(self, digest: str) -> Optional[pathlib.Path]:
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+
+        logger.debug('Getting symlink file path %s.', digest)
+        return self.backend.path_for_symlink(digest)
+
+    def get_file_content(self, digest: str) -> bytes:
+        """Retrieve a file from the storage.
+
+        See `get_file'. This method returns the content of the file, as
+        a binary string.
+
+        digest (unicode): the digest of the file to get.
+
+        return (bytes): the content of the retrieved file.
+
+        raise (KeyError): if the file cannot be found.
+        raise (TombstoneError): if the digest is the tombstone
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+        with self.get_file(digest) as src:
+            return src.read()
+
+    def get_file_to_fobj(self, digest: str, dst: IO[bytes]):
+        """Retrieve a file from the storage.
+
+        See `get_file'. This method will write the content of the file
+        to the given file-object.
+
+        digest (unicode): the digest of the file to get.
+        dst (fileobj): a writable binary file-like object on which to
+            write the contents of the file.
+
+        raise (KeyError): if the file cannot be found.
+        raise (TombstoneError): if the digest is the tombstone
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+        with self.get_file(digest) as src:
+            storage.copyfileobj(src, dst, self.CHUNK_SIZE)
+
+    def get_file_to_path(self, digest: str, dst_path: pathlib.Path):
+        """Retrieve a file from the storage.
+
+        See `get_file'. This method will write the content of a file
+        to the given file-system location.
+
+        digest (unicode): the digest of the file to get.
+        dst_path (string): an accessible location on the file-system on
+            which to write the contents of the file.
+
+        raise (KeyError): if the file cannot be found.
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+        with self.get_file(digest) as src:
+            with dst_path.open('wb') as dst:
+                storage.copyfileobj(src, dst, self.CHUNK_SIZE)
+
+    def put_file_from_fobj(self, src: IO[bytes], desc: str = '') -> str:
+        """Store a file in the storage.
+
+        If it's already (for some reason...) in the cache send that
+        copy to the backend. Otherwise store it in the file-system
+        cache first.
+
+        The file is obtained from a file-object. Other interfaces are
+        available as `put_file_content', `put_file_from_path'.
+
+        src (fileobj): a readable binary file-like object from which
+            to read the contents of the file.
+        desc (unicode): the (optional) description to associate to the
+            file.
+
+        return (unicode): the digest of the stored file.
+
+        """
+        logger.debug('Reading input file to store on the database.')
+
+        # Unfortunately, we have to read the whole file-obj to compute
+        # the digest but we take that chance to save it to a temporary
+        # path so that we then just need to move it. Hoping that both
+        # locations will be on the same filesystem, that should be way
+        # faster than reading the whole file-obj again (as it could be
+        # compressed or require network communication).
+        # XXX We're *almost* reimplementing copyfileobj.
+        with tempfile.NamedTemporaryFile(
+            'wb', delete=False, dir=str(self.temp_dir)
+        ) as dst:
+            d = digester.Digester()
+            buf = src.read(self.CHUNK_SIZE)
+            while len(buf) > 0:
+                d.update(buf)
+                while len(buf) > 0:
+                    written = dst.write(buf)
+                    # Cooperative yield.
+                    gevent.sleep(0)
+                    if written is None:
+                        break
+                    buf = buf[written:]
+                buf = src.read(self.CHUNK_SIZE)
+            digest = d.digest()
+            dst.flush()
+
+            logger.debug('File has digest %s.', digest)
+
+            cache_file_path = self.file_dir / digest
+
+            # Store the file in the backend. We do that even if the file
+            # was already in the cache
+            # because there's a (small) chance that the file got removed
+            # from the backend but somehow remained in the cache.
+            # We read from the temporary file before moving it to
+            # cache_file_path because the latter might be deleted before
+            # we get a chance to open it.
+            with open(dst.name, 'rb') as src:
+                pending_file = self.backend.create_file(digest)
+                if pending_file is not None:
+                    storage.copyfileobj(src, pending_file.fd, self.CHUNK_SIZE)
+                    self.backend.commit_file(pending_file, desc)
+
+            os.rename(dst.name, cache_file_path)
+
+        return digest
+
+    def put_file_content(self, content: bytes, desc: str = '') -> str:
+        """Store a file in the storage.
+
+        See `put_file_from_fobj'. This method will read the content of
+        the file from the given binary string.
+
+        content (bytes): the content of the file to store.
+        desc (unicode): the (optional) description to associate to the
+            file.
+
+        return (unicode): the digest of the stored file.
+
+        """
+        with io.BytesIO(content) as src:
+            return self.put_file_from_fobj(src, desc)
+
+    def put_file_text(self, text: str, desc: str = '') -> str:
+        return self.put_file_content(text.encode('utf-8'), desc)
+
+    def put_file_from_path(self, src_path: pathlib.Path, desc: str = '') -> str:
+        """Store a file in the storage.
+
+        See `put_file_from_fobj'. This method will read the content of
+        the file from the given file-system location.
+
+        src_path (Path): an accessible location on the file-system
+            from which to read the contents of the file.
+        desc (unicode): the (optional) description to associate to the
+            file.
+
+        return (unicode): the digest of the stored file.
+
+        """
+        with src_path.open('rb') as src:
+            return self.put_file_from_fobj(src, desc)
+
+    def describe(self, digest: str) -> str:
+        """Return the description of a file given its digest.
+
+        digest (unicode): the digest of the file to describe.
+
+        return (unicode): the description of the file.
+
+        raise (KeyError): if the file cannot be found.
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+        return self.backend.describe(digest)
+
+    def get_size(self, digest: str) -> int:
+        """Return the size of a file given its digest.
+
+        digest (unicode): the digest of the file to calculate the size
+            of.
+
+        return (int): the size of the file, in bytes.
+
+        raise (KeyError): if the file cannot be found.
+        raise (TombstoneError): if the digest is the tombstone
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+        return self.backend.get_size(digest)
+
+    def delete(self, digest: str):
+        """Delete a file from the backend and the local cache.
+
+        digest (unicode): the digest of the file to delete.
+
+        """
+        if digest == storage.TOMBSTONE:
+            return
+        self.drop(digest)
+        self.backend.delete(digest)
+
+    def drop(self, digest):
+        """Delete a file only from the local cache.
+
+        digest (unicode): the file to delete.
+
+        """
+        if digest == storage.TOMBSTONE:
+            return
+        cache_file_path: pathlib.Path = self.file_dir / digest
+        cache_file_path.unlink(missing_ok=True)
+
+    def purge_cache(self):
+        """Empty the local cache.
+
+        This function must not be called if the cache directory is shared.
+
+        """
+        self.destroy_cache()
+        self.file_dir.mkdir(parents=True, exist_ok=True)
+        if self.folder is not None:
+            self.folder.mkdir(parents=True, exist_ok=True)
+
+    def destroy_cache(self):
+        """Completely remove and destroy the cache.
+
+        Nothing that could have been created by this object will be
+        left on disk. After that, this instance isn't usable anymore.
+
+        This function must not be called if the cache directory is shared.
+
+        """
+        if self.is_shared():
+            raise Exception('You may not destroy a shared cache.')
+        shutil.rmtree(str(self.file_dir))
+
+    def list(self) -> List[storage.FileWithDescription]:
+        """List the files available in the storage.
+
+        return ([(unicode, unicode)]): a list of pairs, each
+            representing a file in the form (digest, description).
+
+        """
+        return self.backend.list()
+
+    def check_backend_integrity(self, delete: bool = False) -> bool:
+        """Check the integrity of the backend.
+
+        Request all the files from the backend. For each of them the
+        digest is recomputed and checked against the one recorded in
+        the backend.
+
+        If mismatches are found, they are reported with ERROR
+        severity. The method returns False if at least one mismatch is
+        found, True otherwise.
+
+        delete (bool): if True, files with a wrong digest are deleted.
+
+        """
+        clean = True
+        for fwd in self.list():
+            digest = fwd.filename
+            d = digester.Digester()
+            with self.backend.get_file(digest) as fobj:
+                buf = fobj.read(self.CHUNK_SIZE)
+                while len(buf) > 0:
+                    d.update(buf)
+                    buf = fobj.read(self.CHUNK_SIZE)
+            computed_digest = d.digest()
+            if digest != computed_digest:
+                logger.error(
+                    'File with hash %s actually has hash %s', digest, computed_digest
+                )
+                if delete:
+                    self.delete(digest)
+                clean = False
+
+        return clean
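To make the API above concrete, here is a short usage sketch. It is not part of the package: the InMemoryStorage class and its _Pending helper are hypothetical stand-ins implementing only the slice of the storage.Storage interface that this example exercises; the package's real backends live in rbx/grading/judge/storage.py.

# Illustrative sketch only; InMemoryStorage is a hypothetical stand-in
# for a real storage.Storage backend shipped by the package.
import io

from rbx.grading.judge.cacher import FileCacher


class _Pending:
    """Pending upload handle; FileCacher writes into `.fd` before commit."""

    def __init__(self, digest):
        self.digest = digest
        self.fd = io.BytesIO()


class InMemoryStorage:
    def __init__(self):
        self.blobs = {}

    def create_file(self, digest):
        # Mirror the contract FileCacher relies on: return None when the
        # digest is already stored, else a pending handle to write into.
        return None if digest in self.blobs else _Pending(digest)

    def commit_file(self, pending, desc=''):
        self.blobs[pending.digest] = pending.fd.getvalue()

    def get_file(self, digest):
        return io.BytesIO(self.blobs[digest])


cacher = FileCacher(InMemoryStorage())  # non-shared: cache lives in a temp dir
digest = cacher.put_file_text('hello world')
assert cacher.get_file_content(digest) == b'hello world'

Note that put_file_text both uploads to the backend and populates the local cache (via the temp-file rename in put_file_from_fobj), so the subsequent get_file_content is served from disk without touching the backend.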
rbx/grading/judge/digester.py
@@ -0,0 +1,35 @@
+import hashlib
+from typing import IO
+
+import gevent
+
+
+class Digester:
+    """Simple wrapper of hashlib using our preferred hasher."""
+
+    def __init__(self):
+        self._hasher = hashlib.sha1()
+
+    def update(self, b):
+        """Add the bytes b to the hasher."""
+        self._hasher.update(b)
+
+    def digest(self):
+        """Return the digest as a hex string."""
+        return self._hasher.digest().hex()
+
+
+def digest_cooperatively_into_digester(
+    f: IO[bytes], digester: Digester, chunk_size: int = 2**20
+):
+    buf = f.read(chunk_size)
+    while len(buf) > 0:
+        digester.update(buf)
+        gevent.sleep(0)
+        buf = f.read(chunk_size)
+
+
+def digest_cooperatively(f: IO[bytes], chunk_size: int = 2**20):
+    d = Digester()
+    digest_cooperatively_into_digester(f, d, chunk_size)
+    return d.digest()