rbx_cp-0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. rbx/__init__.py +0 -0
  2. rbx/annotations.py +127 -0
  3. rbx/autoenum.py +333 -0
  4. rbx/box/__init__.py +0 -0
  5. rbx/box/builder.py +77 -0
  6. rbx/box/cd.py +37 -0
  7. rbx/box/checkers.py +134 -0
  8. rbx/box/code.py +185 -0
  9. rbx/box/compile.py +56 -0
  10. rbx/box/conftest.py +42 -0
  11. rbx/box/contest/__init__.py +0 -0
  12. rbx/box/contest/build_contest_statements.py +347 -0
  13. rbx/box/contest/contest_package.py +76 -0
  14. rbx/box/contest/contest_utils.py +20 -0
  15. rbx/box/contest/main.py +179 -0
  16. rbx/box/contest/schema.py +155 -0
  17. rbx/box/contest/statements.py +82 -0
  18. rbx/box/creation.py +72 -0
  19. rbx/box/download.py +64 -0
  20. rbx/box/environment.py +345 -0
  21. rbx/box/extensions.py +26 -0
  22. rbx/box/generators.py +478 -0
  23. rbx/box/generators_test.py +63 -0
  24. rbx/box/main.py +449 -0
  25. rbx/box/package.py +316 -0
  26. rbx/box/packaging/boca/extension.py +27 -0
  27. rbx/box/packaging/boca/packager.py +245 -0
  28. rbx/box/packaging/contest_main.py +82 -0
  29. rbx/box/packaging/main.py +68 -0
  30. rbx/box/packaging/packager.py +117 -0
  31. rbx/box/packaging/polygon/packager.py +320 -0
  32. rbx/box/packaging/polygon/test.py +81 -0
  33. rbx/box/packaging/polygon/xml_schema.py +106 -0
  34. rbx/box/presets/__init__.py +503 -0
  35. rbx/box/presets/fetch.py +70 -0
  36. rbx/box/presets/lock_schema.py +20 -0
  37. rbx/box/presets/schema.py +59 -0
  38. rbx/box/schema.py +394 -0
  39. rbx/box/solutions.py +792 -0
  40. rbx/box/solutions_test.py +41 -0
  41. rbx/box/statements/__init__.py +0 -0
  42. rbx/box/statements/build_statements.py +359 -0
  43. rbx/box/statements/builders.py +375 -0
  44. rbx/box/statements/joiners.py +113 -0
  45. rbx/box/statements/latex.py +47 -0
  46. rbx/box/statements/latex_jinja.py +214 -0
  47. rbx/box/statements/schema.py +138 -0
  48. rbx/box/stresses.py +292 -0
  49. rbx/box/stressing/__init__.py +0 -0
  50. rbx/box/stressing/finder_parser.py +359 -0
  51. rbx/box/stressing/generator_parser.py +258 -0
  52. rbx/box/testcases.py +54 -0
  53. rbx/box/ui/__init__.py +0 -0
  54. rbx/box/ui/captured_log.py +372 -0
  55. rbx/box/ui/css/app.tcss +48 -0
  56. rbx/box/ui/main.py +38 -0
  57. rbx/box/ui/run.py +209 -0
  58. rbx/box/validators.py +245 -0
  59. rbx/box/validators_test.py +15 -0
  60. rbx/checker.py +128 -0
  61. rbx/clone.py +197 -0
  62. rbx/config.py +271 -0
  63. rbx/conftest.py +38 -0
  64. rbx/console.py +27 -0
  65. rbx/create.py +37 -0
  66. rbx/edit.py +24 -0
  67. rbx/grading/__init__.py +0 -0
  68. rbx/grading/caching.py +356 -0
  69. rbx/grading/conftest.py +33 -0
  70. rbx/grading/judge/__init__.py +0 -0
  71. rbx/grading/judge/cacher.py +503 -0
  72. rbx/grading/judge/digester.py +35 -0
  73. rbx/grading/judge/sandbox.py +748 -0
  74. rbx/grading/judge/sandboxes/__init__.py +0 -0
  75. rbx/grading/judge/sandboxes/isolate.py +683 -0
  76. rbx/grading/judge/sandboxes/stupid_sandbox.py +310 -0
  77. rbx/grading/judge/sandboxes/timeit.py +217 -0
  78. rbx/grading/judge/storage.py +284 -0
  79. rbx/grading/judge/test.py +38 -0
  80. rbx/grading/judge/testiso.py +54 -0
  81. rbx/grading/steps.py +522 -0
  82. rbx/grading/steps_with_caching.py +59 -0
  83. rbx/grading/steps_with_caching_run_test.py +429 -0
  84. rbx/grading_utils.py +148 -0
  85. rbx/hydration.py +101 -0
  86. rbx/main.py +122 -0
  87. rbx/metadata.py +105 -0
  88. rbx/providers/__init__.py +43 -0
  89. rbx/providers/codeforces.py +73 -0
  90. rbx/providers/provider.py +26 -0
  91. rbx/resources/checkers/boilerplate.cpp +20 -0
  92. rbx/resources/default_config.json +48 -0
  93. rbx/resources/envs/default.rbx.yml +37 -0
  94. rbx/resources/envs/isolate.rbx.yml +37 -0
  95. rbx/resources/packagers/boca/checker.sh +43 -0
  96. rbx/resources/packagers/boca/compare +53 -0
  97. rbx/resources/packagers/boca/compile/c +172 -0
  98. rbx/resources/packagers/boca/compile/cc +173 -0
  99. rbx/resources/packagers/boca/compile/cpp +172 -0
  100. rbx/resources/packagers/boca/compile/java +194 -0
  101. rbx/resources/packagers/boca/compile/kt +155 -0
  102. rbx/resources/packagers/boca/compile/pas +172 -0
  103. rbx/resources/packagers/boca/compile/py2 +173 -0
  104. rbx/resources/packagers/boca/compile/py3 +173 -0
  105. rbx/resources/packagers/boca/run/c +128 -0
  106. rbx/resources/packagers/boca/run/cc +128 -0
  107. rbx/resources/packagers/boca/run/cpp +128 -0
  108. rbx/resources/packagers/boca/run/java +194 -0
  109. rbx/resources/packagers/boca/run/kt +159 -0
  110. rbx/resources/packagers/boca/run/py2 +166 -0
  111. rbx/resources/packagers/boca/run/py3 +166 -0
  112. rbx/resources/presets/default/contest/contest.rbx.yml +14 -0
  113. rbx/resources/presets/default/contest/statement/contest.rbx.tex +97 -0
  114. rbx/resources/presets/default/contest/statement/olymp.sty +250 -0
  115. rbx/resources/presets/default/contest/statement/template.rbx.tex +42 -0
  116. rbx/resources/presets/default/preset.rbx.yml +12 -0
  117. rbx/resources/presets/default/problem/.gitignore +6 -0
  118. rbx/resources/presets/default/problem/gen.cpp +9 -0
  119. rbx/resources/presets/default/problem/problem.rbx.yml +44 -0
  120. rbx/resources/presets/default/problem/random.py +3 -0
  121. rbx/resources/presets/default/problem/random.txt +2 -0
  122. rbx/resources/presets/default/problem/sols/main.cpp +9 -0
  123. rbx/resources/presets/default/problem/sols/slow.cpp +15 -0
  124. rbx/resources/presets/default/problem/sols/wa.cpp +9 -0
  125. rbx/resources/presets/default/problem/statement/olymp.sty +250 -0
  126. rbx/resources/presets/default/problem/statement/projecao.png +0 -0
  127. rbx/resources/presets/default/problem/statement/statement.rbx.tex +18 -0
  128. rbx/resources/presets/default/problem/statement/template.rbx.tex +89 -0
  129. rbx/resources/presets/default/problem/tests/samples/000.in +1 -0
  130. rbx/resources/presets/default/problem/tests/samples/001.in +1 -0
  131. rbx/resources/presets/default/problem/validator.cpp +16 -0
  132. rbx/resources/presets/default/problem/wcmp.cpp +34 -0
  133. rbx/resources/templates/template.cpp +19 -0
  134. rbx/run.py +45 -0
  135. rbx/schema.py +64 -0
  136. rbx/submit.py +61 -0
  137. rbx/submitors/__init__.py +18 -0
  138. rbx/submitors/codeforces.py +120 -0
  139. rbx/submitors/submitor.py +25 -0
  140. rbx/test.py +347 -0
  141. rbx/testcase.py +70 -0
  142. rbx/testcase_rendering.py +79 -0
  143. rbx/testdata/box1/gen1.cpp +7 -0
  144. rbx/testdata/box1/gen2.cpp +9 -0
  145. rbx/testdata/box1/genScript.py +2 -0
  146. rbx/testdata/box1/hard-tle.sol.cpp +26 -0
  147. rbx/testdata/box1/ole.cpp +17 -0
  148. rbx/testdata/box1/problem.rbx.yml +39 -0
  149. rbx/testdata/box1/re.sol.cpp +23 -0
  150. rbx/testdata/box1/sol.cpp +22 -0
  151. rbx/testdata/box1/tests/1.in +1 -0
  152. rbx/testdata/box1/tle-and-incorrect.sol.cpp +33 -0
  153. rbx/testdata/box1/tle.sol.cpp +35 -0
  154. rbx/testdata/box1/validator.cpp +11 -0
  155. rbx/testdata/box1/wa.sol.cpp +22 -0
  156. rbx/testdata/caching/executable.py +1 -0
  157. rbx/testdata/compatible +0 -0
  158. rbx/testing_utils.py +65 -0
  159. rbx/utils.py +162 -0
  160. rbx_cp-0.5.0.dist-info/LICENSE +201 -0
  161. rbx_cp-0.5.0.dist-info/METADATA +89 -0
  162. rbx_cp-0.5.0.dist-info/RECORD +164 -0
  163. rbx_cp-0.5.0.dist-info/WHEEL +4 -0
  164. rbx_cp-0.5.0.dist-info/entry_points.txt +4 -0
rbx/grading/judge/cacher.py
@@ -0,0 +1,503 @@
+import atexit
+import fcntl
+import io
+import logging
+import os
+import pathlib
+import shutil
+import tempfile
+import typing
+from typing import IO, List, Optional
+
+import gevent
+
+from rbx.grading.judge import digester, storage
+
+logger = logging.getLogger(__name__)
+
+
+class TombstoneError(RuntimeError):
+    """An error that represents the file cacher trying to read
+    files that have been deleted from the database.
+
+    """
+
+    pass
+
+
+class FileCacher:
+    """This class implements a local cache for files stored as FSObject
+    in the database.
+
+    """
+
+    # This value is very arbitrary, and in this case we want it to be a
+    # one-size-fits-all, since we use it for many conversions. It has
+    # been chosen arbitrarily based on performance tests on my machine.
+    # A few considerations on the value it could assume follow:
+    # - The page size of large objects is LOBLKSIZE, which is BLCKSZ/4
+    #   (BLCKSZ is the block size of the PostgreSQL database, which is
+    #   set during pre-build configuration). BLCKSZ is by default 8192,
+    #   therefore LOBLKSIZE is 2048. See:
+    #   http://www.postgresql.org/docs/9.0/static/catalog-pg-largeobject.html
+    # - The `io' module defines a DEFAULT_BUFFER_SIZE constant, whose
+    #   value is 8192.
+    # CHUNK_SIZE should be a multiple of these values.
+    CHUNK_SIZE = 1024 * 1024  # 1 MiB
+
+    backend: storage.Storage
+    shared: bool
+    file_dir: pathlib.Path
+    temp_dir: pathlib.Path
+    folder: Optional[pathlib.Path]
+
+    def __init__(
+        self,
+        backend: storage.Storage,
+        shared: bool = False,
+        folder: Optional[pathlib.Path] = None,
+    ):
+        """Initialize."""
+
+        self.backend = backend
+        self.shared = shared
+        self.folder = folder
+
+        # First we create the config directories.
+        if folder:
+            self._create_directory_or_die(folder)
+
+        if not self.is_shared():
+            self.file_dir = pathlib.Path(tempfile.mkdtemp())
+            # Delete this directory on exit since it has a random name and
+            # won't be used again.
+            atexit.register(lambda: shutil.rmtree(str(self.file_dir)))
+        else:
+            assert folder is not None
+            self.file_dir = folder / 'fs-cache-shared'
+            self._create_directory_or_die(self.file_dir)
+
+        # Temp dir must be a subdirectory of file_dir to avoid cross-filesystem
+        # moves.
+        self.temp_dir = pathlib.Path(
+            tempfile.mkdtemp(dir=self.file_dir, prefix='_temp')
+        )
+        atexit.register(lambda: shutil.rmtree(str(self.temp_dir)))
+        # Just to make sure it was created.
+
+    def is_shared(self) -> bool:
+        """Return whether the cache directory is shared with other services."""
+        return self.shared
+
+    @staticmethod
+    def _create_directory_or_die(directory: pathlib.Path):
+        """Create directory and ensure it exists, or raise a RuntimeError."""
+        directory.mkdir(parents=True, exist_ok=True)
+
+    def precache_lock(self) -> Optional[IO[bytes]]:
+        """Lock the (shared) cache for precaching if it is currently unlocked.
+
+        Locking is optional: Any process can perform normal cache operations
+        at any time whether the cache is locked or not.
+
+        The locking mechanism's only purpose is to avoid wasting resources by
+        ensuring that on each machine, only one worker precaches at any time.
+
+        return (fileobj|None): The lock file if the cache was previously
+            unlocked. Closing the file object will release the lock.
+            None if the cache was already locked.
+
+        """
+        lock_file = self.file_dir / 'cache_lock'
+        fobj = lock_file.open('wb')
+        returned = False
+        try:
+            fcntl.flock(fobj, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        except BlockingIOError:
+            # This exception is raised only if the errno is EWOULDBLOCK,
+            # which means that the file is already locked.
+            return None
+        else:
+            returned = True
+            return fobj
+        finally:
+            if not returned:
+                fobj.close()
+
+    def _load(self, digest: str, cache_only: bool) -> Optional[IO[bytes]]:
+        """Load a file into the cache and open it for reading.
+
+        cache_only (bool): don't open the file for reading.
+
+        return (fileobj): a readable binary file-like object from which
+            to read the contents of the file (None if cache_only is True).
+
+        raise (KeyError): if the file cannot be found.
+
+        """
+        cache_file_path = self.file_dir / digest
+
+        if cache_only:
+            if cache_file_path.exists():
+                return None
+        else:
+            try:
+                return cache_file_path.open('rb')
+            except FileNotFoundError:
+                pass
+
+        logger.debug('File %s not in cache, downloading from database.', digest)
+
+        ftmp_handle, temp_file_path = tempfile.mkstemp(dir=self.temp_dir, text=False)
+        temp_file_path = pathlib.Path(temp_file_path)
+        with open(ftmp_handle, 'wb') as ftmp, self.backend.get_file(digest) as fobj:
+            storage.copyfileobj(fobj, ftmp, self.CHUNK_SIZE)
+
+        if not cache_only:
+            # We allow anyone to delete files from the cache directory
+            # self.file_dir at any time. Hence, cache_file_path might no
+            # longer exist an instant after we create it. Opening the
+            # temporary file before renaming it circumvents this issue.
+            # (Note that the temporary file may not be manually deleted!)
+            fd = temp_file_path.open('rb')
+
+        # Then move it to its real location (this operation is atomic
+        # by POSIX requirement)
+        temp_file_path.rename(cache_file_path)
+
+        logger.debug('File %s downloaded.', digest)
+
+        if not cache_only:
+            return fd
+
+    def cache_file(self, digest: str):
+        """Load a file into the cache.
+
+        Ask the backend to provide the file and store it in the cache for the
+        benefit of future accesses, unless the file is already cached.
+        Note that the cached file might still be deleted at any time, so it
+        cannot be assumed to actually exist after this function completes.
+        Always use the get_file* functions to access a file.
+
+        digest (unicode): the digest of the file to get.
+
+        raise (KeyError): if the file cannot be found.
+        raise (TombstoneError): if the digest is the tombstone
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+
+        self._load(digest, True)
+
+    def get_file(self, digest: str) -> IO[bytes]:
+        """Retrieve a file from the storage.
+
+        If it's available in the cache use that copy, without querying
+        the backend. Otherwise ask the backend to provide it, and store
+        it in the cache for the benefit of future accesses.
+
+        The file is returned as a file-object. Other interfaces are
+        available as `get_file_content', `get_file_to_fobj' and `get_
+        file_to_path'.
+
+        digest (unicode): the digest of the file to get.
+
+        return (fileobj): a readable binary file-like object from which
+            to read the contents of the file.
+
+        raise (KeyError): if the file cannot be found.
+        raise (TombstoneError): if the digest is the tombstone
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+
+        logger.debug('Getting file %s.', digest)
+
+        return typing.cast(IO[bytes], self._load(digest, False))
+
+    def path_for_symlink(self, digest: str) -> Optional[pathlib.Path]:
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+
+        logger.debug('Getting symlink file path %s.', digest)
+        return self.backend.path_for_symlink(digest)
+
+    def get_file_content(self, digest: str) -> bytes:
+        """Retrieve a file from the storage.
+
+        See `get_file'. This method returns the content of the file, as
+        a binary string.
+
+        digest (unicode): the digest of the file to get.
+
+        return (bytes): the content of the retrieved file.
+
+        raise (KeyError): if the file cannot be found.
+        raise (TombstoneError): if the digest is the tombstone
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+        with self.get_file(digest) as src:
+            return src.read()
+
+    def get_file_to_fobj(self, digest: str, dst: IO[bytes]):
+        """Retrieve a file from the storage.
+
+        See `get_file'. This method will write the content of the file
+        to the given file-object.
+
+        digest (unicode): the digest of the file to get.
+        dst (fileobj): a writable binary file-like object on which to
+            write the contents of the file.
+
+        raise (KeyError): if the file cannot be found.
+        raise (TombstoneError): if the digest is the tombstone
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+        with self.get_file(digest) as src:
+            storage.copyfileobj(src, dst, self.CHUNK_SIZE)
+
+    def get_file_to_path(self, digest: str, dst_path: pathlib.Path):
+        """Retrieve a file from the storage.
+
+        See `get_file'. This method will write the content of a file
+        to the given file-system location.
+
+        digest (unicode): the digest of the file to get.
+        dst_path (string): an accessible location on the file-system on
+            which to write the contents of the file.
+
+        raise (KeyError): if the file cannot be found.
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+        with self.get_file(digest) as src:
+            with dst_path.open('wb') as dst:
+                storage.copyfileobj(src, dst, self.CHUNK_SIZE)
+
+    def put_file_from_fobj(self, src: IO[bytes], desc: str = '') -> str:
+        """Store a file in the storage.
+
+        If it's already (for some reason...) in the cache send that
+        copy to the backend. Otherwise store it in the file-system
+        cache first.
+
+        The file is obtained from a file-object. Other interfaces are
+        available as `put_file_content', `put_file_from_path'.
+
+        src (fileobj): a readable binary file-like object from which
+            to read the contents of the file.
+        desc (unicode): the (optional) description to associate to the
+            file.
+
+        return (unicode): the digest of the stored file.
+
+        """
+        logger.debug('Reading input file to store on the database.')
+
+        # Unfortunately, we have to read the whole file-obj to compute
+        # the digest but we take that chance to save it to a temporary
+        # path so that we then just need to move it. Hoping that both
+        # locations will be on the same filesystem, that should be way
+        # faster than reading the whole file-obj again (as it could be
+        # compressed or require network communication).
+        # XXX We're *almost* reimplementing copyfileobj.
+        with tempfile.NamedTemporaryFile(
+            'wb', delete=False, dir=str(self.temp_dir)
+        ) as dst:
+            d = digester.Digester()
+            buf = src.read(self.CHUNK_SIZE)
+            while len(buf) > 0:
+                d.update(buf)
+                while len(buf) > 0:
+                    written = dst.write(buf)
+                    # Cooperative yield.
+                    gevent.sleep(0)
+                    if written is None:
+                        break
+                    buf = buf[written:]
+                buf = src.read(self.CHUNK_SIZE)
+            digest = d.digest()
+            dst.flush()
+
+            logger.debug('File has digest %s.', digest)
+
+            cache_file_path = self.file_dir / digest
+
+            # Store the file in the backend. We do that even if the file
+            # was already in the cache
+            # because there's a (small) chance that the file got removed
+            # from the backend but somehow remained in the cache.
+            # We read from the temporary file before moving it to
+            # cache_file_path because the latter might be deleted before
+            # we get a chance to open it.
+            with open(dst.name, 'rb') as src:
+                pending_file = self.backend.create_file(digest)
+                if pending_file is not None:
+                    storage.copyfileobj(src, pending_file.fd, self.CHUNK_SIZE)
+                    self.backend.commit_file(pending_file, desc)
+
+            os.rename(dst.name, cache_file_path)
+
+        return digest
+
+    def put_file_content(self, content: bytes, desc: str = '') -> str:
+        """Store a file in the storage.
+
+        See `put_file_from_fobj'. This method will read the content of
+        the file from the given binary string.
+
+        content (bytes): the content of the file to store.
+        desc (unicode): the (optional) description to associate to the
+            file.
+
+        return (unicode): the digest of the stored file.
+
+        """
+        with io.BytesIO(content) as src:
+            return self.put_file_from_fobj(src, desc)
+
+    def put_file_text(self, text: str, desc: str = '') -> str:
+        return self.put_file_content(text.encode('utf-8'), desc)
+
+    def put_file_from_path(self, src_path: pathlib.Path, desc: str = '') -> str:
+        """Store a file in the storage.
+
+        See `put_file_from_fobj'. This method will read the content of
+        the file from the given file-system location.
+
+        src_path (Path): an accessible location on the file-system
+            from which to read the contents of the file.
+        desc (unicode): the (optional) description to associate to the
+            file.
+
+        return (unicode): the digest of the stored file.
+
+        """
+        with src_path.open('rb') as src:
+            return self.put_file_from_fobj(src, desc)
+
+    def describe(self, digest: str) -> str:
+        """Return the description of a file given its digest.
+
+        digest (unicode): the digest of the file to describe.
+
+        return (unicode): the description of the file.
+
+        raise (KeyError): if the file cannot be found.
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+        return self.backend.describe(digest)
+
+    def get_size(self, digest: str) -> int:
+        """Return the size of a file given its digest.
+
+        digest (unicode): the digest of the file to calculate the size
+            of.
+
+        return (int): the size of the file, in bytes.
+
+        raise (KeyError): if the file cannot be found.
+        raise (TombstoneError): if the digest is the tombstone
+
+        """
+        if digest == storage.TOMBSTONE:
+            raise TombstoneError()
+        return self.backend.get_size(digest)
+
+    def delete(self, digest: str):
+        """Delete a file from the backend and the local cache.
+
+        digest (unicode): the digest of the file to delete.
+
+        """
+        if digest == storage.TOMBSTONE:
+            return
+        self.drop(digest)
+        self.backend.delete(digest)
+
+    def drop(self, digest):
+        """Delete a file only from the local cache.
+
+        digest (unicode): the file to delete.
+
+        """
+        if digest == storage.TOMBSTONE:
+            return
+        cache_file_path: pathlib.Path = self.file_dir / digest
+        cache_file_path.unlink(missing_ok=True)
+
+    def purge_cache(self):
+        """Empty the local cache.
+
+        This function must not be called if the cache directory is shared.
+
+        """
+        self.destroy_cache()
+        self.file_dir.mkdir(parents=True, exist_ok=True)
+        if self.folder is not None:
+            self.folder.mkdir(parents=True, exist_ok=True)
+
+    def destroy_cache(self):
+        """Completely remove and destroy the cache.
+
+        Nothing that could have been created by this object will be
+        left on disk. After that, this instance isn't usable anymore.
+
+        This function must not be called if the cache directory is shared.
+
+        """
+        if self.is_shared():
+            raise Exception('You may not destroy a shared cache.')
+        shutil.rmtree(str(self.file_dir))
+
+    def list(self) -> List[storage.FileWithDescription]:
+        """List the files available in the storage.
+
+        return ([(unicode, unicode)]): a list of pairs, each
+            representing a file in the form (digest, description).
+
+        """
+        return self.backend.list()
+
+    def check_backend_integrity(self, delete: bool = False) -> bool:
+        """Check the integrity of the backend.
+
+        Request all the files from the backend. For each of them the
+        digest is recomputed and checked against the one recorded in
+        the backend.
+
+        If mismatches are found, they are reported with ERROR
+        severity. The method returns False if at least one mismatch is
+        found, True otherwise.
+
+        delete (bool): if True, files with wrong digest are deleted.
+
+        """
+        clean = True
+        for fwd in self.list():
+            digest = fwd.filename
+            d = digester.Digester()
+            with self.backend.get_file(digest) as fobj:
+                buf = fobj.read(self.CHUNK_SIZE)
+                while len(buf) > 0:
+                    d.update(buf)
+                    buf = fobj.read(self.CHUNK_SIZE)
+            computed_digest = d.digest()
+            if digest != computed_digest:
+                logger.error(
+                    'File with hash %s actually has hash %s', digest, computed_digest
+                )
+                if delete:
+                    self.delete(digest)
+                clean = False
+
+        return clean
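
For orientation, here is a minimal usage sketch of the FileCacher API added above. It exercises only methods visible in this diff; the concrete Storage backend (the FilesystemStorage name and its constructor argument) is an assumption, since rbx/grading/judge/storage.py is not shown in this excerpt.

    import pathlib

    from rbx.grading.judge import storage
    from rbx.grading.judge.cacher import FileCacher

    # Hypothetical backend: the concrete Storage implementation and its
    # constructor live in rbx/grading/judge/storage.py and are assumed here.
    backend = storage.FilesystemStorage(pathlib.Path('/tmp/rbx-storage'))

    cacher = FileCacher(backend, shared=True, folder=pathlib.Path('/tmp/rbx-cache'))

    # Store content; the returned digest is the content-addressed key.
    digest = cacher.put_file_text('hello, world', desc='greeting')

    # Reads hit the local cache first, then fall back to the backend.
    assert cacher.get_file_content(digest) == b'hello, world'

    # drop() evicts only the local copy; the backend still has the file,
    # so a subsequent read transparently re-downloads it.
    cacher.drop(digest)
    assert cacher.get_file_content(digest) == b'hello, world'
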
rbx/grading/judge/digester.py
@@ -0,0 +1,35 @@
+import hashlib
+from typing import IO
+
+import gevent
+
+
+class Digester:
+    """Simple wrapper of hashlib using our preferred hasher."""
+
+    def __init__(self):
+        self._hasher = hashlib.sha1()
+
+    def update(self, b):
+        """Add the bytes b to the hasher."""
+        self._hasher.update(b)
+
+    def digest(self):
+        """Return the digest as a hex string."""
+        return self._hasher.digest().hex()
+
+
+def digest_cooperatively_into_digester(
+    f: IO[bytes], digester: Digester, chunk_size: int = 2**20
+):
+    buf = f.read(chunk_size)
+    while len(buf) > 0:
+        digester.update(buf)
+        gevent.sleep(0)
+        buf = f.read(chunk_size)
+
+
+def digest_cooperatively(f: IO[bytes], chunk_size: int = 2**20):
+    d = Digester()
+    digest_cooperatively_into_digester(f, d, chunk_size)
+    return d.digest()
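
And a short sketch of the Digester helpers, using only names defined in the diff above: digest() returns the SHA-1 digest as a hex string, and digest_cooperatively reads a stream in chunks, yielding to gevent between chunks.

    import io

    from rbx.grading.judge.digester import Digester, digest_cooperatively

    # Incremental hashing: feed chunks, then read the hex digest.
    d = Digester()
    d.update(b'hello, ')
    d.update(b'world')
    print(d.digest())  # 40-character hex SHA-1 digest

    # One-shot helper over a binary stream; same result as above.
    with io.BytesIO(b'hello, world') as f:
        assert digest_cooperatively(f) == d.digest()
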