rbx.cp 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. rbx/box/cli.py +79 -31
  2. rbx/box/code.py +131 -82
  3. rbx/box/global_package.py +74 -0
  4. rbx/box/package.py +6 -19
  5. rbx/box/remote.py +19 -0
  6. rbx/box/sanitizers/warning_stack.py +3 -3
  7. rbx/box/solutions.py +13 -7
  8. rbx/box/stats.py +10 -0
  9. rbx/box/stresses.py +45 -64
  10. rbx/box/stressing/finder_parser.py +11 -16
  11. rbx/box/tasks.py +33 -22
  12. rbx/box/tooling/boca/scraper.py +1 -1
  13. rbx/grading/caching.py +98 -47
  14. rbx/grading/debug_context.py +31 -0
  15. rbx/grading/grading_context.py +96 -0
  16. rbx/grading/judge/cacher.py +93 -21
  17. rbx/grading/judge/sandbox.py +6 -3
  18. rbx/grading/judge/sandboxes/timeit.py +1 -1
  19. rbx/grading/judge/storage.py +169 -35
  20. rbx/grading/profiling.py +126 -0
  21. rbx/grading/steps.py +44 -16
  22. rbx/grading/steps_with_caching.py +52 -26
  23. rbx/resources/presets/default/contest/.gitignore +2 -0
  24. rbx/resources/presets/default/contest/contest.rbx.yml +14 -1
  25. rbx/resources/presets/default/contest/statement/contest.rbx.tex +25 -86
  26. rbx/resources/presets/default/contest/statement/icpc.sty +322 -0
  27. rbx/resources/presets/default/contest/statement/instructions.tex +40 -0
  28. rbx/resources/presets/default/contest/statement/logo.png +0 -0
  29. rbx/resources/presets/default/contest/statement/template.rbx.tex +45 -36
  30. rbx/resources/presets/default/preset.rbx.yml +2 -2
  31. rbx/resources/presets/default/problem/problem.rbx.yml +12 -8
  32. rbx/resources/presets/default/problem/statement/icpc.sty +322 -0
  33. rbx/resources/presets/default/problem/statement/template.rbx.tex +47 -79
  34. {rbx_cp-0.7.0.dist-info → rbx_cp-0.8.0.dist-info}/METADATA +3 -1
  35. {rbx_cp-0.7.0.dist-info → rbx_cp-0.8.0.dist-info}/RECORD +43 -36
  36. rbx/resources/presets/default/contest/statement/olymp.sty +0 -250
  37. rbx/resources/presets/default/problem/statement/olymp.sty +0 -250
  38. /rbx/resources/presets/default/problem/{gen.cpp → gens/gen.cpp} +0 -0
  39. /rbx/resources/presets/default/problem/{tests → manual_tests}/samples/000.in +0 -0
  40. /rbx/resources/presets/default/problem/{tests → manual_tests}/samples/001.in +0 -0
  41. /rbx/resources/presets/default/problem/{random.py → testplan/random.py} +0 -0
  42. /rbx/resources/presets/default/problem/{random.txt → testplan/random.txt} +0 -0
  43. {rbx_cp-0.7.0.dist-info → rbx_cp-0.8.0.dist-info}/LICENSE +0 -0
  44. {rbx_cp-0.7.0.dist-info → rbx_cp-0.8.0.dist-info}/WHEEL +0 -0
  45. {rbx_cp-0.7.0.dist-info → rbx_cp-0.8.0.dist-info}/entry_points.txt +0 -0
rbx/grading/caching.py CHANGED
@@ -3,13 +3,18 @@ import io
  import os
  import pathlib
  import shelve
+ import shutil
+ import tempfile
  from typing import Any, Dict, List, Optional

  from pydantic import BaseModel

  from rbx import console
+ from rbx.grading import grading_context
+ from rbx.grading.judge.cacher import FileCacher
  from rbx.grading.judge.digester import digest_cooperatively
- from rbx.grading.judge.storage import Storage, copyfileobj
+ from rbx.grading.judge.storage import copyfileobj
+ from rbx.grading.profiling import Profiler
  from rbx.grading.steps import DigestHolder, GradingArtifacts, GradingLogsHolder

  VERBOSE = False
@@ -89,11 +94,15 @@ def _build_digest_list(artifacts_list: List[GradingArtifacts]) -> List[DigestHol
      return digests


- def _build_fingerprint_list(artifacts_list: List[GradingArtifacts]) -> List[str]:
+ def _build_fingerprint_list(
+     artifacts_list: List[GradingArtifacts], cacher: FileCacher
+ ) -> List[str]:
      fingerprints = []
      for artifacts in artifacts_list:
          for input in artifacts.inputs:
-             if input.src is None:
+             if input.src is None or not input.hash:
+                 continue
+             if cacher.digest_from_symlink(input.src) is not None:
                  continue
              with input.src.open('rb') as f:
                  fingerprints.append(digest_cooperatively(f))
@@ -124,9 +133,10 @@ def _build_logs_list(artifacts_list: List[GradingArtifacts]) -> List[GradingLogs

  def _build_cache_fingerprint(
      artifacts_list: List[GradingArtifacts],
+     cacher: FileCacher,
  ) -> CacheFingerprint:
      digests = [digest.value for digest in _build_digest_list(artifacts_list)]
-     fingerprints = _build_fingerprint_list(artifacts_list)
+     fingerprints = _build_fingerprint_list(artifacts_list, cacher)
      output_fingerprints = _build_output_fingerprint_list(artifacts_list)
      logs = _build_logs_list(artifacts_list)
      return CacheFingerprint(
@@ -155,6 +165,7 @@ def _build_cache_input(
      commands: List[str],
      artifact_list: List[GradingArtifacts],
      extra_params: Dict[str, Any],
+     cacher: FileCacher,
  ) -> CacheInput:
      cloned_artifact_list = [
          artifacts.model_copy(deep=True) for artifacts in artifact_list
@@ -164,6 +175,15 @@ def _build_cache_input(
          # part of the cache key.
          artifacts.logs = None

+         for input in artifacts.inputs:
+             if input.src is None:
+                 continue
+             inferred_digest = cacher.digest_from_symlink(input.src)
+             if inferred_digest is not None:
+                 # Consume cache from digest instead of file.
+                 input.digest = DigestHolder(value=inferred_digest)
+                 input.src = None
+
          for output in artifacts.outputs:
              if output.hash:
                  # Cleanup dest field from hash artifacts
@@ -185,7 +205,7 @@ def _build_cache_key(input: CacheInput) -> str:
          return digest_cooperatively(fobj)


- def _copy_hashed_files(artifact_list: List[GradingArtifacts], storage: Storage):
+ def _copy_hashed_files(artifact_list: List[GradingArtifacts], cacher: FileCacher):
      for artifact in artifact_list:
          for output in artifact.outputs:
              if not output.hash or output.dest is None:
@@ -194,19 +214,27 @@ def _copy_hashed_files(artifact_list: List[GradingArtifacts], storage: Storage):
              if output.optional and output.digest.value is None:
                  continue
              assert output.digest.value is not None
-             with storage.get_file(output.digest.value) as fobj:
-                 with output.dest.open('wb') as f:
-                     copyfileobj(fobj, f, maxlen=output.maxlen)
+             if (
+                 path_to_symlink := cacher.path_for_symlink(output.digest.value)
+             ) is not None:
+                 # Use a symlink to the file in the persistent cache, if available.
+                 output.dest.unlink(missing_ok=True)
+                 output.dest.symlink_to(path_to_symlink)
+             else:
+                 # Otherwise, copy it.
+                 with cacher.get_file(output.digest.value) as fobj:
+                     with output.dest.open('wb') as f:
+                         copyfileobj(fobj, f, maxlen=output.maxlen)
              if output.executable:
                  output.dest.chmod(0o755)


- def is_artifact_ok(artifact: GradingArtifacts, storage: Storage) -> bool:
+ def is_artifact_ok(artifact: GradingArtifacts, cacher: FileCacher) -> bool:
      for output in artifact.outputs:
          if output.optional or output.intermediate:
              continue
          if output.digest is not None:
-             if output.digest.value is None or not storage.exists(output.digest.value):
+             if output.digest.value is None or not cacher.exists(output.digest.value):
                  return False
              return True
          assert output.dest is not None
@@ -219,9 +247,9 @@ def is_artifact_ok(artifact: GradingArtifacts, storage: Storage) -> bool:
      return True


- def are_artifacts_ok(artifacts: List[GradingArtifacts], storage: Storage) -> bool:
+ def are_artifacts_ok(artifacts: List[GradingArtifacts], cacher: FileCacher) -> bool:
      for artifact in artifacts:
-         if not is_artifact_ok(artifact, storage):
+         if not is_artifact_ok(artifact, cacher):
              return False
      return True

@@ -244,53 +272,70 @@ class DependencyCacheBlock:
          self._key = None

      def __enter__(self):
-         input = _build_cache_input(
-             commands=self.commands,
-             artifact_list=self.artifact_list,
-             extra_params=self.extra_params,
-         )
-         if VERBOSE:
-             console.console.log(f'Cache input is: {input}')
-         self._key = _build_cache_key(input)
-         if VERBOSE:
-             console.console.log(f'Cache key is: {self._key}')
-         found = self.cache.find_in_cache(
-             self.commands, self.artifact_list, self.extra_params, key=self._key
-         )
-         return found
-
-     def __exit__(self, exc_type, exc_val, exc_tb):
-         if exc_type is None:
-             self.cache.store_in_cache(
+         with Profiler('enter_in_cache'):
+             if grading_context.is_no_cache():
+                 return False
+             input = _build_cache_input(
+                 commands=self.commands,
+                 artifact_list=self.artifact_list,
+                 extra_params=self.extra_params,
+                 cacher=self.cache.cacher,
+             )
+             if VERBOSE:
+                 console.console.log(f'Cache input is: {input}')
+             self._key = _build_cache_key(input)
+             if VERBOSE:
+                 console.console.log(f'Cache key is: {self._key}')
+             found = self.cache.find_in_cache(
                  self.commands, self.artifact_list, self.extra_params, key=self._key
              )
-         if exc_type is NoCacheException:
-             return True
-         return None
+             return found
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         with Profiler('exit_in_cache'):
+             if grading_context.is_no_cache():
+                 return True if exc_type is NoCacheException else None
+             if exc_type is None:
+                 self.cache.store_in_cache(
+                     self.commands, self.artifact_list, self.extra_params, key=self._key
+                 )
+             if exc_type is NoCacheException:
+                 return True
+             return None


  class DependencyCache:
      root: pathlib.Path
-     storage: Storage
+     cacher: FileCacher

-     def __init__(self, root: pathlib.Path, storage: Storage):
+     def __init__(self, root: pathlib.Path, cacher: FileCacher):
          self.root = root
-         self.storage = storage
+         self.cacher = cacher
          self.db = shelve.open(self._cache_name())
+         tmp_dir = pathlib.Path(tempfile.mkdtemp())
+         self.transient_db = shelve.open(tmp_dir / '.cache_db')
          atexit.register(lambda: self.db.close())
+         atexit.register(lambda: self.transient_db.close())
+         atexit.register(lambda: shutil.rmtree(tmp_dir))

      def _cache_name(self) -> str:
          return str(self.root / '.cache_db')

+     def get_db(self) -> shelve.Shelf:
+         if grading_context.is_transient():
+             return self.transient_db
+         return self.db
+
      def _find_in_cache(self, key: str) -> Optional[CacheFingerprint]:
-         return self.db.get(key)
+         return self.get_db().get(key)

      def _store_in_cache(self, key: str, fingerprint: CacheFingerprint):
-         self.db[key] = fingerprint
+         self.get_db()[key] = fingerprint

      def _evict_from_cache(self, key: str):
-         if key in self.db:
-             del self.db[key]
+         db = self.get_db()
+         if key in db:
+             del db[key]

      def __call__(
          self,
@@ -309,7 +354,10 @@ class DependencyCache:
          key: Optional[str] = None,
      ) -> bool:
          input = _build_cache_input(
-             commands=commands, artifact_list=artifact_list, extra_params=extra_params
+             commands=commands,
+             artifact_list=artifact_list,
+             extra_params=extra_params,
+             cacher=self.cacher,
          )
          key = key or _build_cache_key(input)

@@ -317,7 +365,7 @@ class DependencyCache:
          if fingerprint is None:
              return False

-         reference_fingerprint = _build_cache_fingerprint(artifact_list)
+         reference_fingerprint = _build_cache_fingerprint(artifact_list, self.cacher)

          if not _fingerprints_match(fingerprint, reference_fingerprint):
              self._evict_from_cache(key)
@@ -334,7 +382,7 @@ class DependencyCache:
          for digest, reference_digest in zip(fingerprint.digests, reference_digests):
              reference_digest.value = digest

-         if not are_artifacts_ok(artifact_list, self.storage):
+         if not are_artifacts_ok(artifact_list, self.cacher):
              # Rollback digest changes.
              for old_digest_value, reference_digest in zip(
                  old_digest_values, reference_digests
@@ -344,7 +392,7 @@ class DependencyCache:
              return False

          # Copy hashed files to file system.
-         _copy_hashed_files(artifact_list, self.storage)
+         _copy_hashed_files(artifact_list, self.cacher)

          # Apply logs changes.
          for logs, reference_logs in zip(fingerprint.logs, reference_fingerprint.logs):
@@ -366,11 +414,14 @@ class DependencyCache:
          key: Optional[str] = None,
      ):
          input = _build_cache_input(
-             commands=commands, artifact_list=artifact_list, extra_params=extra_params
+             commands=commands,
+             artifact_list=artifact_list,
+             extra_params=extra_params,
+             cacher=self.cacher,
          )
          key = key or _build_cache_key(input)

-         if not are_artifacts_ok(artifact_list, self.storage):
+         if not are_artifacts_ok(artifact_list, self.cacher):
              return

-         self._store_in_cache(key, _build_cache_fingerprint(artifact_list))
+         self._store_in_cache(key, _build_cache_fingerprint(artifact_list, self.cacher))
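A minimal usage sketch of the transient mode above, seen from a call site (the call site itself is assumed; run_solution is a placeholder, not part of the package). While a transient cache level is active, DependencyCache.get_db() routes lookups and stores to the throwaway shelve database created in __init__, so nothing from that scope reaches the persistent .cache_db:

from rbx.grading import grading_context

# Illustrative only: anything cached through DependencyCache inside this block
# goes to the temporary shelve database and is discarded at interpreter exit.
with grading_context.cache_level(grading_context.CacheLevel.CACHE_TRANSIENTLY):
    run_solution()  # placeholder for any code path that runs through the cache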
rbx/grading/debug_context.py ADDED
@@ -0,0 +1,31 @@
+ import contextvars
+ import dataclasses
+ from dataclasses import dataclass
+
+
+ @dataclass(frozen=True)
+ class DebugContext:
+     enable: bool = False
+
+
+ debug_var = contextvars.ContextVar('debug', default=DebugContext())
+
+
+ def get_debug_context() -> DebugContext:
+     return debug_var.get()
+
+
+ class Debug:
+     def __init__(self, *args, **kwargs):
+         self.args = args
+         self.kwargs = kwargs
+         self.token = None
+
+     def __enter__(self):
+         self.token = debug_var.set(
+             dataclasses.replace(debug_var.get(), *self.args, **self.kwargs)
+         )
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         if self.token is not None:
+             debug_var.reset(self.token)
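A short usage sketch based only on the module above; enable is the single field DebugContext currently defines:

from rbx.grading.debug_context import Debug, get_debug_context

with Debug(enable=True):
    # debug_var temporarily holds a copy of DebugContext with enable=True.
    assert get_debug_context().enable
# The previous context is restored when the block exits.
assert not get_debug_context().enable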
rbx/grading/grading_context.py ADDED
@@ -0,0 +1,96 @@
+ import contextvars
+ from enum import Enum
+ from typing import Callable, Optional, Union
+
+ Condition = Union[bool, Callable[[], bool]]
+
+
+ class ConditionedContext:
+     def __init__(self, when: Condition = True):
+         self.when = when
+
+     def should_enter(self) -> bool:
+         if isinstance(self.when, bool):
+             return self.when
+         return self.when()
+
+
+ class CacheLevel(Enum):
+     NO_CACHE = 0
+     CACHE_TRANSIENTLY = 1
+     CACHE_COMPILATION = 2
+     CACHE_ALL = 3
+
+
+ cache_level_var = contextvars.ContextVar('cache_level', default=CacheLevel.CACHE_ALL)
+
+
+ def is_compilation_only() -> bool:
+     return cache_level_var.get() == CacheLevel.CACHE_COMPILATION
+
+
+ def is_transient() -> bool:
+     return cache_level_var.get().value <= CacheLevel.CACHE_TRANSIENTLY.value
+
+
+ def is_no_cache() -> bool:
+     return cache_level_var.get().value <= CacheLevel.NO_CACHE.value
+
+
+ class cache_level(ConditionedContext):
+     def __init__(self, level: CacheLevel, when: Condition = True):
+         super().__init__(when)
+         self.level = level
+         self.token = None
+
+     def __enter__(self):
+         if self.should_enter():
+             self.token = cache_level_var.set(self.level)
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         if self.token is not None:
+             cache_level_var.reset(self.token)
+         return None
+
+
+ compression_level_var = contextvars.ContextVar('compression_level', default=5)
+ use_compression_var = contextvars.ContextVar('use_compression', default=False)
+
+
+ def get_compression_level() -> int:
+     return compression_level_var.get()
+
+
+ def should_compress() -> bool:
+     return use_compression_var.get()
+
+
+ class compression(ConditionedContext):
+     def __init__(
+         self,
+         level: Optional[int] = None,
+         use_compression: Optional[bool] = None,
+         when: Condition = True,
+     ):
+         super().__init__(when)
+         self.level = level
+         self.use_compression = use_compression
+         self.level_token = None
+         self.use_compression_token = None
+
+     def __enter__(self):
+         if not self.should_enter():
+             return self
+         if self.level is not None:
+             self.level_token = compression_level_var.set(self.level)
+         if self.use_compression is not None:
+             self.use_compression_token = use_compression_var.set(self.use_compression)
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         if self.level_token is not None:
+             compression_level_var.reset(self.level_token)
+         if self.use_compression_token is not None:
+             use_compression_var.reset(self.use_compression_token)
+         return None
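A usage sketch of the helpers defined above; args.no_cache stands in for whatever flag a caller might consult and is not part of the module:

from rbx.grading.grading_context import (
    CacheLevel,
    cache_level,
    compression,
    get_compression_level,
    should_compress,
)

# The level is only overridden when the `when` condition holds at entry time.
with cache_level(CacheLevel.NO_CACHE, when=lambda: args.no_cache):
    ...  # is_no_cache() is True here iff args.no_cache was truthy

# Compression settings are plain context variables behind the same conditional gate.
with compression(level=9, use_compression=True):
    assert should_compress()
    assert get_compression_level() == 9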
rbx/grading/judge/cacher.py CHANGED
@@ -7,8 +7,11 @@ import pathlib
  import shutil
  import tempfile
  import typing
- from typing import IO, List, Optional
+ from typing import IO, Dict, List, Optional, Type

+ from pydantic import BaseModel
+
+ from rbx.grading import grading_context
  from rbx.grading.judge import digester, storage

  logger = logging.getLogger(__name__)
@@ -60,6 +63,7 @@ class FileCacher:
          self.backend = backend
          self.shared = shared
          self.folder = folder
+         self.existing = set()

          # First we create the config directories.
          if folder:
@@ -146,6 +150,11 @@ class FileCacher:

          logger.debug('File %s not in cache, downloading ' 'from database.', digest)

+         if (symlink := self.backend.path_for_symlink(digest)) is not None:
+             cache_file_path.unlink(missing_ok=True)
+             cache_file_path.symlink_to(symlink)
+             return cache_file_path.open('rb') if not cache_only else None
+
          ftmp_handle, temp_file_path = tempfile.mkstemp(dir=self.temp_dir, text=False)
          temp_file_path = pathlib.Path(temp_file_path)
          with open(ftmp_handle, 'wb') as ftmp, self.backend.get_file(digest) as fobj:
@@ -168,6 +177,22 @@ class FileCacher:
          if not cache_only:
              return fd

+     def exists(self, digest: str, cache_only: bool = False) -> bool:
+         """Check if a file exists in the cacher.
+
+         cache_only (bool): don't check the backend.
+
+         """
+         cache_file_path = self.file_dir / digest
+         if cache_file_path.exists() or digest in self.existing:
+             return True
+         if cache_only:
+             return False
+         exists = self.backend.exists(digest)
+         if exists:
+             self.existing.add(digest)
+         return exists
+
      def cache_file(self, digest: str):
          """Load a file into the cache.

@@ -219,9 +244,18 @@ class FileCacher:
          if digest == storage.TOMBSTONE:
              raise TombstoneError()

+         if grading_context.is_transient():
+             return None
+
          logger.debug('Getting symlink file path %s.', digest)
          return self.backend.path_for_symlink(digest)

+     def digest_from_symlink(self, link: pathlib.Path) -> Optional[str]:
+         if grading_context.is_transient():
+             return None
+
+         return self.backend.filename_from_symlink(link)
+
      def get_file_content(self, digest: str) -> bytes:
          """Retrieve a file from the storage.

@@ -280,7 +314,9 @@ class FileCacher:
          with dst_path.open('wb') as dst:
              storage.copyfileobj(src, dst, self.CHUNK_SIZE)

-     def put_file_from_fobj(self, src: IO[bytes], desc: str = '') -> str:
+     def put_file_from_fobj(
+         self, src: IO[bytes], metadata: Optional[Dict[str, BaseModel]] = None
+     ) -> str:
          """Store a file in the storage.

          If it's already (for some reason...) in the cache send that
@@ -292,7 +328,7 @@ class FileCacher:

          src (fileobj): a readable binary file-like object from which
              to read the contents of the file.
-         desc (unicode): the (optional) description to associate to the
+         metadata (Dict[str, BaseModel]): the (optional) metadata to associate to the
              file.

          return (unicode): the digest of the stored file.
@@ -334,36 +370,45 @@ class FileCacher:
          # We read from the temporary file before moving it to
          # cache_file_path because the latter might be deleted before
          # we get a chance to open it.
-         with open(dst.name, 'rb') as src:
-             pending_file = self.backend.create_file(digest)
-             if pending_file is not None:
-                 storage.copyfileobj(src, pending_file.fd, self.CHUNK_SIZE)
-                 self.backend.commit_file(pending_file, desc)
+         #
+         # Only store file when not in transient mode.
+         if not grading_context.is_transient():
+             with open(dst.name, 'rb') as src:
+                 pending_file = self.backend.create_file(digest)
+                 if pending_file is not None:
+                     storage.copyfileobj(src, pending_file.fd, self.CHUNK_SIZE)
+                     self.backend.commit_file(pending_file, metadata)

          os.rename(dst.name, cache_file_path)

          return digest

-     def put_file_content(self, content: bytes, desc: str = '') -> str:
+     def put_file_content(
+         self, content: bytes, metadata: Optional[Dict[str, BaseModel]] = None
+     ) -> str:
          """Store a file in the storage.

          See `put_file_from_fobj'. This method will read the content of
          the file from the given binary string.

          content (bytes): the content of the file to store.
-         desc (unicode): the (optional) description to associate to the
+         metadata (Dict[str, BaseModel]): the (optional) metadata to associate to the
              file.

          return (unicode): the digest of the stored file.

          """
          with io.BytesIO(content) as src:
-             return self.put_file_from_fobj(src, desc)
+             return self.put_file_from_fobj(src, metadata)

-     def put_file_text(self, text: str, desc: str = '') -> str:
-         return self.put_file_content(text.encode('utf-8'), desc)
+     def put_file_text(
+         self, text: str, metadata: Optional[Dict[str, BaseModel]] = None
+     ) -> str:
+         return self.put_file_content(text.encode('utf-8'), metadata)

-     def put_file_from_path(self, src_path: pathlib.Path, desc: str = '') -> str:
+     def put_file_from_path(
+         self, src_path: pathlib.Path, metadata: Optional[Dict[str, BaseModel]] = None
+     ) -> str:
          """Store a file in the storage.

          See `put_file_from_fobj'. This method will read the content of
@@ -371,28 +416,53 @@ class FileCacher:

          src_path (Path): an accessible location on the file-system
              from which to read the contents of the file.
-         desc (unicode): the (optional) description to associate to the
+         metadata (Dict[str, BaseModel]): the (optional) metadata to associate to the
              file.

          return (unicode): the digest of the stored file.

          """
          with src_path.open('rb') as src:
-             return self.put_file_from_fobj(src, desc)
+             return self.put_file_from_fobj(src, metadata)
+
+     def set_metadata(self, digest: str, key: str, value: Optional[BaseModel]):
+         """Set the description of a file given its digest.
+
+         digest (unicode): the digest of the file to add the description.
+         key (str): the key of the metadata to add.
+         value (BaseModel): the value of the metadata to add.
+         """
+         if grading_context.is_transient():
+             return
+         self.backend.set_metadata(digest, key, value)

-     def describe(self, digest: str) -> str:
+     def get_metadata(
+         self, digest: str, key: str, model_cls: Type[storage.BaseModelT]
+     ) -> Optional[storage.BaseModelT]:
          """Return the description of a file given its digest.

          digest (unicode): the digest of the file to describe.
-
-         return (unicode): the description of the file.
+         key (str): the key of the metadata to get.
+         model_cls (Type[storage.BaseModelT]): the model class of the metadata.
+         return (BaseModel): the metadata of the file.

          raise (KeyError): if the file cannot be found.

          """
          if digest == storage.TOMBSTONE:
              raise TombstoneError()
-         return self.backend.describe(digest)
+         return typing.cast(
+             Optional[storage.BaseModelT],
+             self.backend.get_metadata(digest, key, model_cls),
+         )
+
+     def list_metadata(self, filename: str) -> List[str]:
+         """List the metadata of a file given its filename.
+
+         filename (str): the filename of the file to list the metadata.
+         return (List[str]): the list of metadata keys.
+         """
+         return self.backend.list_metadata(filename)

      def get_size(self, digest: str) -> int:
          """Return the size of a file given its digest.
@@ -431,6 +501,7 @@ class FileCacher:
              return
          cache_file_path: pathlib.Path = self.file_dir / digest
          cache_file_path.unlink(missing_ok=True)
+         self.existing.discard(digest)

      def purge_cache(self):
          """Empty the local cache.
@@ -442,6 +513,7 @@ class FileCacher:
          self.file_dir.mkdir(parents=True, exist_ok=True)
          if self.folder is not None:
              self.folder.mkdir(parents=True, exist_ok=True)
+         self.existing.clear()

      def destroy_cache(self):
          """Completely remove and destroy the cache.
@@ -456,7 +528,7 @@ class FileCacher:
              raise Exception('You may not destroy a shared cache.')
          shutil.rmtree(str(self.file_dir))

-     def list(self) -> List[storage.FileWithDescription]:
+     def list(self) -> List[storage.FileWithMetadata]:
          """List the files available in the storage.

          return ([(unicode, unicode)]): a list of pairs, each
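A sketch of the metadata API that replaces the old desc strings; cacher is assumed to be a FileCacher wired to a metadata-capable backend, and the RunInfo model is invented for illustration:

from pydantic import BaseModel

class RunInfo(BaseModel):
    exitcode: int

# Store a blob together with a typed metadata entry under the key 'run'.
digest = cacher.put_file_content(b'hello', metadata={'run': RunInfo(exitcode=0)})
# Metadata can also be attached or overwritten later, and read back as the declared model.
cacher.set_metadata(digest, 'run', RunInfo(exitcode=1))
info = cacher.get_metadata(digest, 'run', RunInfo)  # Optional[RunInfo]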
rbx/grading/judge/sandbox.py CHANGED
@@ -647,12 +647,15 @@ class SandboxBase(abc.ABC):
          return self.get_file_to_bytes(path, maxlen).decode('utf-8')

      def get_file_to_storage(
-         self, path: pathlib.Path, description: str = '', trunc_len: Optional[int] = None
+         self,
+         path: pathlib.Path,
+         metadata: Optional[Dict[str, pydantic.BaseModel]] = None,
+         trunc_len: Optional[int] = None,
      ) -> str:
          """Put a sandbox file in FS and return its digest.

          path (Path): relative path of the file inside the sandbox.
-         description (str): the description for FS.
+         metadata (Dict[str, pydantic.BaseModel]): the metadata for FS.
          trunc_len (int|None): if None, does nothing; otherwise, before
              returning truncate it at the specified length.

@@ -660,7 +663,7 @@

          """
          with self.get_file(path, trunc_len=trunc_len) as file_:
-             return self.file_cacher.put_file_from_fobj(file_, description)
+             return self.file_cacher.put_file_from_fobj(file_, metadata)

      def stat_file(self, path: pathlib.Path) -> os.stat_result:
          """Return the stats of a file in the sandbox.
rbx/grading/judge/sandboxes/timeit.py CHANGED
@@ -100,9 +100,9 @@ def create_tee(files, mode, buffer_size=4096, prefix=''):
                      tee.file.write(tee.prefix)
                  tee.file.write(bytes)
                  tee.file.flush()
-                 new = bytes == b'\n'
                  # TODO maybe add in fsync() here if the fileno() method
                  # exists on file
+                 new = bytes == b'\n'
              except Exception:
                  pass
              finally: