rbx.cp 0.5.39__py3-none-any.whl → 0.5.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rbx/box/builder.py +6 -6
- rbx/box/checkers.py +105 -26
- rbx/box/cli.py +860 -0
- rbx/box/code.py +199 -84
- rbx/box/contest/statements.py +4 -2
- rbx/box/generators.py +55 -49
- rbx/box/generators_test.py +7 -7
- rbx/box/main.py +1 -852
- rbx/box/package.py +42 -1
- rbx/box/packaging/boca/packager.py +2 -1
- rbx/box/packaging/main.py +24 -7
- rbx/box/packaging/moj/packager.py +164 -0
- rbx/box/retries.py +5 -5
- rbx/box/schema.py +86 -4
- rbx/box/solutions.py +46 -108
- rbx/box/solutions_test.py +5 -6
- rbx/box/statements/build_statements.py +4 -2
- rbx/box/stresses.py +23 -12
- rbx/box/tasks.py +258 -0
- rbx/box/testcase_extractors.py +21 -21
- rbx/box/testcases/main.py +19 -14
- rbx/box/unit.py +116 -0
- rbx/box/validators.py +27 -18
- rbx/box/validators_test.py +3 -3
- rbx/grading/judge/sandbox.py +8 -0
- rbx/grading/judge/sandboxes/stupid_sandbox.py +12 -7
- rbx/grading/judge/sandboxes/timeit.py +8 -2
- rbx/grading/steps.py +76 -2
- rbx/grading/steps_with_caching.py +45 -3
- rbx/grading/steps_with_caching_run_test.py +51 -49
- rbx/resources/packagers/moj/scripts/compare.sh +101 -0
- rbx/test.py +6 -4
- rbx/testdata/interactive/checker.cpp +21 -0
- rbx/testdata/interactive/gen.cpp +11 -0
- rbx/testdata/interactive/interactor.cpp +63 -0
- rbx/testdata/interactive/problem.rbx.yml +40 -0
- rbx/testdata/interactive/sols/af_ac_pe.cpp +75 -0
- rbx/testdata/interactive/sols/af_ac_re.cpp +76 -0
- rbx/testdata/interactive/sols/af_ac_too_many_iter.cpp +72 -0
- rbx/testdata/interactive/sols/af_inf_cout_with_flush.cpp +79 -0
- rbx/testdata/interactive/sols/af_inf_cout_without_flush.cpp +78 -0
- rbx/testdata/interactive/sols/af_ml.cpp +78 -0
- rbx/testdata/interactive/sols/af_tl_after_ans.cpp +74 -0
- rbx/testdata/interactive/sols/af_wa.cpp +74 -0
- rbx/testdata/interactive/sols/interactive-binary-search_mm_naive_cin.cpp +17 -0
- rbx/testdata/interactive/sols/main.cpp +26 -0
- rbx/testdata/interactive/testplan.txt +6 -0
- rbx/testdata/interactive/validator.cpp +16 -0
- {rbx_cp-0.5.39.dist-info → rbx_cp-0.5.42.dist-info}/METADATA +2 -1
- {rbx_cp-0.5.39.dist-info → rbx_cp-0.5.42.dist-info}/RECORD +53 -32
- {rbx_cp-0.5.39.dist-info → rbx_cp-0.5.42.dist-info}/LICENSE +0 -0
- {rbx_cp-0.5.39.dist-info → rbx_cp-0.5.42.dist-info}/WHEEL +0 -0
- {rbx_cp-0.5.39.dist-info → rbx_cp-0.5.42.dist-info}/entry_points.txt +0 -0
rbx/box/solutions.py
CHANGED
@@ -17,11 +17,13 @@ from pydantic import BaseModel
|
|
17
17
|
|
18
18
|
from rbx import console, utils
|
19
19
|
from rbx.box import checkers, package
|
20
|
-
from rbx.box.code import
|
20
|
+
from rbx.box.code import (
|
21
|
+
SanitizationLevel,
|
22
|
+
compile_item,
|
23
|
+
find_language_name,
|
24
|
+
)
|
21
25
|
from rbx.box.deferred import Deferred
|
22
26
|
from rbx.box.environment import (
|
23
|
-
EnvironmentSandbox,
|
24
|
-
ExecutionConfig,
|
25
27
|
VerificationLevel,
|
26
28
|
)
|
27
29
|
from rbx.box.formatting import get_formatted_memory, get_formatted_time
|
@@ -31,26 +33,26 @@ from rbx.box.generators import (
|
|
31
33
|
generate_output_for_testcase,
|
32
34
|
generate_standalone,
|
33
35
|
)
|
34
|
-
from rbx.box.retries import Retrier
|
35
36
|
from rbx.box.schema import (
|
36
37
|
ExpectedOutcome,
|
37
38
|
GeneratorCall,
|
38
39
|
Limits,
|
39
40
|
Solution,
|
41
|
+
TaskType,
|
40
42
|
Testcase,
|
41
43
|
TestcaseGroup,
|
42
44
|
)
|
45
|
+
from rbx.box.tasks import (
|
46
|
+
get_limits_for_language,
|
47
|
+
run_solution_on_testcase,
|
48
|
+
)
|
43
49
|
from rbx.box.testcase_extractors import extract_generation_testcases
|
44
50
|
from rbx.box.testcase_utils import TestcaseEntry, find_built_testcases
|
45
51
|
from rbx.grading.steps import (
|
46
|
-
DigestOrDest,
|
47
|
-
DigestOrSource,
|
48
52
|
Evaluation,
|
49
53
|
Outcome,
|
50
|
-
TestcaseIO,
|
51
|
-
TestcaseLog,
|
52
54
|
)
|
53
|
-
from rbx.utils import StatusProgress, model_to_yaml
|
55
|
+
from rbx.utils import StatusProgress
|
54
56
|
|
55
57
|
StructuredEvaluation = Dict[str, Dict[str, List[Optional[Deferred[Evaluation]]]]]
|
56
58
|
|
@@ -152,102 +154,13 @@ def compile_solutions(
|
|
152
154
|
return compiled_solutions
|
153
155
|
|
154
156
|
|
155
|
-
def get_limits_for_language(
|
156
|
-
lang: Optional[str],
|
157
|
-
verification: VerificationLevel,
|
158
|
-
timelimit_override: Optional[int],
|
159
|
-
) -> Limits:
|
160
|
-
pkg = package.find_problem_package_or_die()
|
161
|
-
time = timelimit_override or pkg.timelimit_for_language(lang)
|
162
|
-
isDoubleTL = verification.value >= VerificationLevel.FULL.value
|
163
|
-
memory = pkg.memorylimit_for_language(lang)
|
164
|
-
return Limits(
|
165
|
-
time=time, memory=memory, output=pkg.outputLimit, isDoubleTL=isDoubleTL
|
166
|
-
)
|
167
|
-
|
168
|
-
|
169
|
-
def _run_solution_on_testcase(
|
170
|
-
solution: Solution,
|
171
|
-
compiled_digest: str,
|
172
|
-
checker_digest: Optional[str],
|
173
|
-
testcase: Testcase,
|
174
|
-
output_dir: pathlib.Path,
|
175
|
-
testcase_index: int = 0,
|
176
|
-
verification: VerificationLevel = VerificationLevel.NONE,
|
177
|
-
timelimit_override: Optional[int] = None,
|
178
|
-
) -> Evaluation:
|
179
|
-
def run_fn(retry_index: int) -> Evaluation:
|
180
|
-
actual_sandbox = package.get_singleton_sandbox()
|
181
|
-
|
182
|
-
limits = get_limits_for_language(
|
183
|
-
solution.language, verification, timelimit_override
|
184
|
-
)
|
185
|
-
|
186
|
-
sandbox = EnvironmentSandbox()
|
187
|
-
sandbox.timeLimit = limits.time
|
188
|
-
if limits.isDoubleTL and sandbox.timeLimit is not None:
|
189
|
-
# Double TL.
|
190
|
-
sandbox.timeLimit = sandbox.timeLimit * 2
|
191
|
-
sandbox.wallTimeLimit = sandbox.timeLimit
|
192
|
-
if sandbox.timeLimit is not None and actual_sandbox.use_soft_timeout():
|
193
|
-
sandbox.wallTimeLimit = sandbox.timeLimit * 2
|
194
|
-
sandbox.memoryLimit = limits.memory
|
195
|
-
sandbox.fileSizeLimit = limits.output
|
196
|
-
extra_config = ExecutionConfig(sandbox=sandbox)
|
197
|
-
|
198
|
-
output_path = output_dir / testcase.inputPath.with_suffix('.out').name
|
199
|
-
error_path = output_path.with_suffix('.err')
|
200
|
-
log_path = output_path.with_suffix('.log')
|
201
|
-
output_path.parent.mkdir(parents=True, exist_ok=True)
|
202
|
-
|
203
|
-
run_log = run_item(
|
204
|
-
solution,
|
205
|
-
DigestOrSource.create(compiled_digest),
|
206
|
-
stdin=DigestOrSource.create(testcase.inputPath),
|
207
|
-
stdout=DigestOrDest.create(output_path),
|
208
|
-
stderr=DigestOrDest.create(error_path),
|
209
|
-
extra_config=extra_config,
|
210
|
-
retry_index=retry_index,
|
211
|
-
)
|
212
|
-
|
213
|
-
if checker_digest is not None:
|
214
|
-
checker_result = checkers.check(
|
215
|
-
checker_digest,
|
216
|
-
run_log,
|
217
|
-
testcase,
|
218
|
-
program_output=output_path,
|
219
|
-
)
|
220
|
-
else:
|
221
|
-
checker_result = checkers.check_with_no_output(run_log)
|
222
|
-
|
223
|
-
eval = Evaluation(
|
224
|
-
result=checker_result,
|
225
|
-
testcase=TestcaseIO(
|
226
|
-
index=testcase_index,
|
227
|
-
input=testcase.inputPath,
|
228
|
-
output=testcase.outputPath,
|
229
|
-
),
|
230
|
-
log=TestcaseLog(
|
231
|
-
**(run_log.model_dump() if run_log is not None else {}),
|
232
|
-
stdout_absolute_path=output_path.absolute(),
|
233
|
-
stderr_absolute_path=error_path.absolute(),
|
234
|
-
log_absolute_path=log_path.absolute(),
|
235
|
-
),
|
236
|
-
)
|
237
|
-
|
238
|
-
log_path.write_text(model_to_yaml(eval))
|
239
|
-
return eval
|
240
|
-
|
241
|
-
retrier = Retrier()
|
242
|
-
return retrier.repeat(run_fn)
|
243
|
-
|
244
|
-
|
245
157
|
def _run_solution(
|
246
158
|
solution: Solution,
|
247
159
|
compiled_digest: str,
|
248
160
|
checker_digest: Optional[str],
|
249
161
|
solution_index: int,
|
250
162
|
group_name: str,
|
163
|
+
interactor_digest: Optional[str] = None,
|
251
164
|
progress: Optional[StatusProgress] = None,
|
252
165
|
verification: VerificationLevel = VerificationLevel.NONE,
|
253
166
|
timelimit_override: Optional[int] = None,
|
@@ -267,12 +180,13 @@ def _run_solution(
|
|
267
180
|
)
|
268
181
|
|
269
182
|
async def run_fn(i=i, testcase=testcase, output_path=output_path):
|
270
|
-
return _run_solution_on_testcase(
|
183
|
+
return await run_solution_on_testcase(
|
271
184
|
solution,
|
272
185
|
compiled_digest,
|
273
186
|
checker_digest,
|
274
187
|
testcase,
|
275
188
|
output_path,
|
189
|
+
interactor_digest=interactor_digest,
|
276
190
|
testcase_index=i,
|
277
191
|
verification=verification,
|
278
192
|
timelimit_override=timelimit_override,
|
@@ -343,7 +257,15 @@ def _produce_solution_items(
|
|
343
257
|
) -> List[EvaluationItem]:
|
344
258
|
pkg = package.find_problem_package_or_die()
|
345
259
|
|
346
|
-
|
260
|
+
if pkg.type == TaskType.COMMUNICATION:
|
261
|
+
checker_digest = (
|
262
|
+
checkers.compile_checker() if check and pkg.checker is not None else None
|
263
|
+
)
|
264
|
+
interactor_digest = checkers.compile_interactor()
|
265
|
+
else:
|
266
|
+
checker_digest = checkers.compile_checker() if check else None
|
267
|
+
interactor_digest = None
|
268
|
+
|
347
269
|
compiled_solutions = compile_solutions(
|
348
270
|
progress=progress, tracked_solutions=tracked_solutions, sanitized=sanitized
|
349
271
|
)
|
@@ -374,6 +296,7 @@ def _produce_solution_items(
|
|
374
296
|
checker_digest,
|
375
297
|
solution_index,
|
376
298
|
group_name,
|
299
|
+
interactor_digest=interactor_digest,
|
377
300
|
progress=progress,
|
378
301
|
verification=verification,
|
379
302
|
timelimit_override=timelimit_override,
|
@@ -451,7 +374,7 @@ async def _generate_testcase_interactively(
|
|
451
374
|
copied_to=testcase,
|
452
375
|
)
|
453
376
|
elif testcase_entry is not None:
|
454
|
-
extracted = extract_generation_testcases([testcase_entry])
|
377
|
+
extracted = await extract_generation_testcases([testcase_entry])
|
455
378
|
if not extracted:
|
456
379
|
console.console.print(
|
457
380
|
f'[error]Failed searching for testcase [item]{testcase_entry}[/item].[/error]'
|
@@ -483,7 +406,7 @@ async def _generate_testcase_interactively(
|
|
483
406
|
|
484
407
|
# 1. Generate testcase.
|
485
408
|
if generation_metadata is not None:
|
486
|
-
generate_standalone(
|
409
|
+
await generate_standalone(
|
487
410
|
generation_metadata,
|
488
411
|
progress=progress,
|
489
412
|
validate=True,
|
@@ -531,10 +454,20 @@ async def _generate_testcase_interactively(
|
|
531
454
|
raise
|
532
455
|
|
533
456
|
if main_solution_digest is not None:
|
457
|
+
pkg = package.find_problem_package_or_die()
|
458
|
+
if pkg.type == TaskType.COMMUNICATION:
|
459
|
+
interactor_digest = checkers.compile_interactor(progress)
|
460
|
+
else:
|
461
|
+
interactor_digest = None
|
462
|
+
|
534
463
|
if progress:
|
535
464
|
progress.update('Generating output for test...')
|
536
465
|
# TODO: Add stderr path
|
537
|
-
generate_output_for_testcase(
|
466
|
+
await generate_output_for_testcase(
|
467
|
+
main_solution_digest,
|
468
|
+
testcase,
|
469
|
+
interactor_digest=interactor_digest,
|
470
|
+
)
|
538
471
|
|
539
472
|
if check and testcase.outputPath is not None and not testcase.outputPath.is_file():
|
540
473
|
# Output was not created, throw an error.
|
@@ -559,9 +492,13 @@ def _run_interactive_solutions(
|
|
559
492
|
) -> Iterator[EvaluationItem]:
|
560
493
|
pkg = package.find_problem_package_or_die()
|
561
494
|
|
562
|
-
if
|
563
|
-
|
564
|
-
|
495
|
+
if pkg.type == TaskType.COMMUNICATION:
|
496
|
+
checker_digest = checkers.compile_checker() if check else None
|
497
|
+
interactor_digest = checkers.compile_interactor()
|
498
|
+
else:
|
499
|
+
checker_digest = checkers.compile_checker() if check else None
|
500
|
+
interactor_digest = None
|
501
|
+
|
565
502
|
compiled_solutions = compile_solutions(
|
566
503
|
progress=progress, tracked_solutions=tracked_solutions, sanitized=sanitized
|
567
504
|
)
|
@@ -581,12 +518,13 @@ def _run_interactive_solutions(
|
|
581
518
|
output_dir = irun_dir / f'{i}'
|
582
519
|
|
583
520
|
async def run_fn(solution=solution, output_dir=output_dir):
|
584
|
-
return _run_solution_on_testcase(
|
521
|
+
return await run_solution_on_testcase(
|
585
522
|
solution,
|
586
523
|
compiled_solutions[solution.path],
|
587
524
|
checker_digest,
|
588
525
|
testcase,
|
589
526
|
output_dir,
|
527
|
+
interactor_digest=interactor_digest,
|
590
528
|
verification=verification,
|
591
529
|
)
|
592
530
|
|
rbx/box/solutions_test.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
import asyncio
|
2
1
|
import pathlib
|
3
2
|
|
4
3
|
import pytest
|
@@ -17,15 +16,15 @@ from rbx.grading.steps import Outcome
|
|
17
16
|
|
18
17
|
|
19
18
|
@pytest.mark.test_pkg('box1')
|
20
|
-
def test_solutions(pkg_from_testdata: pathlib.Path):
|
21
|
-
generate_testcases()
|
19
|
+
async def test_solutions(pkg_from_testdata: pathlib.Path):
|
20
|
+
await generate_testcases()
|
22
21
|
entries = [
|
23
|
-
entry.group_entry for entry in extract_generation_testcases_from_groups()
|
22
|
+
entry.group_entry for entry in await extract_generation_testcases_from_groups()
|
24
23
|
]
|
25
|
-
generate_outputs_for_testcases(entries)
|
24
|
+
await generate_outputs_for_testcases(entries)
|
26
25
|
|
27
26
|
result = run_solutions(verification=VerificationLevel.FULL)
|
28
|
-
res = asyncio.run(convert_list_of_solution_evaluations_to_dict(result.items))
|
27
|
+
res = await convert_list_of_solution_evaluations_to_dict(result.items)
|
29
28
|
|
30
29
|
# First solution should pass all tests.
|
31
30
|
assert all(chk.result.outcome == Outcome.ACCEPTED for chk in res[0]['gen1'])
|
rbx/box/statements/build_statements.py
CHANGED
@@ -3,6 +3,7 @@ import tempfile
|
|
3
3
|
import typing
|
4
4
|
from typing import Annotated, Dict, List, Optional, Tuple
|
5
5
|
|
6
|
+
import syncer
|
6
7
|
import typer
|
7
8
|
|
8
9
|
from rbx import annotations, console
|
@@ -307,7 +308,8 @@ def build_statement(
|
|
307
308
|
|
308
309
|
@app.command('build, b', help='Build statements.')
|
309
310
|
@package.within_problem
|
310
|
-
def build(
|
311
|
+
@syncer.sync
|
312
|
+
async def build(
|
311
313
|
verification: environment.VerificationParam,
|
312
314
|
languages: Annotated[
|
313
315
|
Optional[List[str]],
|
@@ -335,7 +337,7 @@ def build(
|
|
335
337
|
if samples:
|
336
338
|
from rbx.box import builder
|
337
339
|
|
338
|
-
if not builder.build(
|
340
|
+
if not await builder.build(
|
339
341
|
verification=verification,
|
340
342
|
groups=set(['samples']),
|
341
343
|
output=None,
|
rbx/box/stresses.py
CHANGED
@@ -3,6 +3,7 @@ import time
|
|
3
3
|
from shutil import rmtree
|
4
4
|
from typing import List, Optional
|
5
5
|
|
6
|
+
import syncer
|
6
7
|
import typer
|
7
8
|
from pydantic import BaseModel
|
8
9
|
|
@@ -15,7 +16,7 @@ from rbx.box.generators import (
|
|
15
16
|
generate_standalone,
|
16
17
|
)
|
17
18
|
from rbx.box.retries import Retrier
|
18
|
-
from rbx.box.schema import CodeItem, GeneratorCall, Stress, Testcase
|
19
|
+
from rbx.box.schema import CodeItem, GeneratorCall, Stress, TaskType, Testcase
|
19
20
|
from rbx.box.solutions import compile_solutions, get_outcome_style_verdict
|
20
21
|
from rbx.box.stressing import finder_parser
|
21
22
|
from rbx.grading.steps import (
|
@@ -49,7 +50,8 @@ def _compile_finder(finder: CodeItem) -> str:
|
|
49
50
|
return digest
|
50
51
|
|
51
52
|
|
52
|
-
def run_stress(
|
53
|
+
@syncer.sync
|
54
|
+
async def run_stress(
|
53
55
|
name: str,
|
54
56
|
timeoutInSeconds: int,
|
55
57
|
finder: Optional[str] = None,
|
@@ -59,6 +61,13 @@ def run_stress(
|
|
59
61
|
progress: Optional[StatusProgress] = None,
|
60
62
|
sanitized: bool = False,
|
61
63
|
) -> StressReport:
|
64
|
+
pkg = package.find_problem_package_or_die()
|
65
|
+
if pkg.type == TaskType.COMMUNICATION:
|
66
|
+
console.console.print(
|
67
|
+
'[error]Communication problems do not support stress testing.[/error]'
|
68
|
+
)
|
69
|
+
raise typer.Exit(1)
|
70
|
+
|
62
71
|
if finder:
|
63
72
|
stress = Stress(
|
64
73
|
name=f'{name}',
|
@@ -128,7 +137,7 @@ def run_stress(
|
|
128
137
|
input_path.parent.mkdir(parents=True, exist_ok=True)
|
129
138
|
|
130
139
|
expanded_generator_call = expand_generator_call(stress.generator)
|
131
|
-
generate_standalone(
|
140
|
+
await generate_standalone(
|
132
141
|
GenerationMetadata(
|
133
142
|
generator_call=expanded_generator_call,
|
134
143
|
copied_to=Testcase(inputPath=input_path),
|
@@ -140,7 +149,7 @@ def run_stress(
|
|
140
149
|
)
|
141
150
|
|
142
151
|
@functools.cache
|
143
|
-
def run_solution_fn(
|
152
|
+
async def run_solution_fn(
|
144
153
|
solution: str,
|
145
154
|
retry_index: Optional[int] = None,
|
146
155
|
input_path=input_path,
|
@@ -150,7 +159,7 @@ def run_stress(
|
|
150
159
|
output_path = input_path.with_stem(f'{index}').with_suffix('.out')
|
151
160
|
stderr_path = output_path.with_suffix('.err')
|
152
161
|
|
153
|
-
run_log = run_item(
|
162
|
+
run_log = await run_item(
|
154
163
|
sol,
|
155
164
|
DigestOrSource.create(solutions_digest[sol.path]),
|
156
165
|
stdin=DigestOrSource.create(input_path),
|
@@ -168,7 +177,7 @@ def run_stress(
|
|
168
177
|
# Get main solution output.
|
169
178
|
expected_output_path = empty_path
|
170
179
|
if needs_expected_output:
|
171
|
-
main_testcase_log = run_solution_fn(str(solutions[0].path))
|
180
|
+
main_testcase_log = await run_solution_fn(str(solutions[0].path))
|
172
181
|
main_checker_result = checkers.check_with_no_output(main_testcase_log)
|
173
182
|
if main_checker_result.outcome != Outcome.ACCEPTED:
|
174
183
|
console.console.print(
|
@@ -190,23 +199,23 @@ def run_stress(
|
|
190
199
|
expected_output_path = main_testcase_log.stdout_absolute_path
|
191
200
|
|
192
201
|
@functools.cache
|
193
|
-
def run_solution_and_checker_fn(
|
202
|
+
async def run_solution_and_checker_fn(
|
194
203
|
call: finder_parser.FinderCall,
|
195
204
|
input_path=input_path,
|
196
205
|
expected_output_path=expected_output_path,
|
197
206
|
) -> finder_parser.FinderResult:
|
198
|
-
def run_fn(retry_index: int) -> Evaluation:
|
207
|
+
async def run_fn(retry_index: int) -> Evaluation:
|
199
208
|
solution = call.solution
|
200
209
|
checker = call.checker
|
201
210
|
|
202
|
-
testcase_log = run_solution_fn(solution, retry_index=retry_index)
|
211
|
+
testcase_log = await run_solution_fn(solution, retry_index=retry_index)
|
203
212
|
assert testcase_log.stdout_absolute_path is not None
|
204
213
|
|
205
214
|
if checker is None:
|
206
215
|
checker_result = checkers.check_with_no_output(testcase_log)
|
207
216
|
else:
|
208
217
|
checker_digest = finders_digest[checker.path]
|
209
|
-
checker_result = checkers.check(
|
218
|
+
checker_result = await checkers.check(
|
210
219
|
checker_digest,
|
211
220
|
testcase_log,
|
212
221
|
Testcase(inputPath=input_path, outputPath=expected_output_path),
|
@@ -224,7 +233,7 @@ def run_stress(
|
|
224
233
|
)
|
225
234
|
|
226
235
|
retrier = Retrier(is_stress=True)
|
227
|
-
eval = retrier.repeat(run_fn)
|
236
|
+
eval = await retrier.repeat(run_fn)
|
228
237
|
|
229
238
|
return finder_parser.FinderResult(
|
230
239
|
solution=call.solution,
|
@@ -234,7 +243,9 @@ def run_stress(
|
|
234
243
|
checker_result=eval.result,
|
235
244
|
)
|
236
245
|
|
237
|
-
runner = finder_parser.FinderTreeRunner(runner=run_solution_and_checker_fn)
|
246
|
+
runner = finder_parser.FinderTreeRunner(
|
247
|
+
runner=syncer.sync(run_solution_and_checker_fn)
|
248
|
+
)
|
238
249
|
finder_outcome: finder_parser.FinderOutcome = runner.transform(parsed_finder)
|
239
250
|
|
240
251
|
internal_error_results = [
|
rbx/box/tasks.py
ADDED
@@ -0,0 +1,258 @@
|
|
1
|
+
import pathlib
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
from rbx.box import checkers, package
|
5
|
+
from rbx.box.code import CommunicationItem, run_communication, run_item
|
6
|
+
from rbx.box.environment import EnvironmentSandbox, ExecutionConfig, VerificationLevel
|
7
|
+
from rbx.box.retries import Retrier
|
8
|
+
from rbx.box.schema import Limits, Solution, Testcase
|
9
|
+
from rbx.grading.judge.sandbox import SandboxBase
|
10
|
+
from rbx.grading.steps import (
|
11
|
+
DigestOrDest,
|
12
|
+
DigestOrSource,
|
13
|
+
Evaluation,
|
14
|
+
GradingFileInput,
|
15
|
+
GradingFileOutput,
|
16
|
+
TestcaseIO,
|
17
|
+
TestcaseLog,
|
18
|
+
)
|
19
|
+
from rbx.utils import model_to_yaml
|
20
|
+
|
21
|
+
|
22
|
+
def get_limits_for_language(
|
23
|
+
lang: Optional[str],
|
24
|
+
verification: VerificationLevel,
|
25
|
+
timelimit_override: Optional[int],
|
26
|
+
use_timelimit: bool = True,
|
27
|
+
) -> Limits:
|
28
|
+
pkg = package.find_problem_package_or_die()
|
29
|
+
time = timelimit_override or pkg.timelimit_for_language(lang)
|
30
|
+
isDoubleTL = verification.value >= VerificationLevel.FULL.value
|
31
|
+
memory = pkg.memorylimit_for_language(lang)
|
32
|
+
return Limits(
|
33
|
+
time=time if use_timelimit else None,
|
34
|
+
memory=memory,
|
35
|
+
output=pkg.outputLimit,
|
36
|
+
isDoubleTL=isDoubleTL,
|
37
|
+
)
|
38
|
+
|
39
|
+
|
40
|
+
async def run_solution_on_testcase(
|
41
|
+
solution: Solution,
|
42
|
+
compiled_digest: str,
|
43
|
+
checker_digest: Optional[str],
|
44
|
+
testcase: Testcase,
|
45
|
+
output_dir: pathlib.Path,
|
46
|
+
interactor_digest: Optional[str] = None,
|
47
|
+
testcase_index: int = 0,
|
48
|
+
verification: VerificationLevel = VerificationLevel.NONE,
|
49
|
+
timelimit_override: Optional[int] = None,
|
50
|
+
use_retries: bool = True,
|
51
|
+
use_timelimit: bool = True,
|
52
|
+
) -> Evaluation:
|
53
|
+
if interactor_digest is not None:
|
54
|
+
return await _run_communication_solution_on_testcase(
|
55
|
+
solution,
|
56
|
+
compiled_digest,
|
57
|
+
interactor_digest,
|
58
|
+
checker_digest,
|
59
|
+
testcase,
|
60
|
+
output_dir,
|
61
|
+
testcase_index=testcase_index,
|
62
|
+
verification=verification,
|
63
|
+
timelimit_override=timelimit_override,
|
64
|
+
use_retries=use_retries,
|
65
|
+
use_timelimit=use_timelimit,
|
66
|
+
)
|
67
|
+
|
68
|
+
async def run_fn(retry_index: int) -> Evaluation:
|
69
|
+
actual_sandbox = package.get_singleton_sandbox()
|
70
|
+
|
71
|
+
limits = get_limits_for_language(
|
72
|
+
solution.language,
|
73
|
+
verification,
|
74
|
+
timelimit_override,
|
75
|
+
use_timelimit=use_timelimit,
|
76
|
+
)
|
77
|
+
extra_config = _get_execution_config(limits, actual_sandbox)
|
78
|
+
|
79
|
+
output_path = output_dir / testcase.inputPath.with_suffix('.out').name
|
80
|
+
error_path = output_path.with_suffix('.err')
|
81
|
+
log_path = output_path.with_suffix('.log')
|
82
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
83
|
+
|
84
|
+
run_log = await run_item(
|
85
|
+
solution,
|
86
|
+
DigestOrSource.create(compiled_digest),
|
87
|
+
stdin=DigestOrSource.create(testcase.inputPath),
|
88
|
+
stdout=DigestOrDest.create(output_path),
|
89
|
+
stderr=DigestOrDest.create(error_path),
|
90
|
+
extra_config=extra_config,
|
91
|
+
retry_index=retry_index,
|
92
|
+
)
|
93
|
+
|
94
|
+
if checker_digest is not None:
|
95
|
+
checker_result = await checkers.check(
|
96
|
+
checker_digest,
|
97
|
+
run_log,
|
98
|
+
testcase,
|
99
|
+
program_output=output_path,
|
100
|
+
)
|
101
|
+
else:
|
102
|
+
checker_result = checkers.check_with_no_output(run_log)
|
103
|
+
|
104
|
+
eval = Evaluation(
|
105
|
+
result=checker_result,
|
106
|
+
testcase=TestcaseIO(
|
107
|
+
index=testcase_index,
|
108
|
+
input=testcase.inputPath,
|
109
|
+
output=testcase.outputPath,
|
110
|
+
),
|
111
|
+
log=TestcaseLog(
|
112
|
+
**(run_log.model_dump() if run_log is not None else {}),
|
113
|
+
stdout_absolute_path=output_path.absolute(),
|
114
|
+
stderr_absolute_path=error_path.absolute(),
|
115
|
+
log_absolute_path=log_path.absolute(),
|
116
|
+
),
|
117
|
+
)
|
118
|
+
|
119
|
+
log_path.write_text(model_to_yaml(eval))
|
120
|
+
return eval
|
121
|
+
|
122
|
+
if not use_retries:
|
123
|
+
return await run_fn(0)
|
124
|
+
|
125
|
+
retrier = Retrier()
|
126
|
+
return await retrier.repeat(run_fn)
|
127
|
+
|
128
|
+
|
129
|
+
def _get_execution_config(
|
130
|
+
limits: Limits,
|
131
|
+
actual_sandbox: SandboxBase,
|
132
|
+
) -> ExecutionConfig:
|
133
|
+
sandbox = EnvironmentSandbox()
|
134
|
+
sandbox.timeLimit = limits.time
|
135
|
+
if limits.isDoubleTL and sandbox.timeLimit is not None:
|
136
|
+
# Double TL.
|
137
|
+
sandbox.timeLimit = sandbox.timeLimit * 2
|
138
|
+
sandbox.wallTimeLimit = sandbox.timeLimit
|
139
|
+
if sandbox.timeLimit is not None and actual_sandbox.use_soft_timeout():
|
140
|
+
sandbox.wallTimeLimit = sandbox.timeLimit * 2
|
141
|
+
sandbox.memoryLimit = limits.memory
|
142
|
+
sandbox.fileSizeLimit = limits.output
|
143
|
+
return ExecutionConfig(sandbox=sandbox)
|
144
|
+
|
145
|
+
|
146
|
+
async def _run_communication_solution_on_testcase(
|
147
|
+
solution: Solution,
|
148
|
+
compiled_digest: str,
|
149
|
+
interactor_digest: str,
|
150
|
+
checker_digest: Optional[str],
|
151
|
+
testcase: Testcase,
|
152
|
+
output_dir: pathlib.Path,
|
153
|
+
testcase_index: int = 0,
|
154
|
+
verification: VerificationLevel = VerificationLevel.NONE,
|
155
|
+
timelimit_override: Optional[int] = None,
|
156
|
+
use_retries: bool = True,
|
157
|
+
use_timelimit: bool = True,
|
158
|
+
) -> Evaluation:
|
159
|
+
async def run_fn(retry_index: int) -> Evaluation:
|
160
|
+
actual_sandbox = package.get_singleton_sandbox()
|
161
|
+
interactor_sandbox = package.get_singleton_interactor_sandbox()
|
162
|
+
|
163
|
+
limits = get_limits_for_language(
|
164
|
+
solution.language,
|
165
|
+
verification,
|
166
|
+
timelimit_override,
|
167
|
+
use_timelimit=use_timelimit,
|
168
|
+
)
|
169
|
+
|
170
|
+
extra_config = _get_execution_config(limits, actual_sandbox)
|
171
|
+
interactor_extra_config = _get_execution_config(limits, interactor_sandbox)
|
172
|
+
if (
|
173
|
+
interactor_extra_config.sandbox is not None
|
174
|
+
and interactor_extra_config.sandbox.wallTimeLimit is not None
|
175
|
+
and extra_config.sandbox is not None
|
176
|
+
and extra_config.sandbox.wallTimeLimit is not None
|
177
|
+
):
|
178
|
+
interactor_extra_config.sandbox.wallTimeLimit += (
|
179
|
+
extra_config.sandbox.wallTimeLimit
|
180
|
+
)
|
181
|
+
# TODO: maybe combine wall time limits?
|
182
|
+
|
183
|
+
output_path = output_dir / testcase.inputPath.with_suffix('.out').name
|
184
|
+
error_path = output_path.with_suffix('.err')
|
185
|
+
log_path = output_path.with_suffix('.log')
|
186
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
187
|
+
|
188
|
+
interactor_item = CommunicationItem(
|
189
|
+
code=package.get_interactor(),
|
190
|
+
executable=DigestOrSource.create(interactor_digest),
|
191
|
+
stderr=DigestOrDest.create(error_path),
|
192
|
+
extra_config=interactor_extra_config,
|
193
|
+
extra_args='interactor.in interactor.out',
|
194
|
+
inputs=[
|
195
|
+
GradingFileInput(
|
196
|
+
src=testcase.inputPath,
|
197
|
+
dest=pathlib.PosixPath('interactor.in'),
|
198
|
+
)
|
199
|
+
],
|
200
|
+
outputs=[
|
201
|
+
GradingFileOutput(
|
202
|
+
src=pathlib.PosixPath('interactor.out'),
|
203
|
+
dest=output_path,
|
204
|
+
touch=True,
|
205
|
+
)
|
206
|
+
],
|
207
|
+
)
|
208
|
+
solution_item = CommunicationItem(
|
209
|
+
code=solution,
|
210
|
+
executable=DigestOrSource.create(compiled_digest),
|
211
|
+
extra_config=extra_config,
|
212
|
+
)
|
213
|
+
|
214
|
+
interactor_run_log, run_log = await run_communication(
|
215
|
+
interactor=interactor_item,
|
216
|
+
solution=solution_item,
|
217
|
+
retry_index=retry_index,
|
218
|
+
)
|
219
|
+
|
220
|
+
checker_result = await checkers.check_communication(
|
221
|
+
checker_digest,
|
222
|
+
run_log,
|
223
|
+
interactor_run_log,
|
224
|
+
error_path,
|
225
|
+
testcase,
|
226
|
+
output_path,
|
227
|
+
)
|
228
|
+
|
229
|
+
eval = Evaluation(
|
230
|
+
result=checker_result,
|
231
|
+
testcase=TestcaseIO(
|
232
|
+
index=testcase_index,
|
233
|
+
input=testcase.inputPath,
|
234
|
+
output=testcase.outputPath,
|
235
|
+
),
|
236
|
+
log=TestcaseLog(
|
237
|
+
**(run_log.model_dump() if run_log is not None else {}),
|
238
|
+
stdout_absolute_path=output_path.absolute(),
|
239
|
+
stderr_absolute_path=error_path.absolute(),
|
240
|
+
log_absolute_path=log_path.absolute(),
|
241
|
+
),
|
242
|
+
)
|
243
|
+
|
244
|
+
log_path.write_text(model_to_yaml(eval))
|
245
|
+
|
246
|
+
if interactor_run_log is not None:
|
247
|
+
interactor_log_path = output_path.with_suffix('.int.log')
|
248
|
+
interactor_log_path.write_text(model_to_yaml(interactor_run_log))
|
249
|
+
if run_log is not None:
|
250
|
+
solution_log_path = output_path.with_suffix('.sol.log')
|
251
|
+
solution_log_path.write_text(model_to_yaml(run_log))
|
252
|
+
return eval
|
253
|
+
|
254
|
+
if not use_retries:
|
255
|
+
return await run_fn(0)
|
256
|
+
|
257
|
+
retrier = Retrier()
|
258
|
+
return await retrier.repeat(run_fn)
|