rbx.cp 0.5.39__py3-none-any.whl → 0.5.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. rbx/box/builder.py +6 -6
  2. rbx/box/checkers.py +105 -26
  3. rbx/box/cli.py +860 -0
  4. rbx/box/code.py +199 -84
  5. rbx/box/contest/statements.py +4 -2
  6. rbx/box/generators.py +55 -49
  7. rbx/box/generators_test.py +7 -7
  8. rbx/box/main.py +1 -852
  9. rbx/box/package.py +42 -1
  10. rbx/box/packaging/boca/packager.py +2 -1
  11. rbx/box/packaging/main.py +24 -7
  12. rbx/box/packaging/moj/packager.py +164 -0
  13. rbx/box/retries.py +5 -5
  14. rbx/box/schema.py +86 -4
  15. rbx/box/solutions.py +46 -108
  16. rbx/box/solutions_test.py +5 -6
  17. rbx/box/statements/build_statements.py +4 -2
  18. rbx/box/stresses.py +23 -12
  19. rbx/box/tasks.py +258 -0
  20. rbx/box/testcase_extractors.py +21 -21
  21. rbx/box/testcases/main.py +19 -14
  22. rbx/box/unit.py +116 -0
  23. rbx/box/validators.py +27 -18
  24. rbx/box/validators_test.py +3 -3
  25. rbx/grading/judge/sandbox.py +8 -0
  26. rbx/grading/judge/sandboxes/stupid_sandbox.py +12 -7
  27. rbx/grading/judge/sandboxes/timeit.py +8 -2
  28. rbx/grading/steps.py +76 -2
  29. rbx/grading/steps_with_caching.py +45 -3
  30. rbx/grading/steps_with_caching_run_test.py +51 -49
  31. rbx/resources/packagers/moj/scripts/compare.sh +101 -0
  32. rbx/test.py +6 -4
  33. rbx/testdata/interactive/checker.cpp +21 -0
  34. rbx/testdata/interactive/gen.cpp +11 -0
  35. rbx/testdata/interactive/interactor.cpp +63 -0
  36. rbx/testdata/interactive/problem.rbx.yml +40 -0
  37. rbx/testdata/interactive/sols/af_ac_pe.cpp +75 -0
  38. rbx/testdata/interactive/sols/af_ac_re.cpp +76 -0
  39. rbx/testdata/interactive/sols/af_ac_too_many_iter.cpp +72 -0
  40. rbx/testdata/interactive/sols/af_inf_cout_with_flush.cpp +79 -0
  41. rbx/testdata/interactive/sols/af_inf_cout_without_flush.cpp +78 -0
  42. rbx/testdata/interactive/sols/af_ml.cpp +78 -0
  43. rbx/testdata/interactive/sols/af_tl_after_ans.cpp +74 -0
  44. rbx/testdata/interactive/sols/af_wa.cpp +74 -0
  45. rbx/testdata/interactive/sols/interactive-binary-search_mm_naive_cin.cpp +17 -0
  46. rbx/testdata/interactive/sols/main.cpp +26 -0
  47. rbx/testdata/interactive/testplan.txt +6 -0
  48. rbx/testdata/interactive/validator.cpp +16 -0
  49. {rbx_cp-0.5.39.dist-info → rbx_cp-0.5.42.dist-info}/METADATA +2 -1
  50. {rbx_cp-0.5.39.dist-info → rbx_cp-0.5.42.dist-info}/RECORD +53 -32
  51. {rbx_cp-0.5.39.dist-info → rbx_cp-0.5.42.dist-info}/LICENSE +0 -0
  52. {rbx_cp-0.5.39.dist-info → rbx_cp-0.5.42.dist-info}/WHEEL +0 -0
  53. {rbx_cp-0.5.39.dist-info → rbx_cp-0.5.42.dist-info}/entry_points.txt +0 -0
rbx/box/solutions.py CHANGED
@@ -17,11 +17,13 @@ from pydantic import BaseModel
17
17
 
18
18
  from rbx import console, utils
19
19
  from rbx.box import checkers, package
20
- from rbx.box.code import SanitizationLevel, compile_item, find_language_name, run_item
20
+ from rbx.box.code import (
21
+ SanitizationLevel,
22
+ compile_item,
23
+ find_language_name,
24
+ )
21
25
  from rbx.box.deferred import Deferred
22
26
  from rbx.box.environment import (
23
- EnvironmentSandbox,
24
- ExecutionConfig,
25
27
  VerificationLevel,
26
28
  )
27
29
  from rbx.box.formatting import get_formatted_memory, get_formatted_time
@@ -31,26 +33,26 @@ from rbx.box.generators import (
31
33
  generate_output_for_testcase,
32
34
  generate_standalone,
33
35
  )
34
- from rbx.box.retries import Retrier
35
36
  from rbx.box.schema import (
36
37
  ExpectedOutcome,
37
38
  GeneratorCall,
38
39
  Limits,
39
40
  Solution,
41
+ TaskType,
40
42
  Testcase,
41
43
  TestcaseGroup,
42
44
  )
45
+ from rbx.box.tasks import (
46
+ get_limits_for_language,
47
+ run_solution_on_testcase,
48
+ )
43
49
  from rbx.box.testcase_extractors import extract_generation_testcases
44
50
  from rbx.box.testcase_utils import TestcaseEntry, find_built_testcases
45
51
  from rbx.grading.steps import (
46
- DigestOrDest,
47
- DigestOrSource,
48
52
  Evaluation,
49
53
  Outcome,
50
- TestcaseIO,
51
- TestcaseLog,
52
54
  )
53
- from rbx.utils import StatusProgress, model_to_yaml
55
+ from rbx.utils import StatusProgress
54
56
 
55
57
  StructuredEvaluation = Dict[str, Dict[str, List[Optional[Deferred[Evaluation]]]]]
56
58
 
@@ -152,102 +154,13 @@ def compile_solutions(
152
154
  return compiled_solutions
153
155
 
154
156
 
155
- def get_limits_for_language(
156
- lang: Optional[str],
157
- verification: VerificationLevel,
158
- timelimit_override: Optional[int],
159
- ) -> Limits:
160
- pkg = package.find_problem_package_or_die()
161
- time = timelimit_override or pkg.timelimit_for_language(lang)
162
- isDoubleTL = verification.value >= VerificationLevel.FULL.value
163
- memory = pkg.memorylimit_for_language(lang)
164
- return Limits(
165
- time=time, memory=memory, output=pkg.outputLimit, isDoubleTL=isDoubleTL
166
- )
167
-
168
-
169
- def _run_solution_on_testcase(
170
- solution: Solution,
171
- compiled_digest: str,
172
- checker_digest: Optional[str],
173
- testcase: Testcase,
174
- output_dir: pathlib.Path,
175
- testcase_index: int = 0,
176
- verification: VerificationLevel = VerificationLevel.NONE,
177
- timelimit_override: Optional[int] = None,
178
- ) -> Evaluation:
179
- def run_fn(retry_index: int) -> Evaluation:
180
- actual_sandbox = package.get_singleton_sandbox()
181
-
182
- limits = get_limits_for_language(
183
- solution.language, verification, timelimit_override
184
- )
185
-
186
- sandbox = EnvironmentSandbox()
187
- sandbox.timeLimit = limits.time
188
- if limits.isDoubleTL and sandbox.timeLimit is not None:
189
- # Double TL.
190
- sandbox.timeLimit = sandbox.timeLimit * 2
191
- sandbox.wallTimeLimit = sandbox.timeLimit
192
- if sandbox.timeLimit is not None and actual_sandbox.use_soft_timeout():
193
- sandbox.wallTimeLimit = sandbox.timeLimit * 2
194
- sandbox.memoryLimit = limits.memory
195
- sandbox.fileSizeLimit = limits.output
196
- extra_config = ExecutionConfig(sandbox=sandbox)
197
-
198
- output_path = output_dir / testcase.inputPath.with_suffix('.out').name
199
- error_path = output_path.with_suffix('.err')
200
- log_path = output_path.with_suffix('.log')
201
- output_path.parent.mkdir(parents=True, exist_ok=True)
202
-
203
- run_log = run_item(
204
- solution,
205
- DigestOrSource.create(compiled_digest),
206
- stdin=DigestOrSource.create(testcase.inputPath),
207
- stdout=DigestOrDest.create(output_path),
208
- stderr=DigestOrDest.create(error_path),
209
- extra_config=extra_config,
210
- retry_index=retry_index,
211
- )
212
-
213
- if checker_digest is not None:
214
- checker_result = checkers.check(
215
- checker_digest,
216
- run_log,
217
- testcase,
218
- program_output=output_path,
219
- )
220
- else:
221
- checker_result = checkers.check_with_no_output(run_log)
222
-
223
- eval = Evaluation(
224
- result=checker_result,
225
- testcase=TestcaseIO(
226
- index=testcase_index,
227
- input=testcase.inputPath,
228
- output=testcase.outputPath,
229
- ),
230
- log=TestcaseLog(
231
- **(run_log.model_dump() if run_log is not None else {}),
232
- stdout_absolute_path=output_path.absolute(),
233
- stderr_absolute_path=error_path.absolute(),
234
- log_absolute_path=log_path.absolute(),
235
- ),
236
- )
237
-
238
- log_path.write_text(model_to_yaml(eval))
239
- return eval
240
-
241
- retrier = Retrier()
242
- return retrier.repeat(run_fn)
243
-
244
-
245
157
  def _run_solution(
246
158
  solution: Solution,
247
159
  compiled_digest: str,
248
160
  checker_digest: Optional[str],
249
161
  solution_index: int,
250
162
  group_name: str,
163
+ interactor_digest: Optional[str] = None,
251
164
  progress: Optional[StatusProgress] = None,
252
165
  verification: VerificationLevel = VerificationLevel.NONE,
253
166
  timelimit_override: Optional[int] = None,
@@ -267,12 +180,13 @@ def _run_solution(
267
180
  )
268
181
 
269
182
  async def run_fn(i=i, testcase=testcase, output_path=output_path):
270
- return _run_solution_on_testcase(
183
+ return await run_solution_on_testcase(
271
184
  solution,
272
185
  compiled_digest,
273
186
  checker_digest,
274
187
  testcase,
275
188
  output_path,
189
+ interactor_digest=interactor_digest,
276
190
  testcase_index=i,
277
191
  verification=verification,
278
192
  timelimit_override=timelimit_override,
@@ -343,7 +257,15 @@ def _produce_solution_items(
343
257
  ) -> List[EvaluationItem]:
344
258
  pkg = package.find_problem_package_or_die()
345
259
 
346
- checker_digest = checkers.compile_checker() if check else None
260
+ if pkg.type == TaskType.COMMUNICATION:
261
+ checker_digest = (
262
+ checkers.compile_checker() if check and pkg.checker is not None else None
263
+ )
264
+ interactor_digest = checkers.compile_interactor()
265
+ else:
266
+ checker_digest = checkers.compile_checker() if check else None
267
+ interactor_digest = None
268
+
347
269
  compiled_solutions = compile_solutions(
348
270
  progress=progress, tracked_solutions=tracked_solutions, sanitized=sanitized
349
271
  )
@@ -374,6 +296,7 @@ def _produce_solution_items(
374
296
  checker_digest,
375
297
  solution_index,
376
298
  group_name,
299
+ interactor_digest=interactor_digest,
377
300
  progress=progress,
378
301
  verification=verification,
379
302
  timelimit_override=timelimit_override,
@@ -451,7 +374,7 @@ async def _generate_testcase_interactively(
451
374
  copied_to=testcase,
452
375
  )
453
376
  elif testcase_entry is not None:
454
- extracted = extract_generation_testcases([testcase_entry])
377
+ extracted = await extract_generation_testcases([testcase_entry])
455
378
  if not extracted:
456
379
  console.console.print(
457
380
  f'[error]Failed searching for testcase [item]{testcase_entry}[/item].[/error]'
@@ -483,7 +406,7 @@ async def _generate_testcase_interactively(
483
406
 
484
407
  # 1. Generate testcase.
485
408
  if generation_metadata is not None:
486
- generate_standalone(
409
+ await generate_standalone(
487
410
  generation_metadata,
488
411
  progress=progress,
489
412
  validate=True,
@@ -531,10 +454,20 @@ async def _generate_testcase_interactively(
531
454
  raise
532
455
 
533
456
  if main_solution_digest is not None:
457
+ pkg = package.find_problem_package_or_die()
458
+ if pkg.type == TaskType.COMMUNICATION:
459
+ interactor_digest = checkers.compile_interactor(progress)
460
+ else:
461
+ interactor_digest = None
462
+
534
463
  if progress:
535
464
  progress.update('Generating output for test...')
536
465
  # TODO: Add stderr path
537
- generate_output_for_testcase(main_solution_digest, testcase)
466
+ await generate_output_for_testcase(
467
+ main_solution_digest,
468
+ testcase,
469
+ interactor_digest=interactor_digest,
470
+ )
538
471
 
539
472
  if check and testcase.outputPath is not None and not testcase.outputPath.is_file():
540
473
  # Output was not created, throw an error.
@@ -559,9 +492,13 @@ def _run_interactive_solutions(
559
492
  ) -> Iterator[EvaluationItem]:
560
493
  pkg = package.find_problem_package_or_die()
561
494
 
562
- if check and progress:
563
- progress.update('Compiling checker...')
564
- checker_digest = checkers.compile_checker() if check else None
495
+ if pkg.type == TaskType.COMMUNICATION:
496
+ checker_digest = checkers.compile_checker() if check else None
497
+ interactor_digest = checkers.compile_interactor()
498
+ else:
499
+ checker_digest = checkers.compile_checker() if check else None
500
+ interactor_digest = None
501
+
565
502
  compiled_solutions = compile_solutions(
566
503
  progress=progress, tracked_solutions=tracked_solutions, sanitized=sanitized
567
504
  )
@@ -581,12 +518,13 @@ def _run_interactive_solutions(
581
518
  output_dir = irun_dir / f'{i}'
582
519
 
583
520
  async def run_fn(solution=solution, output_dir=output_dir):
584
- return _run_solution_on_testcase(
521
+ return await run_solution_on_testcase(
585
522
  solution,
586
523
  compiled_solutions[solution.path],
587
524
  checker_digest,
588
525
  testcase,
589
526
  output_dir,
527
+ interactor_digest=interactor_digest,
590
528
  verification=verification,
591
529
  )
592
530
 
rbx/box/solutions_test.py CHANGED
@@ -1,4 +1,3 @@
1
- import asyncio
2
1
  import pathlib
3
2
 
4
3
  import pytest
@@ -17,15 +16,15 @@ from rbx.grading.steps import Outcome
17
16
 
18
17
 
19
18
  @pytest.mark.test_pkg('box1')
20
- def test_solutions(pkg_from_testdata: pathlib.Path):
21
- generate_testcases()
19
+ async def test_solutions(pkg_from_testdata: pathlib.Path):
20
+ await generate_testcases()
22
21
  entries = [
23
- entry.group_entry for entry in extract_generation_testcases_from_groups()
22
+ entry.group_entry for entry in await extract_generation_testcases_from_groups()
24
23
  ]
25
- generate_outputs_for_testcases(entries)
24
+ await generate_outputs_for_testcases(entries)
26
25
 
27
26
  result = run_solutions(verification=VerificationLevel.FULL)
28
- res = asyncio.run(convert_list_of_solution_evaluations_to_dict(result.items))
27
+ res = await convert_list_of_solution_evaluations_to_dict(result.items)
29
28
 
30
29
  # First solution should pass all tests.
31
30
  assert all(chk.result.outcome == Outcome.ACCEPTED for chk in res[0]['gen1'])
@@ -3,6 +3,7 @@ import tempfile
3
3
  import typing
4
4
  from typing import Annotated, Dict, List, Optional, Tuple
5
5
 
6
+ import syncer
6
7
  import typer
7
8
 
8
9
  from rbx import annotations, console
@@ -307,7 +308,8 @@ def build_statement(
307
308
 
308
309
  @app.command('build, b', help='Build statements.')
309
310
  @package.within_problem
310
- def build(
311
+ @syncer.sync
312
+ async def build(
311
313
  verification: environment.VerificationParam,
312
314
  languages: Annotated[
313
315
  Optional[List[str]],
@@ -335,7 +337,7 @@ def build(
335
337
  if samples:
336
338
  from rbx.box import builder
337
339
 
338
- if not builder.build(
340
+ if not await builder.build(
339
341
  verification=verification,
340
342
  groups=set(['samples']),
341
343
  output=None,
rbx/box/stresses.py CHANGED
@@ -3,6 +3,7 @@ import time
3
3
  from shutil import rmtree
4
4
  from typing import List, Optional
5
5
 
6
+ import syncer
6
7
  import typer
7
8
  from pydantic import BaseModel
8
9
 
@@ -15,7 +16,7 @@ from rbx.box.generators import (
15
16
  generate_standalone,
16
17
  )
17
18
  from rbx.box.retries import Retrier
18
- from rbx.box.schema import CodeItem, GeneratorCall, Stress, Testcase
19
+ from rbx.box.schema import CodeItem, GeneratorCall, Stress, TaskType, Testcase
19
20
  from rbx.box.solutions import compile_solutions, get_outcome_style_verdict
20
21
  from rbx.box.stressing import finder_parser
21
22
  from rbx.grading.steps import (
@@ -49,7 +50,8 @@ def _compile_finder(finder: CodeItem) -> str:
49
50
  return digest
50
51
 
51
52
 
52
- def run_stress(
53
+ @syncer.sync
54
+ async def run_stress(
53
55
  name: str,
54
56
  timeoutInSeconds: int,
55
57
  finder: Optional[str] = None,
@@ -59,6 +61,13 @@ def run_stress(
59
61
  progress: Optional[StatusProgress] = None,
60
62
  sanitized: bool = False,
61
63
  ) -> StressReport:
64
+ pkg = package.find_problem_package_or_die()
65
+ if pkg.type == TaskType.COMMUNICATION:
66
+ console.console.print(
67
+ '[error]Communication problems do not support stress testing.[/error]'
68
+ )
69
+ raise typer.Exit(1)
70
+
62
71
  if finder:
63
72
  stress = Stress(
64
73
  name=f'{name}',
@@ -128,7 +137,7 @@ def run_stress(
128
137
  input_path.parent.mkdir(parents=True, exist_ok=True)
129
138
 
130
139
  expanded_generator_call = expand_generator_call(stress.generator)
131
- generate_standalone(
140
+ await generate_standalone(
132
141
  GenerationMetadata(
133
142
  generator_call=expanded_generator_call,
134
143
  copied_to=Testcase(inputPath=input_path),
@@ -140,7 +149,7 @@ def run_stress(
140
149
  )
141
150
 
142
151
  @functools.cache
143
- def run_solution_fn(
152
+ async def run_solution_fn(
144
153
  solution: str,
145
154
  retry_index: Optional[int] = None,
146
155
  input_path=input_path,
@@ -150,7 +159,7 @@ def run_stress(
150
159
  output_path = input_path.with_stem(f'{index}').with_suffix('.out')
151
160
  stderr_path = output_path.with_suffix('.err')
152
161
 
153
- run_log = run_item(
162
+ run_log = await run_item(
154
163
  sol,
155
164
  DigestOrSource.create(solutions_digest[sol.path]),
156
165
  stdin=DigestOrSource.create(input_path),
@@ -168,7 +177,7 @@ def run_stress(
168
177
  # Get main solution output.
169
178
  expected_output_path = empty_path
170
179
  if needs_expected_output:
171
- main_testcase_log = run_solution_fn(str(solutions[0].path))
180
+ main_testcase_log = await run_solution_fn(str(solutions[0].path))
172
181
  main_checker_result = checkers.check_with_no_output(main_testcase_log)
173
182
  if main_checker_result.outcome != Outcome.ACCEPTED:
174
183
  console.console.print(
@@ -190,23 +199,23 @@ def run_stress(
190
199
  expected_output_path = main_testcase_log.stdout_absolute_path
191
200
 
192
201
  @functools.cache
193
- def run_solution_and_checker_fn(
202
+ async def run_solution_and_checker_fn(
194
203
  call: finder_parser.FinderCall,
195
204
  input_path=input_path,
196
205
  expected_output_path=expected_output_path,
197
206
  ) -> finder_parser.FinderResult:
198
- def run_fn(retry_index: int) -> Evaluation:
207
+ async def run_fn(retry_index: int) -> Evaluation:
199
208
  solution = call.solution
200
209
  checker = call.checker
201
210
 
202
- testcase_log = run_solution_fn(solution, retry_index=retry_index)
211
+ testcase_log = await run_solution_fn(solution, retry_index=retry_index)
203
212
  assert testcase_log.stdout_absolute_path is not None
204
213
 
205
214
  if checker is None:
206
215
  checker_result = checkers.check_with_no_output(testcase_log)
207
216
  else:
208
217
  checker_digest = finders_digest[checker.path]
209
- checker_result = checkers.check(
218
+ checker_result = await checkers.check(
210
219
  checker_digest,
211
220
  testcase_log,
212
221
  Testcase(inputPath=input_path, outputPath=expected_output_path),
@@ -224,7 +233,7 @@ def run_stress(
224
233
  )
225
234
 
226
235
  retrier = Retrier(is_stress=True)
227
- eval = retrier.repeat(run_fn)
236
+ eval = await retrier.repeat(run_fn)
228
237
 
229
238
  return finder_parser.FinderResult(
230
239
  solution=call.solution,
@@ -234,7 +243,9 @@ def run_stress(
234
243
  checker_result=eval.result,
235
244
  )
236
245
 
237
- runner = finder_parser.FinderTreeRunner(runner=run_solution_and_checker_fn)
246
+ runner = finder_parser.FinderTreeRunner(
247
+ runner=syncer.sync(run_solution_and_checker_fn)
248
+ )
238
249
  finder_outcome: finder_parser.FinderOutcome = runner.transform(parsed_finder)
239
250
 
240
251
  internal_error_results = [
rbx/box/tasks.py ADDED
@@ -0,0 +1,258 @@
1
+ import pathlib
2
+ from typing import Optional
3
+
4
+ from rbx.box import checkers, package
5
+ from rbx.box.code import CommunicationItem, run_communication, run_item
6
+ from rbx.box.environment import EnvironmentSandbox, ExecutionConfig, VerificationLevel
7
+ from rbx.box.retries import Retrier
8
+ from rbx.box.schema import Limits, Solution, Testcase
9
+ from rbx.grading.judge.sandbox import SandboxBase
10
+ from rbx.grading.steps import (
11
+ DigestOrDest,
12
+ DigestOrSource,
13
+ Evaluation,
14
+ GradingFileInput,
15
+ GradingFileOutput,
16
+ TestcaseIO,
17
+ TestcaseLog,
18
+ )
19
+ from rbx.utils import model_to_yaml
20
+
21
+
22
def get_limits_for_language(
    lang: Optional[str],
    verification: VerificationLevel,
    timelimit_override: Optional[int],
    use_timelimit: bool = True,
) -> Limits:
    """Build the resource limits applied to a solution written in ``lang``.

    Args:
        lang: Language used to look up the package's time and memory limits.
        verification: Verification level; ``FULL`` or above sets
            ``isDoubleTL`` on the returned limits.
        timelimit_override: When truthy, replaces the package time limit.
        use_timelimit: When false, the returned limits carry no time limit.

    Returns:
        A ``Limits`` with time, memory, output and ``isDoubleTL`` filled in.
    """
    problem = package.find_problem_package_or_die()

    # A falsy override (``None`` or ``0``) falls back to the package limit.
    effective_time = timelimit_override or problem.timelimit_for_language(lang)
    if not use_timelimit:
        effective_time = None

    double_tl = verification.value >= VerificationLevel.FULL.value
    memory_limit = problem.memorylimit_for_language(lang)
    return Limits(
        time=effective_time,
        memory=memory_limit,
        output=problem.outputLimit,
        isDoubleTL=double_tl,
    )
38
+
39
+
40
async def run_solution_on_testcase(
    solution: Solution,
    compiled_digest: str,
    checker_digest: Optional[str],
    testcase: Testcase,
    output_dir: pathlib.Path,
    interactor_digest: Optional[str] = None,
    testcase_index: int = 0,
    verification: VerificationLevel = VerificationLevel.NONE,
    timelimit_override: Optional[int] = None,
    use_retries: bool = True,
    use_timelimit: bool = True,
) -> Evaluation:
    """Run a compiled solution against one testcase and evaluate the result.

    When ``interactor_digest`` is given, the whole run is delegated to
    ``_run_communication_solution_on_testcase``. Otherwise the solution is
    executed with the testcase input on stdin, its stdout/stderr are written
    under ``output_dir``, and the result is checked.

    Args:
        solution: Solution being executed.
        compiled_digest: Digest of the compiled solution executable.
        checker_digest: Digest of the compiled checker; when ``None`` the run
            is evaluated with ``checkers.check_with_no_output``.
        testcase: Testcase whose input is fed to the solution.
        output_dir: Directory receiving the ``.out``, ``.err`` and ``.log``
            files, named after the testcase input file.
        interactor_digest: Digest of the compiled interactor, if any.
        testcase_index: Index recorded in the resulting evaluation.
        verification: Verification level forwarded to the limit computation.
        timelimit_override: Time limit override forwarded to the limit
            computation.
        use_retries: When true, the run goes through ``Retrier().repeat``;
            otherwise it is executed exactly once with retry index 0.
        use_timelimit: Forwarded to the limit computation.

    Returns:
        The ``Evaluation`` of the run, which is also dumped as YAML to the
        ``.log`` file.
    """
    if interactor_digest is not None:
        return await _run_communication_solution_on_testcase(
            solution,
            compiled_digest,
            interactor_digest,
            checker_digest,
            testcase,
            output_dir,
            testcase_index=testcase_index,
            verification=verification,
            timelimit_override=timelimit_override,
            use_retries=use_retries,
            use_timelimit=use_timelimit,
        )

    async def evaluate_once(retry_index: int) -> Evaluation:
        sandbox_in_use = package.get_singleton_sandbox()
        extra_config = _get_execution_config(
            get_limits_for_language(
                solution.language,
                verification,
                timelimit_override,
                use_timelimit=use_timelimit,
            ),
            sandbox_in_use,
        )

        # All artifacts share the testcase input's file name, differing only
        # in suffix.
        stdout_path = output_dir / testcase.inputPath.with_suffix('.out').name
        stderr_path = stdout_path.with_suffix('.err')
        yaml_log_path = stdout_path.with_suffix('.log')
        stdout_path.parent.mkdir(parents=True, exist_ok=True)

        run_log = await run_item(
            solution,
            DigestOrSource.create(compiled_digest),
            stdin=DigestOrSource.create(testcase.inputPath),
            stdout=DigestOrDest.create(stdout_path),
            stderr=DigestOrDest.create(stderr_path),
            extra_config=extra_config,
            retry_index=retry_index,
        )

        if checker_digest is None:
            checker_result = checkers.check_with_no_output(run_log)
        else:
            checker_result = await checkers.check(
                checker_digest,
                run_log,
                testcase,
                program_output=stdout_path,
            )

        # A missing run log contributes no fields to the testcase log.
        run_log_fields = {} if run_log is None else run_log.model_dump()
        evaluation = Evaluation(
            result=checker_result,
            testcase=TestcaseIO(
                index=testcase_index,
                input=testcase.inputPath,
                output=testcase.outputPath,
            ),
            log=TestcaseLog(
                **run_log_fields,
                stdout_absolute_path=stdout_path.absolute(),
                stderr_absolute_path=stderr_path.absolute(),
                log_absolute_path=yaml_log_path.absolute(),
            ),
        )

        yaml_log_path.write_text(model_to_yaml(evaluation))
        return evaluation

    if use_retries:
        return await Retrier().repeat(evaluate_once)
    return await evaluate_once(0)
127
+
128
+
129
def _get_execution_config(
    limits: Limits,
    actual_sandbox: SandboxBase,
) -> ExecutionConfig:
    """Translate ``limits`` into an ``ExecutionConfig`` for a sandboxed run.

    Args:
        limits: Time, memory and output limits to enforce.
        actual_sandbox: Sandbox that will execute the program; it is asked
            whether it uses a soft timeout.

    Returns:
        An ``ExecutionConfig`` wrapping a freshly built ``EnvironmentSandbox``.
    """
    env_sandbox = EnvironmentSandbox()

    time_limit = limits.time
    if time_limit is not None and limits.isDoubleTL:
        # Double TL.
        time_limit = time_limit * 2

    # The wall time limit mirrors the time limit, and is doubled when the
    # sandbox reports a soft timeout.
    wall_time_limit = time_limit
    if time_limit is not None and actual_sandbox.use_soft_timeout():
        wall_time_limit = time_limit * 2

    env_sandbox.timeLimit = time_limit
    env_sandbox.wallTimeLimit = wall_time_limit
    env_sandbox.memoryLimit = limits.memory
    env_sandbox.fileSizeLimit = limits.output
    return ExecutionConfig(sandbox=env_sandbox)
144
+
145
+
146
async def _run_communication_solution_on_testcase(
    solution: Solution,
    compiled_digest: str,
    interactor_digest: str,
    checker_digest: Optional[str],
    testcase: Testcase,
    output_dir: pathlib.Path,
    testcase_index: int = 0,
    verification: VerificationLevel = VerificationLevel.NONE,
    timelimit_override: Optional[int] = None,
    use_retries: bool = True,
    use_timelimit: bool = True,
) -> Evaluation:
    """Run a solution against an interactor on one testcase and evaluate it.

    The interactor receives the testcase input as ``interactor.in`` and its
    ``interactor.out`` is collected into the ``.out`` file under
    ``output_dir``; the interactor's stderr goes to the ``.err`` file. The
    outcome is computed by ``checkers.check_communication``.

    Args:
        solution: Solution being executed.
        compiled_digest: Digest of the compiled solution executable.
        interactor_digest: Digest of the compiled interactor executable.
        checker_digest: Digest of the compiled checker, or ``None``.
        testcase: Testcase whose input is handed to the interactor.
        output_dir: Directory receiving the ``.out``, ``.err``, ``.log``,
            ``.int.log`` and ``.sol.log`` files, named after the testcase
            input file.
        testcase_index: Index recorded in the resulting evaluation.
        verification: Verification level forwarded to the limit computation.
        timelimit_override: Time limit override forwarded to the limit
            computation.
        use_retries: When true, the run goes through ``Retrier().repeat``;
            otherwise it is executed exactly once with retry index 0.
        use_timelimit: Forwarded to the limit computation.

    Returns:
        The ``Evaluation`` of the run, which is also dumped as YAML to the
        ``.log`` file.
    """

    async def run_fn(retry_index: int) -> Evaluation:
        actual_sandbox = package.get_singleton_sandbox()
        interactor_sandbox = package.get_singleton_interactor_sandbox()

        limits = get_limits_for_language(
            solution.language,
            verification,
            timelimit_override,
            use_timelimit=use_timelimit,
        )

        extra_config = _get_execution_config(limits, actual_sandbox)
        interactor_extra_config = _get_execution_config(limits, interactor_sandbox)
        # TODO: maybe combine wall time limits?
        # For now the interactor's wall time limit is extended by the
        # solution's, whenever both are set.
        if (
            interactor_extra_config.sandbox is not None
            and interactor_extra_config.sandbox.wallTimeLimit is not None
            and extra_config.sandbox is not None
            and extra_config.sandbox.wallTimeLimit is not None
        ):
            interactor_extra_config.sandbox.wallTimeLimit += (
                extra_config.sandbox.wallTimeLimit
            )

        output_path = output_dir / testcase.inputPath.with_suffix('.out').name
        error_path = output_path.with_suffix('.err')
        log_path = output_path.with_suffix('.log')
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # ``pathlib.Path`` is used instead of ``pathlib.PosixPath``: it builds
        # the same object on POSIX hosts, while instantiating ``PosixPath``
        # directly raises on other platforms.
        interactor_item = CommunicationItem(
            code=package.get_interactor(),
            executable=DigestOrSource.create(interactor_digest),
            stderr=DigestOrDest.create(error_path),
            extra_config=interactor_extra_config,
            extra_args='interactor.in interactor.out',
            inputs=[
                GradingFileInput(
                    src=testcase.inputPath,
                    dest=pathlib.Path('interactor.in'),
                )
            ],
            outputs=[
                GradingFileOutput(
                    src=pathlib.Path('interactor.out'),
                    dest=output_path,
                    # NOTE(review): presumably creates the destination even
                    # if the interactor wrote nothing; confirm in grading steps.
                    touch=True,
                )
            ],
        )
        solution_item = CommunicationItem(
            code=solution,
            executable=DigestOrSource.create(compiled_digest),
            extra_config=extra_config,
        )

        interactor_run_log, run_log = await run_communication(
            interactor=interactor_item,
            solution=solution_item,
            retry_index=retry_index,
        )

        checker_result = await checkers.check_communication(
            checker_digest,
            run_log,
            interactor_run_log,
            error_path,
            testcase,
            output_path,
        )

        # Named ``evaluation`` so the builtin ``eval`` is not shadowed.
        evaluation = Evaluation(
            result=checker_result,
            testcase=TestcaseIO(
                index=testcase_index,
                input=testcase.inputPath,
                output=testcase.outputPath,
            ),
            log=TestcaseLog(
                **(run_log.model_dump() if run_log is not None else {}),
                stdout_absolute_path=output_path.absolute(),
                stderr_absolute_path=error_path.absolute(),
                log_absolute_path=log_path.absolute(),
            ),
        )

        log_path.write_text(model_to_yaml(evaluation))

        # Raw run logs of each side are kept next to the evaluation log.
        if interactor_run_log is not None:
            interactor_log_path = output_path.with_suffix('.int.log')
            interactor_log_path.write_text(model_to_yaml(interactor_run_log))
        if run_log is not None:
            solution_log_path = output_path.with_suffix('.sol.log')
            solution_log_path.write_text(model_to_yaml(run_log))
        return evaluation

    if not use_retries:
        return await run_fn(0)

    retrier = Retrier()
    return await retrier.repeat(run_fn)