rbx.cp 0.5.61-py3-none-any.whl → 0.5.63-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rbx/box/cd.py +14 -0
- rbx/box/cli.py +6 -0
- rbx/box/code.py +34 -5
- rbx/box/contest/main.py +6 -2
- rbx/box/git_utils.py +28 -0
- rbx/box/package.py +23 -0
- rbx/box/packaging/boca/packager.py +3 -18
- rbx/box/packaging/moj/packager.py +1 -1
- rbx/box/packaging/polygon/upload.py +7 -5
- rbx/box/presets/__init__.py +80 -6
- rbx/box/presets/fetch.py +18 -1
- rbx/box/retries.py +2 -0
- rbx/box/solutions.py +242 -114
- rbx/box/solutions_test.py +3 -1
- rbx/box/tasks.py +6 -1
- rbx/box/testcase_utils.py +3 -0
- rbx/box/ui/css/app.tcss +14 -2
- rbx/box/ui/main.py +3 -5
- rbx/box/ui/screens/error.py +19 -0
- rbx/box/ui/screens/run.py +4 -12
- rbx/box/ui/screens/run_explorer.py +77 -1
- rbx/box/ui/screens/run_test_explorer.py +166 -0
- rbx/box/ui/screens/selector.py +26 -0
- rbx/box/ui/screens/test_explorer.py +33 -5
- rbx/box/ui/utils/__init__.py +0 -0
- rbx/box/ui/utils/run_ui.py +95 -0
- rbx/box/ui/widgets/__init__.py +0 -0
- rbx/box/ui/widgets/file_log.py +3 -1
- rbx/box/ui/widgets/interaction_box.py +59 -0
- rbx/box/ui/widgets/test_output_box.py +113 -0
- rbx/box/ui/widgets/two_sided_test_output_box.py +60 -0
- rbx/grading/steps.py +1 -0
- rbx/resources/packagers/boca/compile/java +55 -59
- rbx/resources/packagers/boca/interactive/java +2 -2
- rbx/resources/packagers/boca/run/java +2 -2
- {rbx_cp-0.5.61.dist-info → rbx_cp-0.5.63.dist-info}/METADATA +1 -1
- {rbx_cp-0.5.61.dist-info → rbx_cp-0.5.63.dist-info}/RECORD +40 -30
- {rbx_cp-0.5.61.dist-info → rbx_cp-0.5.63.dist-info}/LICENSE +0 -0
- {rbx_cp-0.5.61.dist-info → rbx_cp-0.5.63.dist-info}/WHEEL +0 -0
- {rbx_cp-0.5.61.dist-info → rbx_cp-0.5.63.dist-info}/entry_points.txt +0 -0
rbx/box/solutions.py
CHANGED
@@ -16,7 +16,7 @@ import typer
 from pydantic import BaseModel
 
 from rbx import console, utils
-from rbx.box import checkers, environment, package
+from rbx.box import checkers, environment, package, state
 from rbx.box.code import (
     SanitizationLevel,
     compile_item,
@@ -64,9 +64,8 @@ StructuredEvaluation = Dict[str, Dict[str, List[Optional[Deferred[Evaluation]]]]
 
 @dataclasses.dataclass(frozen=True)
 class EvaluationItem:
-    …
-    …
-    testcase_index: int
+    solution: Solution
+    testcase_entry: TestcaseEntry
     eval: Deferred[Evaluation]
 
 
@@ -75,11 +74,20 @@ class GroupSkeleton(BaseModel):
     testcases: List[Testcase]
 
 
+class SolutionSkeleton(Solution):
+    runs_dir: pathlib.Path
+
+    def get_entry_prefix(self, entry: TestcaseEntry) -> pathlib.Path:
+        return self.runs_dir / entry.group / f'{entry.index:03d}'
+
+
 class SolutionReportSkeleton(BaseModel):
-    solutions: List[…
+    solutions: List[SolutionSkeleton]
     entries: List[TestcaseEntry]
     groups: List[GroupSkeleton]
     limits: Dict[str, Limits]
+    verification: VerificationLevel
+    capture_pipes: bool = False
 
     def find_group_skeleton(self, group_name: str) -> Optional[GroupSkeleton]:
         groups = [group for group in self.groups if group.name == group_name]
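The new `SolutionSkeleton` binds each solution to its own `runs_dir`, and `get_entry_prefix` derives the per-test artifact prefix from it. A minimal sketch of the resulting layout, assuming solution 0 was assigned `<runs>/0` (as `_get_report_skeleton` does further down); the testcase entry is hypothetical:

```python
from rbx.box.solutions import SolutionReportSkeleton
from rbx.box.testcase_utils import TestcaseEntry


def show_entry_prefix(skeleton: SolutionReportSkeleton) -> None:
    sol = skeleton.solutions[0]  # runs_dir == <runs>/0
    entry = TestcaseEntry(group='gen1', index=5)  # hypothetical testcase
    prefix = sol.get_entry_prefix(entry)  # <runs>/0/gen1/005
    # Per-test artifacts hang off this prefix, e.g. prefix.with_suffix('.out').
    print(prefix)
```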
@@ -87,6 +95,21 @@ class SolutionReportSkeleton(BaseModel):
             return None
         return groups[0]
 
+    def find_solution_skeleton(self, solution: Solution) -> Optional[SolutionSkeleton]:
+        for sol in self.solutions:
+            if sol.path == solution.path:
+                return sol
+        return None
+
+    def find_solution_skeleton_index(self, solution: Solution) -> Optional[int]:
+        for i, sol in enumerate(self.solutions):
+            if sol.path == solution.path:
+                return i
+        return None
+
+    def get_solution_path_set(self) -> Set[str]:
+        return set(str(sol.path) for sol in self.solutions)
+
     def empty_structured_evaluation(self) -> StructuredEvaluation:
         res: StructuredEvaluation = {}
         for solution in self.solutions:
@@ -164,21 +187,20 @@ def _run_solution(
     solution: Solution,
     compiled_digest: str,
     checker_digest: Optional[str],
-    …
+    runs_dir: pathlib.Path,
     group_name: str,
     interactor_digest: Optional[str] = None,
     progress: Optional[StatusProgress] = None,
     verification: VerificationLevel = VerificationLevel.NONE,
     timelimit_override: Optional[int] = None,
 ) -> List[Deferred[Evaluation]]:
-    runs_dir = package.get_problem_runs_dir()
-
     group = package.get_testgroup(group_name)
     testcases = find_built_testcases(group)
     res: List[Deferred[Evaluation]] = []
     for i, testcase in enumerate(testcases):
         assert testcase.outputPath is not None
-        output_path = runs_dir / …
+        output_path = runs_dir / group.name
+        output_path.mkdir(parents=True, exist_ok=True)
 
         if progress:
             progress.update(
@@ -204,6 +226,7 @@
 
 
 async def convert_list_of_solution_evaluations_to_dict(
+    skeleton: SolutionReportSkeleton,
     items: Iterable[EvaluationItem],
 ) -> List[Dict[str, List[Evaluation]]]:
     pkg = package.find_problem_package_or_die()
@@ -212,16 +235,18 @@ async def convert_list_of_solution_evaluations_to_dict(
     ]
 
     for item in items:
-        …
+        sol_idx = skeleton.find_solution_skeleton_index(item.solution)
+        if sol_idx is not None:
+            to_append = await item.eval()
+            res[sol_idx][item.testcase_entry.group].append(to_append)
 
     return res
 
 
-def _get_report_skeleton(
+def _get_solutions_for_skeleton(
     tracked_solutions: Optional[Set[str]] = None,
     verification: VerificationLevel = VerificationLevel.NONE,
-    timelimit_override: Optional[int] = None,
-) -> SolutionReportSkeleton:
+) -> List[Solution]:
     pkg = package.find_problem_package_or_die()
     solutions = [
         sol
@@ -234,6 +259,16 @@ def _get_report_skeleton(
         for solution in solutions
         if str(solution.path) in tracked_solutions
     ]
+    return solutions
+
+
+def _get_report_skeleton(
+    tracked_solutions: Optional[Set[str]] = None,
+    verification: VerificationLevel = VerificationLevel.NONE,
+    timelimit_override: Optional[int] = None,
+) -> SolutionReportSkeleton:
+    pkg = package.find_problem_package_or_die()
+    solutions = _get_solutions_for_skeleton(tracked_solutions, verification)
 
     langs = set(find_language_name(solution) for solution in solutions)
     limits = {
@@ -251,17 +286,36 @@ def _get_report_skeleton(
         for group in groups
         for i in range(len(group.testcases))
     ]
-    …
-    …
+
+    # Prepare directory.
+    runs_dir = package.get_problem_runs_dir()
+    shutil.rmtree(str(runs_dir), ignore_errors=True)
+    runs_dir.mkdir(parents=True, exist_ok=True)
+
+    skeleton = SolutionReportSkeleton(
+        solutions=[
+            SolutionSkeleton(
+                **solution.model_dump(),
+                runs_dir=package.get_problem_runs_dir() / f'{i}',
+            )
+            for i, solution in enumerate(solutions)
+        ],
         groups=groups,
         limits=limits,
         entries=entries,
+        verification=verification,
+        capture_pipes=state.STATE.debug_logs,
     )
 
+    skeleton_file = runs_dir / 'skeleton.yml'
+    skeleton_file.write_text(utils.model_to_yaml(skeleton))
+
+    return skeleton
+
 
 def _produce_solution_items(
+    skeleton: SolutionReportSkeleton,
     progress: Optional[StatusProgress] = None,
-    tracked_solutions: Optional[Set[str]] = None,
     verification: VerificationLevel = VerificationLevel.NONE,
     check: bool = True,
     timelimit_override: Optional[int] = None,
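`_get_report_skeleton` now owns the runs-directory lifecycle and persists the skeleton to `runs/skeleton.yml` up front, so other consumers (e.g. the new run-explorer UI screens) can rebuild the report structure from disk. A hedged sketch of reading it back, assuming pydantic v2 and PyYAML; only the `utils.model_to_yaml` writer appears in this diff, so rbx's own loader may differ:

```python
import yaml

from rbx.box import package
from rbx.box.solutions import SolutionReportSkeleton

skeleton_file = package.get_problem_runs_dir() / 'skeleton.yml'
skeleton = SolutionReportSkeleton.model_validate(
    yaml.safe_load(skeleton_file.read_text())
)
for sol in skeleton.solutions:
    print(sol.path, '->', sol.runs_dir)
```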
@@ -279,26 +333,13 @@ def _produce_solution_items(
     interactor_digest = None
 
     compiled_solutions = compile_solutions(
-        progress=progress,
-        …
-        …
-    # Clear run directory and rely on cache to
-    # repopulate it.
-    runs_dir = package.get_problem_runs_dir()
-    shutil.rmtree(str(runs_dir), ignore_errors=True)
-    runs_dir.mkdir(parents=True, exist_ok=True)
-    solutions = list(
-        (i, sol)
-        for i, sol in enumerate(pkg.solutions)
-        if verification.value >= VerificationLevel.ALL_SOLUTIONS.value or is_fast(sol)
+        progress=progress,
+        tracked_solutions=skeleton.get_solution_path_set(),
+        sanitized=sanitized,
     )
-    if tracked_solutions is not None:
-        solutions = [
-            (i, sol) for i, sol in solutions if str(sol.path) in tracked_solutions
-        ]
 
     def yield_items(
-        …
+        solution: SolutionSkeleton, group_name: str
     ) -> List[EvaluationItem]:
         res: List[EvaluationItem] = []
         for i, eval in enumerate(
@@ -306,7 +347,7 @@ def _produce_solution_items(
             solution,
             compiled_solutions[solution.path],
             checker_digest,
-            …
+            solution.runs_dir,
             group_name,
             interactor_digest=interactor_digest,
             progress=progress,
@@ -316,9 +357,8 @@ def _produce_solution_items(
         ):
             res.append(
                 EvaluationItem(
-                    …
-                    …
-                    testcase_index=i,
+                    solution=solution,
+                    testcase_entry=TestcaseEntry(group=group_name, index=i),
                     eval=eval,
                 )
             )
@@ -328,9 +368,9 @@ def _produce_solution_items(
     res: List[EvaluationItem] = []
 
     groups = pkg.testcases
-    for …
+    for solution in skeleton.solutions:
         for group in groups:
-            res.extend(yield_items(…
+            res.extend(yield_items(solution, group.name))
 
     return res
 
@@ -356,24 +396,22 @@ def run_solutions(
     timelimit_override: Optional[int] = None,
     sanitized: bool = False,
 ) -> RunSolutionResult:
+    skeleton = _get_report_skeleton(
+        tracked_solutions,
+        verification=verification,
+        timelimit_override=timelimit_override,
+    )
     result = RunSolutionResult(
-        skeleton=_get_report_skeleton(
-            tracked_solutions,
-            verification=verification,
-            timelimit_override=timelimit_override,
-        ),
+        skeleton=skeleton,
         items=_produce_solution_items(
+            skeleton=skeleton,
             progress=progress,
-            tracked_solutions=tracked_solutions,
             verification=verification,
             check=check,
             timelimit_override=timelimit_override,
             sanitized=sanitized,
         ),
     )
-    skeleton_file = package.get_problem_runs_dir() / 'skeleton.yml'
-    skeleton_file.parent.mkdir(parents=True, exist_ok=True)
-    skeleton_file.write_text(utils.model_to_yaml(result.skeleton))
     return result
 
 
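`run_solutions` now builds the skeleton once and threads it through both the result and `_produce_solution_items`, replacing the old `tracked_solutions` plumbing. A usage sketch mirroring the updated `solutions_test.py` further down (inside an async function, with the same imports the test uses):

```python
result = run_solutions(verification=VerificationLevel.FULL)
res = await convert_list_of_solution_evaluations_to_dict(
    result.skeleton, result.items
)
# res[i][group_name] lists the Evaluations for result.skeleton.solutions[i];
# items whose solution is not in the skeleton are silently skipped.
```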
@@ -515,8 +553,8 @@ async def _generate_testcase_interactively(
 
 def _run_interactive_solutions(
     testcase: Testcase,
+    skeleton: SolutionReportSkeleton,
     progress: Optional[StatusProgress] = None,
-    tracked_solutions: Optional[Set[str]] = None,
     verification: VerificationLevel = VerificationLevel.NONE,
     check: bool = True,
     sanitized: bool = False,
@@ -531,22 +569,16 @@ def _run_interactive_solutions(
     interactor_digest = None
 
     compiled_solutions = compile_solutions(
-        progress=progress,
+        progress=progress,
+        tracked_solutions=skeleton.get_solution_path_set(),
+        sanitized=sanitized,
     )
 
-    solutions = list(enumerate(pkg.solutions))
-    if tracked_solutions is not None:
-        solutions = [
-            (i, sol) for i, sol in solutions if str(sol.path) in tracked_solutions
-        ]
-
-    irun_dir = package.get_problem_iruns_dir()
-
     if progress:
         progress.update('Running solutions...')
 
-    for …
-        output_dir = …
+    for solution in skeleton.solutions:
+        output_dir = solution.runs_dir
 
         async def run_fn(solution=solution, output_dir=output_dir):
             return await run_solution_on_testcase(
@@ -561,13 +593,51 @@ def _run_interactive_solutions(
         )
 
         yield EvaluationItem(
-            …
-            …
-            testcase_index=0,
+            solution=solution,
+            testcase_entry=TestcaseEntry(group='irun', index=0),
             eval=Deferred(run_fn),
         )
 
 
+def _get_interactive_skeleton(
+    tracked_solutions: Optional[Set[str]] = None,
+    verification: VerificationLevel = VerificationLevel.NONE,
+) -> SolutionReportSkeleton:
+    solutions = _get_solutions_for_skeleton(tracked_solutions, verification)
+
+    langs = set(find_language_name(solution) for solution in solutions)
+    limits = {
+        lang: get_limits_for_language(lang, verification, timelimit_override=None)
+        for lang in langs
+        if lang is not None
+    }
+
+    # Ensure path is new.
+    irun_dir = package.get_problem_iruns_dir()
+    shutil.rmtree(str(irun_dir), ignore_errors=True)
+    irun_dir.mkdir(parents=True, exist_ok=True)
+
+    skeleton = SolutionReportSkeleton(
+        solutions=[
+            SolutionSkeleton(
+                **solution.model_dump(),
+                runs_dir=irun_dir / f'{i}',
+            )
+            for i, solution in enumerate(solutions)
+        ],
+        groups=[],
+        limits=limits,
+        entries=[],
+        verification=verification,
+        capture_pipes=True,
+    )
+
+    skeleton_file = irun_dir / 'skeleton.yml'
+    skeleton_file.write_text(utils.model_to_yaml(skeleton))
+
+    return skeleton
+
+
 async def run_and_print_interactive_solutions(
     progress: Optional[StatusProgress] = None,
     tracked_solutions: Optional[Set[str]] = None,
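`_get_interactive_skeleton` is the interactive analogue of `_get_report_skeleton`: no groups or entries, per-solution `runs_dir` under the iruns directory, and `capture_pipes=True`, presumably so the new interaction widgets can replay stdio. A sketch with a hypothetical tracked path:

```python
skeleton = _get_interactive_skeleton(
    tracked_solutions={'sols/main.cpp'},  # hypothetical solution path
    verification=VerificationLevel.FULL,
)
assert skeleton.capture_pipes and not skeleton.groups
# Each solution's artifacts land under <iruns>/<i> via its runs_dir.
```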
@@ -579,12 +649,11 @@ async def run_and_print_interactive_solutions(
     print: bool = False,
     sanitized: bool = False,
 ):
-    # Ensure path is new.
-    irun_dir = package.get_problem_iruns_dir()
-    shutil.rmtree(str(irun_dir), ignore_errors=True)
-    irun_dir.mkdir(parents=True, exist_ok=True)
-
     pkg = package.find_problem_package_or_die()
+    skeleton = _get_interactive_skeleton(
+        tracked_solutions,
+        verification=verification,
+    )
     testcase = await _generate_testcase_interactively(
         progress=progress,
         generator=generator,
@@ -596,15 +665,16 @@ async def run_and_print_interactive_solutions(
     )
     items = _run_interactive_solutions(
         testcase,
+        skeleton=skeleton,
         progress=progress,
-        tracked_solutions=tracked_solutions,
         verification=verification,
         check=check,
         sanitized=sanitized,
     )
 
     for item in items:
-        sol = …
+        sol = skeleton.find_solution_skeleton(item.solution)
+        assert sol is not None
 
         if progress:
             progress.update(f'Running [item]{sol.path}[/item]...')
@@ -613,7 +683,7 @@ async def run_and_print_interactive_solutions(
 
         with utils.no_progress(progress):
             console.console.print(get_testcase_markup_verdict(eval), end=' ')
-            _print_solution_header(sol, console.console…
+            _print_solution_header(sol, console.console)
             _print_solution_outcome(
                 sol, [eval], console.console, verification, subset=True
             )
@@ -703,6 +773,12 @@ def get_outcome_markup_verdict(outcome: Outcome) -> str:
     return res
 
 
+def get_full_outcome_markup_verdict(outcome: Outcome) -> str:
+    style = get_outcome_style_verdict(outcome)
+    res = f'[{style}]{outcome.name}[/{style}]'
+    return res
+
+
 def get_testcase_markup_verdict(eval: Evaluation) -> str:
     # if eval.log.stdout_absolute_path:
     #     output_path = eval.log.stdout_absolute_path.resolve()
@@ -711,6 +787,10 @@ def get_testcase_markup_verdict(eval: Evaluation) -> str:
     return get_outcome_markup_verdict(eval.result.outcome)
 
 
+def get_full_testcase_markup_verdict(eval: Evaluation) -> str:
+    return get_full_outcome_markup_verdict(eval.result.outcome)
+
+
 def _get_evals_time_in_ms(evals: List[Evaluation]) -> int:
     if not evals:
         return 0
@@ -766,13 +846,60 @@ def get_worst_outcome(evals: List[Evaluation]) -> Outcome:
     return Outcome.worst_outcome(eval.result.outcome for eval in evals)
 
 
-def _print_solution_outcome(
+class SolutionOutcomeReport(BaseModel):
+    solution: Solution
+    evals: List[Evaluation]
+    ok: bool
+    expectedOutcome: Optional[ExpectedOutcome]
+    gotVerdicts: Set[Outcome]
+    runUnderDoubleTl: bool
+    doubleTlVerdicts: Set[Outcome]
+    sanitizerWarnings: bool
+    verification: VerificationLevel
+
+    def get_verdict_markup(self, incomplete: bool = False) -> str:
+        success_str = '[bold green]OK[/bold green]'
+        if not self.ok:
+            success_str = '[bold red]FAILED[/bold red]'
+        if incomplete:
+            success_str = '[bold yellow]INCOMPLETE[/bold yellow]'
+
+        gotVerdicts = self.gotVerdicts if not incomplete else {}
+
+        got_verdict_names = ' '.join(v.name for v in self.gotVerdicts)
+        verdict_str = ''
+        if self.expectedOutcome is not None:
+            verdict_str = f'Expected: {self.expectedOutcome}'
+            if gotVerdicts:
+                verdict_str += f', got: {got_verdict_names}'
+        elif gotVerdicts:
+            verdict_str = f'Got: {got_verdict_names}'
+        return f'{success_str} {verdict_str}'
+
+    def get_verdict_markup_with_warnings(self) -> str:
+        res = self.get_verdict_markup()
+        if self.runUnderDoubleTl:
+            if self.doubleTlVerdicts:
+                res += f'\n[bold yellow]WARNING[/bold yellow] The solution still passed in double TL, but failed with [item]{" ".join(v.name for v in self.doubleTlVerdicts)}[/item].'
+            else:
+                res += '\n[bold yellow]WARNING[/bold yellow] The solution still passed in double TL.'
+        if self.sanitizerWarnings:
+            res += '\n[bold yellow]WARNING[/bold yellow] The solution had sanitizer errors or warnings, marked with [bold yellow]*[/bold yellow]. See their stderr for more details.'
+        return res
+
+    def get_outcome_markup(self) -> str:
+        res = self.get_verdict_markup_with_warnings()
+        res += f'\nTime: {get_capped_evals_formatted_time(self.solution, self.evals, self.verification)}'
+        res += f'\nMemory: {get_evals_formatted_memory(self.evals)}'
+        return res
+
+
+def get_solution_outcome_report(
     solution: Solution,
     evals: List[Evaluation],
-    console: rich.console.Console,
     verification: VerificationLevel = VerificationLevel.NONE,
     subset: bool = False,
-) -> …
+) -> SolutionOutcomeReport:
     pkg = package.find_problem_package_or_die()
 
     has_plain_tle = False
@@ -805,26 +932,24 @@ def _print_solution_outcome(
     has_failed = unmatched_bad_verdicts or (
         expected_outcome_is_bad and not matched_bad_verdicts and not subset
     )
-    if has_failed:
-        console.print('[error]FAILED[/error]', end=' ')
-    else:
-        console.print('[success]OK[/success]', end=' ')
 
+    report_expected_outcome = None
+    report_got_verdicts = set()
+    report_run_under_double_tl = False
+    report_double_tl_verdicts = set()
+    report_sanitizer_warnings = False
     if has_failed or not subset:
-        …
+        report_expected_outcome = solution.outcome
     elif subset:
-        …
-        console.print(f'Got: {all_verdicts_names}', end='')
+        report_got_verdicts = all_verdicts
 
     if has_failed or not subset:
         # Only print verdicts if not subset.
         if unmatched_bad_verdicts:
-            …
-            console.print(f', got: {" ".join(unmatched_bad_verdicts_names)}', end='')
+            report_got_verdicts = unmatched_bad_verdicts
         elif expected_outcome_is_bad and not matched_bad_verdicts and not subset:
-            …
+            report_got_verdicts = {Outcome.ACCEPTED}
 
-    console.print()
     evals_time = _get_evals_time_in_ms(evals)
     expected_outcome_is_tle = solution.outcome.match(
         Outcome.TIME_LIMIT_EXCEEDED
@@ -846,26 +971,37 @@ def _print_solution_outcome(
     }
     if not other_verdicts:
         # The solution has no other bad verdicts except for TLEs in double TL.
-        …
-            '[yellow]WARNING[/yellow] The solution still passed in double TL.'
-        )
+        report_run_under_double_tl = True
     elif not (bad_verdicts - {Outcome.TIME_LIMIT_EXCEEDED}):
         # The solution has other bad soft TLE outcomes.
-        …
-        console.print(
-            f'[yellow]WARNING[/yellow] The solution could still run under double TL, but failed with [item]{other_verdicts_names}[/item].'
-        )
+        report_double_tl_verdicts = other_verdicts
 
     if has_sanitizer_warnings:
-        …
-        …
-        …
-        …
-        …
-        …
+        report_sanitizer_warnings = True
+
+    return SolutionOutcomeReport(
+        solution=solution,
+        evals=evals,
+        ok=not has_failed,
+        expectedOutcome=report_expected_outcome,
+        gotVerdicts=report_got_verdicts,
+        runUnderDoubleTl=report_run_under_double_tl,
+        doubleTlVerdicts=report_double_tl_verdicts,
+        sanitizerWarnings=report_sanitizer_warnings,
+        verification=verification,
     )
-    …
-    …
+
+
+def _print_solution_outcome(
+    solution: Solution,
+    evals: List[Evaluation],
+    console: rich.console.Console,
+    verification: VerificationLevel = VerificationLevel.NONE,
+    subset: bool = False,
+) -> bool:
+    report = get_solution_outcome_report(solution, evals, verification, subset)
+    console.print(report.get_outcome_markup())
+    return report.ok
 
 
 def _consume_and_key_evaluation_items(
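`_print_solution_outcome` is now a thin wrapper: verdict aggregation moved into `get_solution_outcome_report`, and rendering into the report's markup methods, so callers such as the TUI can consume the same data without a rich console. A sketch, assuming `solution` and `evals` are already in scope:

```python
report = get_solution_outcome_report(
    solution, evals, verification=VerificationLevel.FULL
)
markup = report.get_outcome_markup()  # verdict + warnings + time/memory lines
if not report.ok:
    print(markup)
```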
@@ -876,30 +1012,22 @@ def _consume_and_key_evaluation_items(
     Consumes EvaluationItems from a run_solutions call and build a view
     with them, possibly marking with optional unprocessed items.
     """
-    pkg = package.find_problem_package_or_die()
     res = skeleton.empty_structured_evaluation()
 
     for item in items:
-        solution…
-        …
+        res[str(item.solution.path)][item.testcase_entry.group][
+            item.testcase_entry.index
+        ] = item.eval
 
     return res
 
 
 def _print_solution_header(
-    solution: …
+    solution: SolutionSkeleton,
+    console: rich.console.Console,
 ):
-    solutions = package.get_solutions()
-    solution_index = [
-        i for i, sol in enumerate(solutions) if sol.path == solution.path
-    ][0]
-    solution_testdir = (
-        package.get_problem_iruns_dir() / f'{solution_index}'
-        if is_irun
-        else package.get_problem_runs_dir() / f'{solution_index}'
-    )
     console.print(f'[item]{solution.path}[/item]', end=' ')
-    console.print(f'({solution_testdir})')
+    console.print(f'({solution.runs_dir})')
 
 
 @dataclasses.dataclass
rbx/box/solutions_test.py
CHANGED
@@ -24,7 +24,9 @@ async def test_solutions(pkg_from_testdata: pathlib.Path):
     await generate_outputs_for_testcases(entries)
 
     result = run_solutions(verification=VerificationLevel.FULL)
-    res = await convert_list_of_solution_evaluations_to_dict(result.items)
+    res = await convert_list_of_solution_evaluations_to_dict(
+        result.skeleton, result.items
+    )
 
     # First solution should pass all tests.
     assert all(chk.result.outcome == Outcome.ACCEPTED for chk in res[0]['gen1'])
rbx/box/tasks.py
CHANGED
@@ -86,6 +86,7 @@ async def run_solution_on_testcase(
     output_path = output_dir / testcase.inputPath.with_suffix('.out').name
     error_path = output_path.with_suffix('.err')
     log_path = output_path.with_suffix('.log')
+    eval_path = output_path.with_suffix('.eval')
     output_path.parent.mkdir(parents=True, exist_ok=True)
 
     run_log = await run_item(
@@ -120,10 +121,12 @@ async def run_solution_on_testcase(
             stdout_absolute_path=output_path.absolute(),
             stderr_absolute_path=error_path.absolute(),
             log_absolute_path=log_path.absolute(),
+            eval_absolute_path=eval_path.absolute(),
         ),
     )
 
     log_path.write_text(model_to_yaml(eval))
+    eval_path.write_text(model_to_yaml(eval))
     return eval
 
     if not use_retries:
@@ -198,6 +201,7 @@ async def _run_communication_solution_on_testcase(
     solution_error_path = output_path.with_suffix('.sol.err')
     interactor_error_path = output_path.with_suffix('.int.err')
     log_path = output_path.with_suffix('.log')
+    eval_path = output_path.with_suffix('.eval')
     output_path.parent.mkdir(parents=True, exist_ok=True)
 
     interactor_capture_path = (
@@ -268,11 +272,12 @@ async def _run_communication_solution_on_testcase(
             stdout_absolute_path=output_path.absolute(),
             stderr_absolute_path=solution_error_path.absolute(),
             log_absolute_path=log_path.absolute(),
+            eval_absolute_path=eval_path.absolute(),
         ),
     )
 
     log_path.write_text(model_to_yaml(eval))
-
+    eval_path.write_text(model_to_yaml(eval))
     interactor_log_path = output_path.with_suffix('.int.log')
     interactor_log_path.unlink(missing_ok=True)
     if interactor_run_log is not None:
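Both run paths now write an `.eval` sidecar holding the same YAML-serialized `Evaluation` as `.log`, and record its absolute path in the log model. A hedged sketch of reading one back with PyYAML; the path is illustrative, and the `result`/`outcome` keys are assumed from the `eval.result.outcome` accesses in `solutions.py`:

```python
import pathlib

import yaml

eval_path = pathlib.Path('build/runs/0/gen1/000.eval')  # hypothetical artifact
data = yaml.safe_load(eval_path.read_text())
print(data['result']['outcome'])
```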
rbx/box/testcase_utils.py
CHANGED
@@ -33,6 +33,9 @@ class TestcaseEntry(BaseModel):
         group, index = spec.split('/')
         return TestcaseEntry(group=group.strip(), index=int(index))
 
+    def get_prefix_path(self) -> pathlib.Path:
+        return package.get_build_testgroup_path(self.group) / f'{self.index:03d}'
+
 
 class TestcasePattern(BaseModel):
     group_prefix: List[str]
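`TestcaseEntry` can now resolve its zero-padded build prefix directly. A small sketch; the base directory comes from `package.get_build_testgroup_path`, and the `.in` suffix is an assumption:

```python
from rbx.box.testcase_utils import TestcaseEntry

entry = TestcaseEntry(group='gen1', index=5)
prefix = entry.get_prefix_path()  # <build testgroup dir for 'gen1'>/005
print(prefix.with_suffix('.in'))  # assumed input suffix
```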