rbx.cp 0.5.61-py3-none-any.whl → 0.5.62-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rbx/box/cd.py +14 -0
- rbx/box/cli.py +6 -0
- rbx/box/code.py +34 -5
- rbx/box/contest/main.py +6 -2
- rbx/box/git_utils.py +28 -0
- rbx/box/package.py +23 -0
- rbx/box/packaging/boca/packager.py +3 -18
- rbx/box/packaging/moj/packager.py +1 -1
- rbx/box/packaging/polygon/upload.py +7 -5
- rbx/box/presets/__init__.py +80 -6
- rbx/box/presets/fetch.py +18 -1
- rbx/box/retries.py +2 -0
- rbx/box/solutions.py +238 -113
- rbx/box/solutions_test.py +3 -1
- rbx/box/tasks.py +6 -1
- rbx/box/testcase_utils.py +3 -0
- rbx/box/ui/css/app.tcss +14 -2
- rbx/box/ui/main.py +3 -5
- rbx/box/ui/screens/error.py +19 -0
- rbx/box/ui/screens/run.py +4 -12
- rbx/box/ui/screens/run_explorer.py +77 -1
- rbx/box/ui/screens/run_test_explorer.py +155 -0
- rbx/box/ui/screens/selector.py +26 -0
- rbx/box/ui/screens/test_explorer.py +20 -5
- rbx/box/ui/utils/__init__.py +0 -0
- rbx/box/ui/utils/run_ui.py +95 -0
- rbx/box/ui/widgets/__init__.py +0 -0
- rbx/box/ui/widgets/file_log.py +3 -1
- rbx/box/ui/widgets/test_output_box.py +104 -0
- rbx/box/ui/widgets/two_sided_test_output_box.py +56 -0
- rbx/grading/steps.py +1 -0
- rbx/resources/packagers/boca/compile/java +55 -59
- rbx/resources/packagers/boca/interactive/java +2 -2
- rbx/resources/packagers/boca/run/java +2 -2
- {rbx_cp-0.5.61.dist-info → rbx_cp-0.5.62.dist-info}/METADATA +1 -1
- {rbx_cp-0.5.61.dist-info → rbx_cp-0.5.62.dist-info}/RECORD +39 -30
- {rbx_cp-0.5.61.dist-info → rbx_cp-0.5.62.dist-info}/LICENSE +0 -0
- {rbx_cp-0.5.61.dist-info → rbx_cp-0.5.62.dist-info}/WHEEL +0 -0
- {rbx_cp-0.5.61.dist-info → rbx_cp-0.5.62.dist-info}/entry_points.txt +0 -0
rbx/box/solutions.py
CHANGED
```diff
@@ -64,9 +64,8 @@ StructuredEvaluation = Dict[str, Dict[str, List[Optional[Deferred[Evaluation]]]]
 
 @dataclasses.dataclass(frozen=True)
 class EvaluationItem:
-    …
-    …
-    testcase_index: int
+    solution: Solution
+    testcase_entry: TestcaseEntry
     eval: Deferred[Evaluation]
 
 
@@ -75,11 +74,19 @@ class GroupSkeleton(BaseModel):
     testcases: List[Testcase]
 
 
+class SolutionSkeleton(Solution):
+    runs_dir: pathlib.Path
+
+    def get_entry_prefix(self, entry: TestcaseEntry) -> pathlib.Path:
+        return self.runs_dir / entry.group / f'{entry.index:03d}'
+
+
 class SolutionReportSkeleton(BaseModel):
-    solutions: List[…]
+    solutions: List[SolutionSkeleton]
     entries: List[TestcaseEntry]
     groups: List[GroupSkeleton]
     limits: Dict[str, Limits]
+    verification: VerificationLevel
 
     def find_group_skeleton(self, group_name: str) -> Optional[GroupSkeleton]:
         groups = [group for group in self.groups if group.name == group_name]
@@ -87,6 +94,21 @@ class SolutionReportSkeleton(BaseModel):
             return None
         return groups[0]
 
+    def find_solution_skeleton(self, solution: Solution) -> Optional[SolutionSkeleton]:
+        for sol in self.solutions:
+            if sol.path == solution.path:
+                return sol
+        return None
+
+    def find_solution_skeleton_index(self, solution: Solution) -> Optional[int]:
+        for i, sol in enumerate(self.solutions):
+            if sol.path == solution.path:
+                return i
+        return None
+
+    def get_solution_path_set(self) -> Set[str]:
+        return set(str(sol.path) for sol in self.solutions)
+
     def empty_structured_evaluation(self) -> StructuredEvaluation:
         res: StructuredEvaluation = {}
         for solution in self.solutions:
@@ -164,21 +186,20 @@ def _run_solution(
     solution: Solution,
     compiled_digest: str,
     checker_digest: Optional[str],
-    …
+    runs_dir: pathlib.Path,
     group_name: str,
     interactor_digest: Optional[str] = None,
     progress: Optional[StatusProgress] = None,
     verification: VerificationLevel = VerificationLevel.NONE,
     timelimit_override: Optional[int] = None,
 ) -> List[Deferred[Evaluation]]:
-    runs_dir = package.get_problem_runs_dir()
-
     group = package.get_testgroup(group_name)
     testcases = find_built_testcases(group)
     res: List[Deferred[Evaluation]] = []
     for i, testcase in enumerate(testcases):
         assert testcase.outputPath is not None
-        output_path = runs_dir / …
+        output_path = runs_dir / group.name
+        output_path.mkdir(parents=True, exist_ok=True)
 
         if progress:
             progress.update(
@@ -204,6 +225,7 @@ def _run_solution(
 
 
 async def convert_list_of_solution_evaluations_to_dict(
+    skeleton: SolutionReportSkeleton,
     items: Iterable[EvaluationItem],
 ) -> List[Dict[str, List[Evaluation]]]:
     pkg = package.find_problem_package_or_die()
@@ -212,16 +234,18 @@ async def convert_list_of_solution_evaluations_to_dict(
     ]
 
     for item in items:
-        …
+        sol_idx = skeleton.find_solution_skeleton_index(item.solution)
+        if sol_idx is not None:
+            to_append = await item.eval()
+            res[sol_idx][item.testcase_entry.group].append(to_append)
 
     return res
 
 
-def _get_report_skeleton(
+def _get_solutions_for_skeleton(
     tracked_solutions: Optional[Set[str]] = None,
     verification: VerificationLevel = VerificationLevel.NONE,
-    …
-) -> SolutionReportSkeleton:
+) -> List[Solution]:
     pkg = package.find_problem_package_or_die()
     solutions = [
         sol
@@ -234,6 +258,16 @@ def _get_report_skeleton(
         for solution in solutions
         if str(solution.path) in tracked_solutions
     ]
+    return solutions
+
+
+def _get_report_skeleton(
+    tracked_solutions: Optional[Set[str]] = None,
+    verification: VerificationLevel = VerificationLevel.NONE,
+    timelimit_override: Optional[int] = None,
+) -> SolutionReportSkeleton:
+    pkg = package.find_problem_package_or_die()
+    solutions = _get_solutions_for_skeleton(tracked_solutions, verification)
 
     langs = set(find_language_name(solution) for solution in solutions)
     limits = {
@@ -251,17 +285,35 @@ def _get_report_skeleton(
         for group in groups
        for i in range(len(group.testcases))
     ]
-    …
-    …
+
+    # Prepare directory.
+    runs_dir = package.get_problem_runs_dir()
+    shutil.rmtree(str(runs_dir), ignore_errors=True)
+    runs_dir.mkdir(parents=True, exist_ok=True)
+
+    skeleton = SolutionReportSkeleton(
+        solutions=[
+            SolutionSkeleton(
+                **solution.model_dump(),
+                runs_dir=package.get_problem_runs_dir() / f'{i}',
+            )
+            for i, solution in enumerate(solutions)
+        ],
         groups=groups,
         limits=limits,
         entries=entries,
+        verification=verification,
     )
 
+    skeleton_file = runs_dir / 'skeleton.yml'
+    skeleton_file.write_text(utils.model_to_yaml(skeleton))
+
+    return skeleton
+
 
 def _produce_solution_items(
+    skeleton: SolutionReportSkeleton,
     progress: Optional[StatusProgress] = None,
-    tracked_solutions: Optional[Set[str]] = None,
     verification: VerificationLevel = VerificationLevel.NONE,
     check: bool = True,
     timelimit_override: Optional[int] = None,
@@ -279,26 +331,13 @@ def _produce_solution_items(
         interactor_digest = None
 
     compiled_solutions = compile_solutions(
-        progress=progress,
-        …
-    )
-    # Clear run directory and rely on cache to
-    # repopulate it.
-    runs_dir = package.get_problem_runs_dir()
-    shutil.rmtree(str(runs_dir), ignore_errors=True)
-    runs_dir.mkdir(parents=True, exist_ok=True)
-    solutions = list(
-        (i, sol)
-        for i, sol in enumerate(pkg.solutions)
-        if verification.value >= VerificationLevel.ALL_SOLUTIONS.value or is_fast(sol)
+        progress=progress,
+        tracked_solutions=skeleton.get_solution_path_set(),
+        sanitized=sanitized,
     )
-    if tracked_solutions is not None:
-        solutions = [
-            (i, sol) for i, sol in solutions if str(sol.path) in tracked_solutions
-        ]
 
     def yield_items(
-        …
+        solution: SolutionSkeleton, group_name: str
     ) -> List[EvaluationItem]:
         res: List[EvaluationItem] = []
         for i, eval in enumerate(
@@ -306,7 +345,7 @@ def _produce_solution_items(
                 solution,
                 compiled_solutions[solution.path],
                 checker_digest,
-                …
+                solution.runs_dir,
                 group_name,
                 interactor_digest=interactor_digest,
                 progress=progress,
@@ -316,9 +355,8 @@ def _produce_solution_items(
         ):
             res.append(
                 EvaluationItem(
-                    …
-                    …
-                    testcase_index=i,
+                    solution=solution,
+                    testcase_entry=TestcaseEntry(group=group_name, index=i),
                     eval=eval,
                 )
             )
@@ -328,9 +366,9 @@ def _produce_solution_items(
     res: List[EvaluationItem] = []
 
     groups = pkg.testcases
-    for …:
+    for solution in skeleton.solutions:
         for group in groups:
-            res.extend(yield_items(…))
+            res.extend(yield_items(solution, group.name))
 
     return res
 
@@ -356,24 +394,22 @@ def run_solutions(
     timelimit_override: Optional[int] = None,
     sanitized: bool = False,
 ) -> RunSolutionResult:
+    skeleton = _get_report_skeleton(
+        tracked_solutions,
+        verification=verification,
+        timelimit_override=timelimit_override,
+    )
     result = RunSolutionResult(
-        skeleton=_get_report_skeleton(
-            tracked_solutions,
-            verification=verification,
-            timelimit_override=timelimit_override,
-        ),
+        skeleton=skeleton,
         items=_produce_solution_items(
+            skeleton=skeleton,
             progress=progress,
-            tracked_solutions=tracked_solutions,
             verification=verification,
             check=check,
             timelimit_override=timelimit_override,
            sanitized=sanitized,
         ),
     )
-    skeleton_file = package.get_problem_runs_dir() / 'skeleton.yml'
-    skeleton_file.parent.mkdir(parents=True, exist_ok=True)
-    skeleton_file.write_text(utils.model_to_yaml(result.skeleton))
     return result
 
 
@@ -515,8 +551,8 @@ async def _generate_testcase_interactively(
 
 def _run_interactive_solutions(
     testcase: Testcase,
+    skeleton: SolutionReportSkeleton,
     progress: Optional[StatusProgress] = None,
-    tracked_solutions: Optional[Set[str]] = None,
     verification: VerificationLevel = VerificationLevel.NONE,
     check: bool = True,
     sanitized: bool = False,
@@ -531,22 +567,16 @@ def _run_interactive_solutions(
         interactor_digest = None
 
     compiled_solutions = compile_solutions(
-        progress=progress,
+        progress=progress,
+        tracked_solutions=skeleton.get_solution_path_set(),
+        sanitized=sanitized,
     )
 
-    solutions = list(enumerate(pkg.solutions))
-    if tracked_solutions is not None:
-        solutions = [
-            (i, sol) for i, sol in solutions if str(sol.path) in tracked_solutions
-        ]
-
-    irun_dir = package.get_problem_iruns_dir()
-
     if progress:
         progress.update('Running solutions...')
 
-    for …:
-        output_dir = …
+    for solution in skeleton.solutions:
+        output_dir = solution.runs_dir
 
         async def run_fn(solution=solution, output_dir=output_dir):
             return await run_solution_on_testcase(
@@ -561,13 +591,50 @@ def _run_interactive_solutions(
             )
 
         yield EvaluationItem(
-            …
-            …
-            testcase_index=0,
+            solution=solution,
+            testcase_entry=TestcaseEntry(group='irun', index=0),
            eval=Deferred(run_fn),
         )
 
 
+def _get_interactive_skeleton(
+    tracked_solutions: Optional[Set[str]] = None,
+    verification: VerificationLevel = VerificationLevel.NONE,
+) -> SolutionReportSkeleton:
+    solutions = _get_solutions_for_skeleton(tracked_solutions, verification)
+
+    langs = set(find_language_name(solution) for solution in solutions)
+    limits = {
+        lang: get_limits_for_language(lang, verification, timelimit_override=None)
+        for lang in langs
+        if lang is not None
+    }
+
+    # Ensure path is new.
+    irun_dir = package.get_problem_iruns_dir()
+    shutil.rmtree(str(irun_dir), ignore_errors=True)
+    irun_dir.mkdir(parents=True, exist_ok=True)
+
+    skeleton = SolutionReportSkeleton(
+        solutions=[
+            SolutionSkeleton(
+                **solution.model_dump(),
+                runs_dir=irun_dir / f'{i}',
+            )
+            for i, solution in enumerate(solutions)
+        ],
+        groups=[],
+        limits=limits,
+        entries=[],
+        verification=verification,
+    )
+
+    skeleton_file = irun_dir / 'skeleton.yml'
+    skeleton_file.write_text(utils.model_to_yaml(skeleton))
+
+    return skeleton
+
+
 async def run_and_print_interactive_solutions(
     progress: Optional[StatusProgress] = None,
     tracked_solutions: Optional[Set[str]] = None,
@@ -579,12 +646,11 @@ async def run_and_print_interactive_solutions(
     print: bool = False,
     sanitized: bool = False,
 ):
-    # Ensure path is new.
-    irun_dir = package.get_problem_iruns_dir()
-    shutil.rmtree(str(irun_dir), ignore_errors=True)
-    irun_dir.mkdir(parents=True, exist_ok=True)
-
     pkg = package.find_problem_package_or_die()
+    skeleton = _get_interactive_skeleton(
+        tracked_solutions,
+        verification=verification,
+    )
     testcase = await _generate_testcase_interactively(
         progress=progress,
         generator=generator,
@@ -596,15 +662,16 @@ async def run_and_print_interactive_solutions(
     )
     items = _run_interactive_solutions(
         testcase,
+        skeleton=skeleton,
         progress=progress,
-        tracked_solutions=tracked_solutions,
         verification=verification,
         check=check,
         sanitized=sanitized,
     )
 
     for item in items:
-        sol = …
+        sol = skeleton.find_solution_skeleton(item.solution)
+        assert sol is not None
 
         if progress:
             progress.update(f'Running [item]{sol.path}[/item]...')
@@ -613,7 +680,7 @@ async def run_and_print_interactive_solutions(
 
         with utils.no_progress(progress):
             console.console.print(get_testcase_markup_verdict(eval), end=' ')
-            _print_solution_header(sol, console.console…)
+            _print_solution_header(sol, console.console)
             _print_solution_outcome(
                 sol, [eval], console.console, verification, subset=True
             )
@@ -703,6 +770,12 @@ def get_outcome_markup_verdict(outcome: Outcome) -> str:
     return res
 
 
+def get_full_outcome_markup_verdict(outcome: Outcome) -> str:
+    style = get_outcome_style_verdict(outcome)
+    res = f'[{style}]{outcome.name}[/{style}]'
+    return res
+
+
 def get_testcase_markup_verdict(eval: Evaluation) -> str:
     # if eval.log.stdout_absolute_path:
     #     output_path = eval.log.stdout_absolute_path.resolve()
@@ -711,6 +784,10 @@ def get_testcase_markup_verdict(eval: Evaluation) -> str:
     return get_outcome_markup_verdict(eval.result.outcome)
 
 
+def get_full_testcase_markup_verdict(eval: Evaluation) -> str:
+    return get_full_outcome_markup_verdict(eval.result.outcome)
+
+
 def _get_evals_time_in_ms(evals: List[Evaluation]) -> int:
     if not evals:
         return 0
@@ -766,13 +843,60 @@ def get_worst_outcome(evals: List[Evaluation]) -> Outcome:
     return Outcome.worst_outcome(eval.result.outcome for eval in evals)
 
 
-def _print_solution_outcome(
+class SolutionOutcomeReport(BaseModel):
+    solution: Solution
+    evals: List[Evaluation]
+    ok: bool
+    expectedOutcome: Optional[ExpectedOutcome]
+    gotVerdicts: Set[Outcome]
+    runUnderDoubleTl: bool
+    doubleTlVerdicts: Set[Outcome]
+    sanitizerWarnings: bool
+    verification: VerificationLevel
+
+    def get_verdict_markup(self, incomplete: bool = False) -> str:
+        success_str = '[bold green]OK[/bold green]'
+        if not self.ok:
+            success_str = '[bold red]FAILED[/bold red]'
+        if incomplete:
+            success_str = '[bold yellow]INCOMPLETE[/bold yellow]'
+
+        gotVerdicts = self.gotVerdicts if not incomplete else {}
+
+        got_verdict_names = ' '.join(v.name for v in self.gotVerdicts)
+        verdict_str = ''
+        if self.expectedOutcome is not None:
+            verdict_str = f'Expected: {self.expectedOutcome}'
+            if gotVerdicts:
+                verdict_str += f', got: {got_verdict_names}'
+        elif gotVerdicts:
+            verdict_str = f'Got: {got_verdict_names}'
+        return f'{success_str} {verdict_str}'
+
+    def get_verdict_markup_with_warnings(self) -> str:
+        res = self.get_verdict_markup()
+        if self.runUnderDoubleTl:
+            if self.doubleTlVerdicts:
+                res += f'\n[bold yellow]WARNING[/bold yellow] The solution still passed in double TL, but failed with [item]{" ".join(v.name for v in self.doubleTlVerdicts)}[/item].'
+            else:
+                res += '\n[bold yellow]WARNING[/bold yellow] The solution still passed in double TL.'
+        if self.sanitizerWarnings:
+            res += '\n[bold yellow]WARNING[/bold yellow] The solution had sanitizer errors or warnings, marked with [bold yellow]*[/bold yellow]. See their stderr for more details.'
+        return res
+
+    def get_outcome_markup(self) -> str:
+        res = self.get_verdict_markup_with_warnings()
+        res += f'\nTime: {get_capped_evals_formatted_time(self.solution, self.evals, self.verification)}'
+        res += f'\nMemory: {get_evals_formatted_memory(self.evals)}'
+        return res
+
+
+def get_solution_outcome_report(
     solution: Solution,
     evals: List[Evaluation],
-    console: rich.console.Console,
     verification: VerificationLevel = VerificationLevel.NONE,
     subset: bool = False,
-) -> …:
+) -> SolutionOutcomeReport:
     pkg = package.find_problem_package_or_die()
 
     has_plain_tle = False
@@ -805,26 +929,24 @@ def _print_solution_outcome(
     has_failed = unmatched_bad_verdicts or (
         expected_outcome_is_bad and not matched_bad_verdicts and not subset
     )
-    if has_failed:
-        console.print('[error]FAILED[/error]', end=' ')
-    else:
-        console.print('[success]OK[/success]', end=' ')
 
+    report_expected_outcome = None
+    report_got_verdicts = set()
+    report_run_under_double_tl = False
+    report_double_tl_verdicts = set()
+    report_sanitizer_warnings = False
     if has_failed or not subset:
-        …
+        report_expected_outcome = solution.outcome
     elif subset:
-        …
-        console.print(f'Got: {all_verdicts_names}', end='')
+        report_got_verdicts = all_verdicts
 
     if has_failed or not subset:
         # Only print verdicts if not subset.
         if unmatched_bad_verdicts:
-            …
-            console.print(f', got: {" ".join(unmatched_bad_verdicts_names)}', end='')
+            report_got_verdicts = unmatched_bad_verdicts
         elif expected_outcome_is_bad and not matched_bad_verdicts and not subset:
-            …
+            report_got_verdicts = {Outcome.ACCEPTED}
 
-    console.print()
     evals_time = _get_evals_time_in_ms(evals)
     expected_outcome_is_tle = solution.outcome.match(
         Outcome.TIME_LIMIT_EXCEEDED
@@ -846,26 +968,37 @@ def _print_solution_outcome(
         }
         if not other_verdicts:
             # The solution has no other bad verdicts except for TLEs in double TL.
-            console.print(
-                '[yellow]WARNING[/yellow] The solution still passed in double TL.'
-            )
+            report_run_under_double_tl = True
         elif not (bad_verdicts - {Outcome.TIME_LIMIT_EXCEEDED}):
             # The solution has other bad soft TLE outcomes.
-            …
-            console.print(
-                f'[yellow]WARNING[/yellow] The solution could still run under double TL, but failed with [item]{other_verdicts_names}[/item].'
-            )
+            report_double_tl_verdicts = other_verdicts
 
     if has_sanitizer_warnings:
-        …
-        …
-        …
-        …
-        …
-        …
+        report_sanitizer_warnings = True
+
+    return SolutionOutcomeReport(
+        solution=solution,
+        evals=evals,
+        ok=not has_failed,
+        expectedOutcome=report_expected_outcome,
+        gotVerdicts=report_got_verdicts,
+        runUnderDoubleTl=report_run_under_double_tl,
+        doubleTlVerdicts=report_double_tl_verdicts,
+        sanitizerWarnings=report_sanitizer_warnings,
+        verification=verification,
     )
-
-
+
+
+def _print_solution_outcome(
+    solution: Solution,
+    evals: List[Evaluation],
+    console: rich.console.Console,
+    verification: VerificationLevel = VerificationLevel.NONE,
+    subset: bool = False,
+) -> bool:
+    report = get_solution_outcome_report(solution, evals, verification, subset)
+    console.print(report.get_outcome_markup())
+    return report.ok
 
 
 def _consume_and_key_evaluation_items(
@@ -876,30 +1009,22 @@ def _consume_and_key_evaluation_items(
     Consumes EvaluationItems from a run_solutions call and build a view
     with them, possibly marking with optional unprocessed items.
     """
-    pkg = package.find_problem_package_or_die()
     res = skeleton.empty_structured_evaluation()
 
     for item in items:
-        solution…
-        …
+        res[str(item.solution.path)][item.testcase_entry.group][
+            item.testcase_entry.index
+        ] = item.eval
 
     return res
 
 
 def _print_solution_header(
-    solution: …
+    solution: SolutionSkeleton,
+    console: rich.console.Console,
 ):
-    solutions = package.get_solutions()
-    solution_index = [
-        i for i, sol in enumerate(solutions) if sol.path == solution.path
-    ][0]
-    solution_testdir = (
-        package.get_problem_iruns_dir() / f'{solution_index}'
-        if is_irun
-        else package.get_problem_runs_dir() / f'{solution_index}'
-    )
     console.print(f'[item]{solution.path}[/item]', end=' ')
-    console.print(f'({solution_testdir})')
+    console.print(f'({solution.runs_dir})')
 
 
 @dataclasses.dataclass
```
rbx/box/solutions_test.py
CHANGED
```diff
@@ -24,7 +24,9 @@ async def test_solutions(pkg_from_testdata: pathlib.Path):
     await generate_outputs_for_testcases(entries)
 
     result = run_solutions(verification=VerificationLevel.FULL)
-    res = await convert_list_of_solution_evaluations_to_dict(…)
+    res = await convert_list_of_solution_evaluations_to_dict(
+        result.skeleton, result.items
+    )
 
     # First solution should pass all tests.
     assert all(chk.result.outcome == Outcome.ACCEPTED for chk in res[0]['gen1'])
```
rbx/box/tasks.py
CHANGED
```diff
@@ -86,6 +86,7 @@ async def run_solution_on_testcase(
     output_path = output_dir / testcase.inputPath.with_suffix('.out').name
     error_path = output_path.with_suffix('.err')
     log_path = output_path.with_suffix('.log')
+    eval_path = output_path.with_suffix('.eval')
     output_path.parent.mkdir(parents=True, exist_ok=True)
 
     run_log = await run_item(
@@ -120,10 +121,12 @@
                 stdout_absolute_path=output_path.absolute(),
                 stderr_absolute_path=error_path.absolute(),
                 log_absolute_path=log_path.absolute(),
+                eval_absolute_path=eval_path.absolute(),
             ),
         )
 
         log_path.write_text(model_to_yaml(eval))
+        eval_path.write_text(model_to_yaml(eval))
         return eval
 
     if not use_retries:
@@ -198,6 +201,7 @@ async def _run_communication_solution_on_testcase(
     solution_error_path = output_path.with_suffix('.sol.err')
     interactor_error_path = output_path.with_suffix('.int.err')
     log_path = output_path.with_suffix('.log')
+    eval_path = output_path.with_suffix('.eval')
     output_path.parent.mkdir(parents=True, exist_ok=True)
 
     interactor_capture_path = (
@@ -268,11 +272,12 @@
             stdout_absolute_path=output_path.absolute(),
             stderr_absolute_path=solution_error_path.absolute(),
             log_absolute_path=log_path.absolute(),
+            eval_absolute_path=eval_path.absolute(),
         ),
     )
 
     log_path.write_text(model_to_yaml(eval))
-
+    eval_path.write_text(model_to_yaml(eval))
     interactor_log_path = output_path.with_suffix('.int.log')
     interactor_log_path.unlink(missing_ok=True)
     if interactor_run_log is not None:
```
rbx/box/testcase_utils.py
CHANGED
```diff
@@ -33,6 +33,9 @@ class TestcaseEntry(BaseModel):
         group, index = spec.split('/')
         return TestcaseEntry(group=group.strip(), index=int(index))
 
+    def get_prefix_path(self) -> pathlib.Path:
+        return package.get_build_testgroup_path(self.group) / f'{self.index:03d}'
+
 
 class TestcasePattern(BaseModel):
     group_prefix: List[str]
```