ape-framework 1.0.0__tar.gz → 2.0.0.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ape_framework-2.0.0.dev1/LICENSE +14 -0
- {ape_framework-1.0.0 → ape_framework-2.0.0.dev1}/PKG-INFO +22 -2
- ape_framework-2.0.0.dev1/ape/__init__.py +50 -0
- ape_framework-2.0.0.dev1/ape/__main__.py +10 -0
- ape_framework-2.0.0.dev1/ape/adapter.py +3 -0
- ape_framework-2.0.0.dev1/ape/checker/__init__.py +11 -0
- ape_framework-2.0.0.dev1/ape/checker/checker.py +32 -0
- ape_framework-2.0.0.dev1/ape/checker/orchestrator.py +92 -0
- {ape_framework-1.0.0 → ape_framework-2.0.0.dev1}/ape/cli.py +95 -96
- {ape_framework-1.0.0 → ape_framework-2.0.0.dev1}/ape/combine.py +6 -5
- ape_framework-2.0.0.dev1/ape/data/__init__.py +10 -0
- ape_framework-2.0.0.dev1/ape/data/data.py +102 -0
- ape_framework-2.0.0.dev1/ape/data/data_provider.py +117 -0
- ape_framework-2.0.0.dev1/ape/llm/__init__.py +5 -0
- ape_framework-2.0.0.dev1/ape/llm/orchestrator.py +204 -0
- ape_framework-2.0.0.dev1/ape/llm/session_log.py +238 -0
- ape_framework-2.0.0.dev1/ape/llm/solver.py +369 -0
- ape_framework-2.0.0.dev1/ape/llm/solver_config.py +83 -0
- ape_framework-2.0.0.dev1/ape/llm/utils.py +29 -0
- ape_framework-2.0.0.dev1/ape/llm/workflow.py +425 -0
- ape_framework-2.0.0.dev1/ape/report.py +17 -0
- ape_framework-2.0.0.dev1/ape/run.py +417 -0
- ape_framework-2.0.0.dev1/ape/runner/__init__.py +23 -0
- ape_framework-2.0.0.dev1/ape/runner/monitor.py +210 -0
- ape_framework-2.0.0.dev1/ape/runner/output.py +47 -0
- {ape_framework-1.0.0 → ape_framework-2.0.0.dev1}/ape/runner/run_params.py +0 -7
- ape_framework-2.0.0.dev1/ape/runner/runner.py +96 -0
- {ape_framework-1.0.0 → ape_framework-2.0.0.dev1}/ape/save_handler.py +53 -38
- ape_framework-2.0.0.dev1/ape/tools/__init__.py +5 -0
- ape_framework-2.0.0.dev1/ape/tools/base.py +17 -0
- ape_framework-2.0.0.dev1/ape/tools/julia.py +130 -0
- ape_framework-2.0.0.dev1/ape/tools/lmfdb.py +331 -0
- ape_framework-2.0.0.dev1/ape/tools/lmfdb_backend/metadata_formatter.py +68 -0
- ape_framework-2.0.0.dev1/ape/tools/notebook.py +151 -0
- {ape_framework-1.0.0 → ape_framework-2.0.0.dev1}/ape/visualizer.py +22 -20
- {ape_framework-1.0.0 → ape_framework-2.0.0.dev1}/ape_framework.egg-info/PKG-INFO +22 -2
- ape_framework-2.0.0.dev1/ape_framework.egg-info/SOURCES.txt +51 -0
- ape_framework-2.0.0.dev1/ape_framework.egg-info/requires.txt +9 -0
- {ape_framework-1.0.0 → ape_framework-2.0.0.dev1}/pyproject.toml +10 -4
- ape_framework-2.0.0.dev1/tests/test_data.py +113 -0
- ape_framework-2.0.0.dev1/tests/test_julia.py +100 -0
- ape_framework-2.0.0.dev1/tests/test_lmfdb.py +98 -0
- ape_framework-2.0.0.dev1/tests/test_notebook.py +58 -0
- ape_framework-2.0.0.dev1/tests/test_orchestrators.py +267 -0
- ape_framework-2.0.0.dev1/tests/test_output.py +52 -0
- ape_framework-2.0.0.dev1/tests/test_save_handler.py +70 -0
- ape_framework-2.0.0.dev1/tests/test_session_log.py +103 -0
- ape_framework-2.0.0.dev1/tests/test_solver_config.py +83 -0
- ape_framework-2.0.0.dev1/tests/test_workflow.py +310 -0
- ape_framework-1.0.0/LICENSE +0 -0
- ape_framework-1.0.0/ape/__init__.py +0 -24
- ape_framework-1.0.0/ape/adapter.py +0 -11
- ape_framework-1.0.0/ape/checker.py +0 -39
- ape_framework-1.0.0/ape/data/__init__.py +0 -8
- ape_framework-1.0.0/ape/data/data.py +0 -122
- ape_framework-1.0.0/ape/data/data_provider.py +0 -127
- ape_framework-1.0.0/ape/llm/__init__.py +0 -12
- ape_framework-1.0.0/ape/llm/gemini_model.py +0 -35
- ape_framework-1.0.0/ape/llm/llm_solver.py +0 -121
- ape_framework-1.0.0/ape/llm/model.py +0 -23
- ape_framework-1.0.0/ape/report.py +0 -20
- ape_framework-1.0.0/ape/runner/__init__.py +0 -6
- ape_framework-1.0.0/ape/runner/checker_worker.py +0 -54
- ape_framework-1.0.0/ape/runner/monitor.py +0 -149
- ape_framework-1.0.0/ape/runner/output.py +0 -86
- ape_framework-1.0.0/ape/runner/runner.py +0 -188
- ape_framework-1.0.0/ape/runner/solver_worker.py +0 -256
- ape_framework-1.0.0/ape/solver.py +0 -46
- ape_framework-1.0.0/ape_framework.egg-info/SOURCES.txt +0 -31
- ape_framework-1.0.0/ape_framework.egg-info/requires.txt +0 -3
- {ape_framework-1.0.0 → ape_framework-2.0.0.dev1}/README.md +0 -0
- {ape_framework-1.0.0 → ape_framework-2.0.0.dev1}/ape_framework.egg-info/dependency_links.txt +0 -0
- {ape_framework-1.0.0 → ape_framework-2.0.0.dev1}/ape_framework.egg-info/top_level.txt +0 -0
- {ape_framework-1.0.0 → ape_framework-2.0.0.dev1}/setup.cfg +0 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
APE (Algebra Problems Evaluator) is a framework for building and running benchmarks that evaluate LLMs on their ability to solve algebra problems.
|
|
2
|
+
Copyright (C) 2025-2026 Adrian Boguszewski, Kacper Grzybowski, Maciej Teterycz, Mateusz Kasprzak
|
|
3
|
+
|
|
4
|
+
This program is free software: you can redistribute it and/or modify
|
|
5
|
+
it under the terms of the GNU General Public License as published by
|
|
6
|
+
the Free Software Foundation, either version 3 of the License, or any later version.
|
|
7
|
+
|
|
8
|
+
This program is distributed in the hope that it will be useful,
|
|
9
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11
|
+
GNU General Public License for more details.
|
|
12
|
+
|
|
13
|
+
You should have received a copy of the GNU General Public License
|
|
14
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
@@ -1,15 +1,35 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ape-framework
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 2.0.0.dev1
|
|
4
4
|
Summary: Package for evaluating algebra problems using AI systems.
|
|
5
|
+
License: APE (Algebra Problems Evaluator) is a framework for building and running benchmarks that evaluate LLMs on their ability to solve algebra problems.
|
|
6
|
+
Copyright (C) 2025-2026 Adrian Boguszewski, Kacper Grzybowski, Maciej Teterycz, Mateusz Kasprzak
|
|
7
|
+
|
|
8
|
+
This program is free software: you can redistribute it and/or modify
|
|
9
|
+
it under the terms of the GNU General Public License as published by
|
|
10
|
+
the Free Software Foundation, either version 3 of the License, or any later version.
|
|
11
|
+
|
|
12
|
+
This program is distributed in the hope that it will be useful,
|
|
13
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15
|
+
GNU General Public License for more details.
|
|
16
|
+
|
|
17
|
+
You should have received a copy of the GNU General Public License
|
|
18
|
+
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
5
19
|
Classifier: Programming Language :: Python :: 3
|
|
6
20
|
Classifier: Operating System :: OS Independent
|
|
7
21
|
Requires-Python: >=3.12
|
|
8
22
|
Description-Content-Type: text/markdown
|
|
9
23
|
License-File: LICENSE
|
|
10
|
-
Requires-Dist:
|
|
24
|
+
Requires-Dist: langchain
|
|
25
|
+
Requires-Dist: langgraph
|
|
11
26
|
Requires-Dist: rich
|
|
12
27
|
Requires-Dist: jsonpickle
|
|
28
|
+
Requires-Dist: docker
|
|
29
|
+
Requires-Dist: psycopg[binary,pool]
|
|
30
|
+
Requires-Dist: pglast
|
|
31
|
+
Requires-Dist: tiktoken
|
|
32
|
+
Requires-Dist: pydantic
|
|
13
33
|
Dynamic: license-file
|
|
14
34
|
|
|
15
35
|
# Algebra Problems Evaluator (APE)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from .adapter import SolutionAdapter
|
|
4
|
+
from .checker import Checker, CheckerFactory, CheckResult
|
|
5
|
+
from .cli import CLI
|
|
6
|
+
from .llm import EntryKind, Session, Solver, SolverConfig
|
|
7
|
+
from .report import Report
|
|
8
|
+
from .run import run
|
|
9
|
+
from .runner import (
|
|
10
|
+
CheckerState,
|
|
11
|
+
CheckerStatus,
|
|
12
|
+
NullObserver,
|
|
13
|
+
OutputBatch,
|
|
14
|
+
RunObserver,
|
|
15
|
+
RunParams,
|
|
16
|
+
SolverState,
|
|
17
|
+
SolverStatus,
|
|
18
|
+
WorkerSnapshot,
|
|
19
|
+
)
|
|
20
|
+
from .runner.runner import Runner
|
|
21
|
+
from .visualizer import Visualizer
|
|
22
|
+
|
|
23
|
+
# Prevent logging if the user did not configure it
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
logger.addHandler(logging.NullHandler())
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"SolutionAdapter",
|
|
29
|
+
"Checker",
|
|
30
|
+
"CheckerFactory",
|
|
31
|
+
"CheckResult",
|
|
32
|
+
"CLI",
|
|
33
|
+
"Report",
|
|
34
|
+
"Visualizer",
|
|
35
|
+
"Runner",
|
|
36
|
+
"RunParams",
|
|
37
|
+
"Solver",
|
|
38
|
+
"SolverConfig",
|
|
39
|
+
"OutputBatch",
|
|
40
|
+
"RunObserver",
|
|
41
|
+
"NullObserver",
|
|
42
|
+
"WorkerSnapshot",
|
|
43
|
+
"SolverState",
|
|
44
|
+
"CheckerState",
|
|
45
|
+
"SolverStatus",
|
|
46
|
+
"CheckerStatus",
|
|
47
|
+
"Session",
|
|
48
|
+
"EntryKind",
|
|
49
|
+
"run",
|
|
50
|
+
]
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Allow ``python -m ape <config.toml>`` as the package entry point.
|
|
2
|
+
|
|
3
|
+
This avoids the ``RuntimeWarning`` that ``python -m ape.run`` triggers (because
|
|
4
|
+
``ape/__init__.py`` imports ``ape.run``); ``python -m ape.run`` still works.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .run import main
|
|
8
|
+
|
|
9
|
+
if __name__ == "__main__":
|
|
10
|
+
main()
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from datetime import timedelta
|
|
4
|
+
from typing import Any, Callable, Optional
|
|
5
|
+
|
|
6
|
+
from ..data import DataModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
|
|
10
|
+
class Passed:
|
|
11
|
+
runtime: Optional[timedelta] = None
|
|
12
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
|
|
16
|
+
class Failed:
|
|
17
|
+
reason: str
|
|
18
|
+
runtime: Optional[timedelta] = None
|
|
19
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
type CheckResult = Passed | Failed
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Checker[DataT: DataModel, SolutionT](ABC):
|
|
26
|
+
@abstractmethod
|
|
27
|
+
def check(self, data: DataT, solution: SolutionT) -> CheckResult: ...
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
type CheckerFactory[DataT: DataModel, SolutionT] = Callable[
|
|
31
|
+
[], Checker[DataT, SolutionT]
|
|
32
|
+
]
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from concurrent.futures.thread import ThreadPoolExecutor
|
|
3
|
+
from queue import Queue
|
|
4
|
+
from threading import Event
|
|
5
|
+
|
|
6
|
+
from ..data import DataModel
|
|
7
|
+
from ..runner.monitor import (
|
|
8
|
+
CheckerState,
|
|
9
|
+
CheckerStatus,
|
|
10
|
+
WorkerStatusTracker,
|
|
11
|
+
)
|
|
12
|
+
from .checker import Checker, CheckerFactory, CheckResult
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CheckerOrchestrator[DataT: DataModel, SolutionT]:
|
|
18
|
+
|
|
19
|
+
def __init__(
|
|
20
|
+
self,
|
|
21
|
+
checker_factory: CheckerFactory[DataT, SolutionT],
|
|
22
|
+
checker_workers: int,
|
|
23
|
+
tracker: WorkerStatusTracker,
|
|
24
|
+
retry_timeout: int = 10,
|
|
25
|
+
) -> None:
|
|
26
|
+
self._tracker = tracker
|
|
27
|
+
self._retry_timeout = retry_timeout
|
|
28
|
+
|
|
29
|
+
self._cancel = Event()
|
|
30
|
+
self._executor = ThreadPoolExecutor(max_workers=checker_workers)
|
|
31
|
+
self._checker_carousel: Queue[tuple[Checker[DataT, SolutionT], int]] = Queue()
|
|
32
|
+
for checker_id in range(checker_workers):
|
|
33
|
+
self._checker_carousel.put((checker_factory(), checker_id))
|
|
34
|
+
|
|
35
|
+
def check(self, data: DataT, solution: SolutionT, solver_id: int) -> CheckResult:
|
|
36
|
+
# This check is TOCTOU, but it doesn't matter - best effort to not put any new futures into the pool
|
|
37
|
+
if self._cancel.is_set():
|
|
38
|
+
raise InterruptedError
|
|
39
|
+
|
|
40
|
+
fut = self._executor.submit(self._checker_worker, data, solution, solver_id)
|
|
41
|
+
while True:
|
|
42
|
+
try:
|
|
43
|
+
check_result: CheckResult = fut.result(timeout=self._retry_timeout)
|
|
44
|
+
return check_result
|
|
45
|
+
except TimeoutError:
|
|
46
|
+
if self._cancel.is_set():
|
|
47
|
+
raise InterruptedError
|
|
48
|
+
continue
|
|
49
|
+
except Exception:
|
|
50
|
+
logger.exception(
|
|
51
|
+
"Checker orchestrator caught an exception from checker worker, propagating."
|
|
52
|
+
)
|
|
53
|
+
raise
|
|
54
|
+
raise RuntimeError("unreachable") # Make PyCharm happy
|
|
55
|
+
|
|
56
|
+
def stop(self) -> None:
|
|
57
|
+
self._cancel.set()
|
|
58
|
+
self._executor.shutdown(cancel_futures=True)
|
|
59
|
+
|
|
60
|
+
def _checker_worker(
|
|
61
|
+
self, data: DataT, solution: SolutionT, solver_id: int
|
|
62
|
+
) -> CheckResult:
|
|
63
|
+
checker, checker_id = self._checker_carousel.get()
|
|
64
|
+
|
|
65
|
+
logger.debug("Checker worker started, got checker with id %d.", checker_id)
|
|
66
|
+
self._tracker.update_checker(
|
|
67
|
+
CheckerStatus(
|
|
68
|
+
id=checker_id,
|
|
69
|
+
state=CheckerState.BUSY,
|
|
70
|
+
data_id=data.data_id,
|
|
71
|
+
solver_id=solver_id,
|
|
72
|
+
)
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
try:
|
|
76
|
+
check_result: CheckResult = checker.check(data, solution)
|
|
77
|
+
except Exception:
|
|
78
|
+
logger.exception("Checker worker encountered an unexpected exception.")
|
|
79
|
+
raise
|
|
80
|
+
finally:
|
|
81
|
+
self._tracker.update_checker(
|
|
82
|
+
CheckerStatus(
|
|
83
|
+
id=checker_id,
|
|
84
|
+
state=CheckerState.IDLE,
|
|
85
|
+
data_id=None,
|
|
86
|
+
solver_id=None,
|
|
87
|
+
)
|
|
88
|
+
)
|
|
89
|
+
self._checker_carousel.put((checker, checker_id))
|
|
90
|
+
|
|
91
|
+
logger.debug("Checker worker for checker with id %d has finished.", checker_id)
|
|
92
|
+
return check_result
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
import logging.config
|
|
5
4
|
from collections import deque
|
|
6
5
|
from contextlib import AbstractContextManager
|
|
7
6
|
from datetime import datetime
|
|
@@ -10,7 +9,7 @@ from types import TracebackType
|
|
|
10
9
|
from typing import Any, Callable, Literal, Optional
|
|
11
10
|
|
|
12
11
|
from rich import box
|
|
13
|
-
from rich.console import Console
|
|
12
|
+
from rich.console import Console, ConsoleOptions, RenderResult
|
|
14
13
|
from rich.layout import Layout
|
|
15
14
|
from rich.live import Live
|
|
16
15
|
from rich.panel import Panel
|
|
@@ -24,8 +23,22 @@ from rich.progress import (
|
|
|
24
23
|
TimeElapsedColumn,
|
|
25
24
|
)
|
|
26
25
|
from rich.table import Table
|
|
26
|
+
from rich.text import Text
|
|
27
27
|
|
|
28
|
-
from .
|
|
28
|
+
from .llm.session_log import configure_session_log_file
|
|
29
|
+
from .runner.monitor import CheckerStatus, SolverStatus, WorkerSnapshot
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class _DashboardRenderable:
    """Renderable given to ``Live`` that rebuilds the dashboard on each render.

    It holds the CLI rather than a rendered layout, so every refresh cycle
    calls ``_render_dashboard`` again.
    """

    def __init__(self, cli: CLI) -> None:
        self._cli = cli

    def __rich_console__(
        self, console: Console, options: ConsoleOptions
    ) -> RenderResult:
        """Yield one freshly rendered dashboard; both arguments are unused."""
        dashboard = self._cli._render_dashboard()
        yield dashboard
|
|
29
42
|
|
|
30
43
|
|
|
31
44
|
class LogBufferHandler(logging.Handler):
|
|
@@ -35,10 +48,12 @@ class LogBufferHandler(logging.Handler):
|
|
|
35
48
|
|
|
36
49
|
def emit(self, record: logging.LogRecord) -> None:
|
|
37
50
|
try:
|
|
51
|
+
# Show only the last segment of the logger name to save space
|
|
52
|
+
short_name = record.name.rsplit(".", 1)[-1]
|
|
38
53
|
entry = {
|
|
39
54
|
"time": datetime.fromtimestamp(record.created).strftime("%H:%M:%S"),
|
|
40
55
|
"level": record.levelname,
|
|
41
|
-
"logger":
|
|
56
|
+
"logger": short_name,
|
|
42
57
|
"message": record.getMessage(),
|
|
43
58
|
}
|
|
44
59
|
self._add_entry(entry)
|
|
@@ -47,20 +62,27 @@ class LogBufferHandler(logging.Handler):
|
|
|
47
62
|
|
|
48
63
|
|
|
49
64
|
class CLI(AbstractContextManager):
|
|
65
|
+
"""Rich-based dashboard. Implements ``RunObserver`` — pass the CLI
|
|
66
|
+
instance straight to ``Runner.run(observer=...)``::
|
|
67
|
+
|
|
68
|
+
with CLI(...) as cli:
|
|
69
|
+
Runner.run(..., observer=cli)
|
|
70
|
+
"""
|
|
71
|
+
|
|
50
72
|
def __init__(
|
|
51
73
|
self,
|
|
52
|
-
runner: Runner,
|
|
53
74
|
*,
|
|
54
75
|
log_file: Path | str = Path("output/run.log"),
|
|
76
|
+
session_log_file: Path | str = Path("output/sessions.log"),
|
|
55
77
|
log_level: int | str = logging.INFO,
|
|
56
78
|
) -> None:
|
|
57
|
-
self._runner = runner
|
|
58
79
|
self._log_file = Path(log_file)
|
|
80
|
+
self._session_log_file = Path(session_log_file)
|
|
59
81
|
self._log_level = self._resolve_log_level(log_level)
|
|
60
82
|
self._progress_task_id: Optional[TaskID] = None
|
|
61
83
|
self._log_buffer: deque[dict[str, str]] = deque(maxlen=50)
|
|
62
|
-
self._solver_statuses: list[
|
|
63
|
-
self._checker_statuses: list[
|
|
84
|
+
self._solver_statuses: list[SolverStatus] = []
|
|
85
|
+
self._checker_statuses: list[CheckerStatus] = []
|
|
64
86
|
|
|
65
87
|
def __enter__(self) -> CLI:
|
|
66
88
|
self._setup_logging()
|
|
@@ -81,14 +103,13 @@ class CLI(AbstractContextManager):
|
|
|
81
103
|
self._log_handler.setLevel(self._log_level)
|
|
82
104
|
logging.getLogger().addHandler(self._log_handler)
|
|
83
105
|
self._live = Live(
|
|
84
|
-
self
|
|
106
|
+
_DashboardRenderable(self),
|
|
85
107
|
console=self._console,
|
|
86
108
|
refresh_per_second=8,
|
|
87
|
-
|
|
109
|
+
screen=True,
|
|
88
110
|
)
|
|
89
111
|
self._live.start()
|
|
90
|
-
self.
|
|
91
|
-
self._refresh_dashboard()
|
|
112
|
+
self._logger.info("CLI dashboard started")
|
|
92
113
|
return self
|
|
93
114
|
|
|
94
115
|
def __exit__(
|
|
@@ -97,69 +118,54 @@ class CLI(AbstractContextManager):
|
|
|
97
118
|
exc: BaseException | None,
|
|
98
119
|
tb: TracebackType | None,
|
|
99
120
|
) -> Literal[False]:
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
if self._log_handler:
|
|
105
|
-
logging.getLogger().removeHandler(self._log_handler)
|
|
106
|
-
self._log_handler.close()
|
|
121
|
+
live = getattr(self, "_live", None)
|
|
122
|
+
if live is not None:
|
|
123
|
+
live.stop()
|
|
124
|
+
self._teardown_logging()
|
|
107
125
|
return False
|
|
108
126
|
|
|
109
|
-
|
|
110
|
-
self._logger.info("CLI runner starting")
|
|
111
|
-
|
|
112
|
-
forwarded = dict(kwargs)
|
|
113
|
-
forwarded.setdefault("progress_hook", self._progress_hook)
|
|
114
|
-
forwarded.setdefault("worker_status_hook", self._status_hook)
|
|
115
|
-
|
|
116
|
-
result = self._runner.run(*args, **forwarded)
|
|
117
|
-
|
|
118
|
-
self._logger.info("CLI runner finished")
|
|
119
|
-
return result
|
|
127
|
+
# RunObserver interface
|
|
120
128
|
|
|
121
|
-
def
|
|
122
|
-
# Initialize task on first call
|
|
129
|
+
def on_progress(self, done: int, total: int) -> None:
|
|
123
130
|
if self._progress_task_id is None:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
131
|
+
self._progress_task_id = self._progress.add_task(
|
|
132
|
+
"Processing data", total=total
|
|
133
|
+
)
|
|
134
|
+
self._progress.update(self._progress_task_id, completed=done, total=total)
|
|
135
|
+
|
|
136
|
+
def on_worker_status(self, snapshot: WorkerSnapshot) -> None:
|
|
137
|
+
self._solver_statuses = list(snapshot.solvers)
|
|
138
|
+
self._checker_statuses = list(snapshot.checkers)
|
|
139
|
+
|
|
140
|
+
# Internal
|
|
141
|
+
|
|
142
|
+
def _teardown_logging(self) -> None:
|
|
143
|
+
root = logging.getLogger()
|
|
144
|
+
log_handler = getattr(self, "_log_handler", None)
|
|
145
|
+
if log_handler is not None:
|
|
146
|
+
root.removeHandler(log_handler)
|
|
147
|
+
log_handler.close()
|
|
148
|
+
file_handler = getattr(self, "_file_handler", None)
|
|
149
|
+
if file_handler is not None:
|
|
150
|
+
root.removeHandler(file_handler)
|
|
151
|
+
file_handler.close()
|
|
152
|
+
teardown_session_log = getattr(self, "_teardown_session_log", None)
|
|
153
|
+
if teardown_session_log is not None:
|
|
154
|
+
teardown_session_log()
|
|
135
155
|
|
|
136
156
|
def _setup_logging(self) -> None:
|
|
137
157
|
self._log_file.parent.mkdir(parents=True, exist_ok=True)
|
|
138
158
|
|
|
139
|
-
logging.
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
"formatters": {
|
|
144
|
-
"default": {
|
|
145
|
-
"format": "%(asctime)s %(name)s %(levelname)s %(message)s",
|
|
146
|
-
}
|
|
147
|
-
},
|
|
148
|
-
"handlers": {
|
|
149
|
-
"file": {
|
|
150
|
-
"class": "logging.FileHandler",
|
|
151
|
-
"level": self._log_level,
|
|
152
|
-
"formatter": "default",
|
|
153
|
-
"filename": str(self._log_file),
|
|
154
|
-
"encoding": "utf-8",
|
|
155
|
-
}
|
|
156
|
-
},
|
|
157
|
-
"root": {
|
|
158
|
-
"level": self._log_level,
|
|
159
|
-
"handlers": ["file"],
|
|
160
|
-
},
|
|
161
|
-
}
|
|
159
|
+
self._file_handler = logging.FileHandler(self._log_file, encoding="utf-8")
|
|
160
|
+
self._file_handler.setLevel(self._log_level)
|
|
161
|
+
self._file_handler.setFormatter(
|
|
162
|
+
logging.Formatter("%(asctime)s %(name)s %(levelname)s %(message)s")
|
|
162
163
|
)
|
|
164
|
+
root = logging.getLogger()
|
|
165
|
+
root.setLevel(self._log_level)
|
|
166
|
+
root.addHandler(self._file_handler)
|
|
167
|
+
|
|
168
|
+
self._teardown_session_log = configure_session_log_file(self._session_log_file)
|
|
163
169
|
|
|
164
170
|
@staticmethod
|
|
165
171
|
def _resolve_log_level(level: int | str) -> int:
|
|
@@ -180,14 +186,6 @@ class CLI(AbstractContextManager):
|
|
|
180
186
|
|
|
181
187
|
def _add_log_entry(self, entry: dict[str, str]) -> None:
|
|
182
188
|
self._log_buffer.append(entry)
|
|
183
|
-
self._refresh_dashboard()
|
|
184
|
-
|
|
185
|
-
def _refresh_dashboard(self) -> None:
|
|
186
|
-
try:
|
|
187
|
-
self._live.update(self._render_dashboard(), refresh=True)
|
|
188
|
-
except Exception:
|
|
189
|
-
# Avoid crashing the run due to rendering issues.
|
|
190
|
-
return
|
|
191
189
|
|
|
192
190
|
def _render_dashboard(self) -> Layout:
|
|
193
191
|
layout = Layout(name="root")
|
|
@@ -199,11 +197,11 @@ class CLI(AbstractContextManager):
|
|
|
199
197
|
return layout
|
|
200
198
|
|
|
201
199
|
def _render_progress_panel(self) -> Panel:
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
200
|
+
body: Any = (
|
|
201
|
+
self._progress
|
|
202
|
+
if self._progress_task_id is not None
|
|
203
|
+
else "Waiting for progress updates..."
|
|
204
|
+
)
|
|
207
205
|
return Panel(body, title="Run Progress", box=box.SIMPLE)
|
|
208
206
|
|
|
209
207
|
def _render_workers_panel(self) -> Panel:
|
|
@@ -225,13 +223,13 @@ class CLI(AbstractContextManager):
|
|
|
225
223
|
if not self._solver_statuses:
|
|
226
224
|
solvers.add_row("-", "-", "-", "-", "-")
|
|
227
225
|
else:
|
|
228
|
-
for
|
|
226
|
+
for solver_st in self._solver_statuses:
|
|
229
227
|
solvers.add_row(
|
|
230
|
-
str(
|
|
231
|
-
str(
|
|
232
|
-
str(
|
|
233
|
-
f"{
|
|
234
|
-
str(
|
|
228
|
+
str(solver_st.id),
|
|
229
|
+
str(solver_st.state),
|
|
230
|
+
str(solver_st.data_id) if solver_st.data_id is not None else "-",
|
|
231
|
+
f"{solver_st.curr_run}/{solver_st.total_runs}",
|
|
232
|
+
str(solver_st.session_length),
|
|
235
233
|
)
|
|
236
234
|
|
|
237
235
|
checkers = Table(
|
|
@@ -251,12 +249,16 @@ class CLI(AbstractContextManager):
|
|
|
251
249
|
if not self._checker_statuses:
|
|
252
250
|
checkers.add_row("-", "-", "-", "-")
|
|
253
251
|
else:
|
|
254
|
-
for
|
|
252
|
+
for checker_st in self._checker_statuses:
|
|
255
253
|
checkers.add_row(
|
|
256
|
-
str(
|
|
257
|
-
str(
|
|
258
|
-
str(
|
|
259
|
-
|
|
254
|
+
str(checker_st.id),
|
|
255
|
+
str(checker_st.state),
|
|
256
|
+
str(checker_st.data_id) if checker_st.data_id is not None else "-",
|
|
257
|
+
(
|
|
258
|
+
str(checker_st.solver_id)
|
|
259
|
+
if checker_st.solver_id is not None
|
|
260
|
+
else "-"
|
|
261
|
+
),
|
|
260
262
|
)
|
|
261
263
|
|
|
262
264
|
grid = Table.grid(expand=True)
|
|
@@ -276,23 +278,20 @@ class CLI(AbstractContextManager):
|
|
|
276
278
|
)
|
|
277
279
|
table.add_column("Time", style="dim", width=8, no_wrap=True)
|
|
278
280
|
table.add_column("Level", width=8, no_wrap=True)
|
|
279
|
-
table.add_column("Logger", style="dim", width=
|
|
281
|
+
table.add_column("Logger", style="dim", width=14, no_wrap=True)
|
|
280
282
|
table.add_column("Message", overflow="fold")
|
|
281
283
|
|
|
282
284
|
if not self._log_buffer:
|
|
283
285
|
table.add_row("-", "-", "-", "Waiting for logs...")
|
|
284
286
|
else:
|
|
285
|
-
max_rows = 6
|
|
286
|
-
if self._console is not None:
|
|
287
|
-
max_rows = max(6, min(30, self._console.size.height - 12))
|
|
287
|
+
max_rows = max(6, min(30, self._console.size.height - 12))
|
|
288
288
|
|
|
289
289
|
for entry in reversed(list(self._log_buffer)[-max_rows:]):
|
|
290
290
|
table.add_row(
|
|
291
291
|
entry.get("time", ""),
|
|
292
292
|
entry.get("level", ""),
|
|
293
293
|
entry.get("logger", ""),
|
|
294
|
-
entry.get("message", ""),
|
|
294
|
+
Text(entry.get("message", "")),
|
|
295
295
|
)
|
|
296
|
-
table.add_section()
|
|
297
296
|
|
|
298
297
|
return Panel(table, title="Latest Logs", box=box.SIMPLE, padding=(0, 0))
|
|
@@ -8,10 +8,8 @@ from pathlib import Path
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def _merge_batches(dst: dict, src: dict) -> None:
|
|
11
|
-
dst["
|
|
12
|
-
dst["
|
|
13
|
-
dst["solution_total_times"].extend(src.get("solution_total_times", []))
|
|
14
|
-
dst["checker_outputs"].extend(src.get("checker_outputs", []))
|
|
11
|
+
dst["_solver_results"].extend(src.get("_solver_results", []))
|
|
12
|
+
dst["_check_results"].extend(src.get("_check_results", []))
|
|
15
13
|
|
|
16
14
|
|
|
17
15
|
def main() -> None:
|
|
@@ -59,7 +57,10 @@ def main() -> None:
|
|
|
59
57
|
continue
|
|
60
58
|
try:
|
|
61
59
|
output = json.loads(line)
|
|
62
|
-
|
|
60
|
+
# _input_data is a jsonpickle-encoded DataModel; its full
|
|
61
|
+
# serialization is the merge key (the schema's id field
|
|
62
|
+
# name is not known at this layer).
|
|
63
|
+
data_id = json.dumps(output["_input_data"], sort_keys=True)
|
|
63
64
|
if data_id in merged_data:
|
|
64
65
|
_merge_batches(merged_data[data_id], output)
|
|
65
66
|
else:
|