ape-framework 0.1.2__tar.gz → 2.0.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. ape_framework-2.0.0.dev1/LICENSE +14 -0
  2. {ape_framework-0.1.2 → ape_framework-2.0.0.dev1}/PKG-INFO +24 -1
  3. ape_framework-2.0.0.dev1/ape/__init__.py +50 -0
  4. ape_framework-2.0.0.dev1/ape/__main__.py +10 -0
  5. ape_framework-2.0.0.dev1/ape/adapter.py +3 -0
  6. ape_framework-2.0.0.dev1/ape/checker/__init__.py +11 -0
  7. ape_framework-2.0.0.dev1/ape/checker/checker.py +32 -0
  8. ape_framework-2.0.0.dev1/ape/checker/orchestrator.py +92 -0
  9. {ape_framework-0.1.2 → ape_framework-2.0.0.dev1}/ape/cli.py +95 -96
  10. {ape_framework-0.1.2 → ape_framework-2.0.0.dev1}/ape/combine.py +6 -5
  11. ape_framework-2.0.0.dev1/ape/data/__init__.py +10 -0
  12. ape_framework-2.0.0.dev1/ape/data/data.py +102 -0
  13. ape_framework-2.0.0.dev1/ape/data/data_provider.py +117 -0
  14. ape_framework-2.0.0.dev1/ape/llm/__init__.py +5 -0
  15. ape_framework-2.0.0.dev1/ape/llm/orchestrator.py +204 -0
  16. ape_framework-2.0.0.dev1/ape/llm/session_log.py +238 -0
  17. ape_framework-2.0.0.dev1/ape/llm/solver.py +369 -0
  18. ape_framework-2.0.0.dev1/ape/llm/solver_config.py +83 -0
  19. ape_framework-2.0.0.dev1/ape/llm/utils.py +29 -0
  20. ape_framework-2.0.0.dev1/ape/llm/workflow.py +425 -0
  21. ape_framework-2.0.0.dev1/ape/report.py +17 -0
  22. ape_framework-2.0.0.dev1/ape/run.py +417 -0
  23. ape_framework-2.0.0.dev1/ape/runner/__init__.py +23 -0
  24. ape_framework-2.0.0.dev1/ape/runner/monitor.py +210 -0
  25. ape_framework-2.0.0.dev1/ape/runner/output.py +47 -0
  26. {ape_framework-0.1.2 → ape_framework-2.0.0.dev1}/ape/runner/run_params.py +0 -7
  27. ape_framework-2.0.0.dev1/ape/runner/runner.py +96 -0
  28. {ape_framework-0.1.2 → ape_framework-2.0.0.dev1}/ape/save_handler.py +53 -38
  29. ape_framework-2.0.0.dev1/ape/tools/__init__.py +5 -0
  30. ape_framework-2.0.0.dev1/ape/tools/base.py +17 -0
  31. ape_framework-2.0.0.dev1/ape/tools/julia.py +130 -0
  32. ape_framework-2.0.0.dev1/ape/tools/lmfdb.py +331 -0
  33. ape_framework-2.0.0.dev1/ape/tools/lmfdb_backend/metadata_formatter.py +68 -0
  34. ape_framework-2.0.0.dev1/ape/tools/notebook.py +151 -0
  35. {ape_framework-0.1.2 → ape_framework-2.0.0.dev1}/ape/visualizer.py +22 -20
  36. {ape_framework-0.1.2 → ape_framework-2.0.0.dev1}/ape_framework.egg-info/PKG-INFO +24 -1
  37. ape_framework-2.0.0.dev1/ape_framework.egg-info/SOURCES.txt +51 -0
  38. ape_framework-2.0.0.dev1/ape_framework.egg-info/requires.txt +9 -0
  39. {ape_framework-0.1.2 → ape_framework-2.0.0.dev1}/pyproject.toml +15 -3
  40. ape_framework-2.0.0.dev1/tests/test_data.py +113 -0
  41. ape_framework-2.0.0.dev1/tests/test_julia.py +100 -0
  42. ape_framework-2.0.0.dev1/tests/test_lmfdb.py +98 -0
  43. ape_framework-2.0.0.dev1/tests/test_notebook.py +58 -0
  44. ape_framework-2.0.0.dev1/tests/test_orchestrators.py +267 -0
  45. ape_framework-2.0.0.dev1/tests/test_output.py +52 -0
  46. ape_framework-2.0.0.dev1/tests/test_save_handler.py +70 -0
  47. ape_framework-2.0.0.dev1/tests/test_session_log.py +103 -0
  48. ape_framework-2.0.0.dev1/tests/test_solver_config.py +83 -0
  49. ape_framework-2.0.0.dev1/tests/test_workflow.py +310 -0
  50. ape_framework-0.1.2/LICENSE +0 -0
  51. ape_framework-0.1.2/ape/__init__.py +0 -27
  52. ape_framework-0.1.2/ape/adapter.py +0 -11
  53. ape_framework-0.1.2/ape/checker.py +0 -39
  54. ape_framework-0.1.2/ape/data/__init__.py +0 -19
  55. ape_framework-0.1.2/ape/data/data.py +0 -122
  56. ape_framework-0.1.2/ape/data/data_provider.py +0 -123
  57. ape_framework-0.1.2/ape/llm/__init__.py +0 -12
  58. ape_framework-0.1.2/ape/llm/gemini_model.py +0 -35
  59. ape_framework-0.1.2/ape/llm/llm_solver.py +0 -121
  60. ape_framework-0.1.2/ape/llm/model.py +0 -23
  61. ape_framework-0.1.2/ape/report.py +0 -21
  62. ape_framework-0.1.2/ape/runner/__init__.py +0 -19
  63. ape_framework-0.1.2/ape/runner/checker_worker.py +0 -54
  64. ape_framework-0.1.2/ape/runner/monitor.py +0 -149
  65. ape_framework-0.1.2/ape/runner/output.py +0 -86
  66. ape_framework-0.1.2/ape/runner/runner.py +0 -188
  67. ape_framework-0.1.2/ape/runner/solver_worker.py +0 -256
  68. ape_framework-0.1.2/ape/solver.py +0 -46
  69. ape_framework-0.1.2/ape_framework.egg-info/SOURCES.txt +0 -30
  70. {ape_framework-0.1.2 → ape_framework-2.0.0.dev1}/README.md +0 -0
  71. {ape_framework-0.1.2 → ape_framework-2.0.0.dev1}/ape_framework.egg-info/dependency_links.txt +0 -0
  72. {ape_framework-0.1.2 → ape_framework-2.0.0.dev1}/ape_framework.egg-info/top_level.txt +0 -0
  73. {ape_framework-0.1.2 → ape_framework-2.0.0.dev1}/setup.cfg +0 -0
@@ -0,0 +1,14 @@
1
+ APE (Algebra Problems Evaluator) is a framework for building and running benchmarks that evaluate LLMs on their ability to solve algebra problems.
2
+ Copyright (C) 2025-2026 Adrian Boguszewski, Kacper Grzybowski, Maciej Teterycz, Mateusz Kasprzak
3
+
4
+ This program is free software: you can redistribute it and/or modify
5
+ it under the terms of the GNU General Public License as published by
6
+ the Free Software Foundation, either version 3 of the License, or any later version.
7
+
8
+ This program is distributed in the hope that it will be useful,
9
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ GNU General Public License for more details.
12
+
13
+ You should have received a copy of the GNU General Public License
14
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
@@ -1,12 +1,35 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ape-framework
3
- Version: 0.1.2
3
+ Version: 2.0.0.dev1
4
4
  Summary: Package for evaluating algebra problems using AI systems.
5
+ License: APE (Algebra Problems Evaluator) is a framework for building and running benchmarks that evaluate LLMs on their ability to solve algebra problems.
6
+ Copyright (C) 2025-2026 Adrian Boguszewski, Kacper Grzybowski, Maciej Teterycz, Mateusz Kasprzak
7
+
8
+ This program is free software: you can redistribute it and/or modify
9
+ it under the terms of the GNU General Public License as published by
10
+ the Free Software Foundation, either version 3 of the License, or any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
5
19
  Classifier: Programming Language :: Python :: 3
6
20
  Classifier: Operating System :: OS Independent
7
21
  Requires-Python: >=3.12
8
22
  Description-Content-Type: text/markdown
9
23
  License-File: LICENSE
24
+ Requires-Dist: langchain
25
+ Requires-Dist: langgraph
26
+ Requires-Dist: rich
27
+ Requires-Dist: jsonpickle
28
+ Requires-Dist: docker
29
+ Requires-Dist: psycopg[binary,pool]
30
+ Requires-Dist: pglast
31
+ Requires-Dist: tiktoken
32
+ Requires-Dist: pydantic
10
33
  Dynamic: license-file
11
34
 
12
35
  # Algebra Problems Evaluator (APE)
@@ -0,0 +1,50 @@
1
+ import logging
2
+
3
+ from .adapter import SolutionAdapter
4
+ from .checker import Checker, CheckerFactory, CheckResult
5
+ from .cli import CLI
6
+ from .llm import EntryKind, Session, Solver, SolverConfig
7
+ from .report import Report
8
+ from .run import run
9
+ from .runner import (
10
+ CheckerState,
11
+ CheckerStatus,
12
+ NullObserver,
13
+ OutputBatch,
14
+ RunObserver,
15
+ RunParams,
16
+ SolverState,
17
+ SolverStatus,
18
+ WorkerSnapshot,
19
+ )
20
+ from .runner.runner import Runner
21
+ from .visualizer import Visualizer
22
+
23
+ # Prevent logging if the user did not configure it
24
+ logger = logging.getLogger(__name__)
25
+ logger.addHandler(logging.NullHandler())
26
+
27
+ __all__ = [
28
+ "SolutionAdapter",
29
+ "Checker",
30
+ "CheckerFactory",
31
+ "CheckResult",
32
+ "CLI",
33
+ "Report",
34
+ "Visualizer",
35
+ "Runner",
36
+ "RunParams",
37
+ "Solver",
38
+ "SolverConfig",
39
+ "OutputBatch",
40
+ "RunObserver",
41
+ "NullObserver",
42
+ "WorkerSnapshot",
43
+ "SolverState",
44
+ "CheckerState",
45
+ "SolverStatus",
46
+ "CheckerStatus",
47
+ "Session",
48
+ "EntryKind",
49
+ "run",
50
+ ]
@@ -0,0 +1,10 @@
1
+ """Allow ``python -m ape <config.toml>`` as the package entry point.
2
+
3
+ This avoids the ``RuntimeWarning`` that ``python -m ape.run`` triggers (because
4
+ ``ape/__init__.py`` imports ``ape.run``); ``python -m ape.run`` still works.
5
+ """
6
+
7
+ from .run import main
8
+
9
+ if __name__ == "__main__":
10
+ main()
@@ -0,0 +1,3 @@
1
+ from typing import Callable
2
+
3
+ type SolutionAdapter[SolutionT] = Callable[[str], SolutionT]
@@ -0,0 +1,11 @@
1
+ from .checker import Checker, CheckerFactory, CheckResult, Failed, Passed
2
+ from .orchestrator import CheckerOrchestrator
3
+
4
+ __all__ = [
5
+ "Checker",
6
+ "CheckerFactory",
7
+ "CheckResult",
8
+ "Failed",
9
+ "Passed",
10
+ "CheckerOrchestrator",
11
+ ]
@@ -0,0 +1,32 @@
1
+ from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass, field
3
+ from datetime import timedelta
4
+ from typing import Any, Callable, Optional
5
+
6
+ from ..data import DataModel
7
+
8
+
9
+ @dataclass(frozen=True)
10
+ class Passed:
11
+ runtime: Optional[timedelta] = None
12
+ metadata: dict[str, Any] = field(default_factory=dict)
13
+
14
+
15
+ @dataclass(frozen=True)
16
+ class Failed:
17
+ reason: str
18
+ runtime: Optional[timedelta] = None
19
+ metadata: dict[str, Any] = field(default_factory=dict)
20
+
21
+
22
+ type CheckResult = Passed | Failed
23
+
24
+
25
+ class Checker[DataT: DataModel, SolutionT](ABC):
26
+ @abstractmethod
27
+ def check(self, data: DataT, solution: SolutionT) -> CheckResult: ...
28
+
29
+
30
+ type CheckerFactory[DataT: DataModel, SolutionT] = Callable[
31
+ [], Checker[DataT, SolutionT]
32
+ ]
@@ -0,0 +1,92 @@
1
+ import logging
2
+ from concurrent.futures.thread import ThreadPoolExecutor
3
+ from queue import Queue
4
+ from threading import Event
5
+
6
+ from ..data import DataModel
7
+ from ..runner.monitor import (
8
+ CheckerState,
9
+ CheckerStatus,
10
+ WorkerStatusTracker,
11
+ )
12
+ from .checker import Checker, CheckerFactory, CheckResult
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class CheckerOrchestrator[DataT: DataModel, SolutionT]:
18
+
19
+ def __init__(
20
+ self,
21
+ checker_factory: CheckerFactory[DataT, SolutionT],
22
+ checker_workers: int,
23
+ tracker: WorkerStatusTracker,
24
+ retry_timeout: int = 10,
25
+ ) -> None:
26
+ self._tracker = tracker
27
+ self._retry_timeout = retry_timeout
28
+
29
+ self._cancel = Event()
30
+ self._executor = ThreadPoolExecutor(max_workers=checker_workers)
31
+ self._checker_carousel: Queue[tuple[Checker[DataT, SolutionT], int]] = Queue()
32
+ for checker_id in range(checker_workers):
33
+ self._checker_carousel.put((checker_factory(), checker_id))
34
+
35
+ def check(self, data: DataT, solution: SolutionT, solver_id: int) -> CheckResult:
36
+ # This check is TOCTOU, but it doesn't matter - best effort to not put any new futures into the pool
37
+ if self._cancel.is_set():
38
+ raise InterruptedError
39
+
40
+ fut = self._executor.submit(self._checker_worker, data, solution, solver_id)
41
+ while True:
42
+ try:
43
+ check_result: CheckResult = fut.result(timeout=self._retry_timeout)
44
+ return check_result
45
+ except TimeoutError:
46
+ if self._cancel.is_set():
47
+ raise InterruptedError
48
+ continue
49
+ except Exception:
50
+ logger.exception(
51
+ "Checker orchestrator caught an exception from checker worker, propagating."
52
+ )
53
+ raise
54
+ raise RuntimeError("unreachable") # Make PyCharm happy
55
+
56
+ def stop(self) -> None:
57
+ self._cancel.set()
58
+ self._executor.shutdown(cancel_futures=True)
59
+
60
+ def _checker_worker(
61
+ self, data: DataT, solution: SolutionT, solver_id: int
62
+ ) -> CheckResult:
63
+ checker, checker_id = self._checker_carousel.get()
64
+
65
+ logger.debug("Checker worker started, got checker with id %d.", checker_id)
66
+ self._tracker.update_checker(
67
+ CheckerStatus(
68
+ id=checker_id,
69
+ state=CheckerState.BUSY,
70
+ data_id=data.data_id,
71
+ solver_id=solver_id,
72
+ )
73
+ )
74
+
75
+ try:
76
+ check_result: CheckResult = checker.check(data, solution)
77
+ except Exception:
78
+ logger.exception("Checker worker encountered an unexpected exception.")
79
+ raise
80
+ finally:
81
+ self._tracker.update_checker(
82
+ CheckerStatus(
83
+ id=checker_id,
84
+ state=CheckerState.IDLE,
85
+ data_id=None,
86
+ solver_id=None,
87
+ )
88
+ )
89
+ self._checker_carousel.put((checker, checker_id))
90
+
91
+ logger.debug("Checker worker for checker with id %d has finished.", checker_id)
92
+ return check_result
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- import logging.config
5
4
  from collections import deque
6
5
  from contextlib import AbstractContextManager
7
6
  from datetime import datetime
@@ -10,7 +9,7 @@ from types import TracebackType
10
9
  from typing import Any, Callable, Literal, Optional
11
10
 
12
11
  from rich import box
13
- from rich.console import Console
12
+ from rich.console import Console, ConsoleOptions, RenderResult
14
13
  from rich.layout import Layout
15
14
  from rich.live import Live
16
15
  from rich.panel import Panel
@@ -24,8 +23,22 @@ from rich.progress import (
24
23
  TimeElapsedColumn,
25
24
  )
26
25
  from rich.table import Table
26
+ from rich.text import Text
27
27
 
28
- from .runner import Runner
28
+ from .llm.session_log import configure_session_log_file
29
+ from .runner.monitor import CheckerStatus, SolverStatus, WorkerSnapshot
30
+
31
+
32
+ class _DashboardRenderable:
33
+ """Thin wrapper so Live's refresh thread calls _render_dashboard each cycle."""
34
+
35
+ def __init__(self, cli: CLI) -> None:
36
+ self._cli = cli
37
+
38
+ def __rich_console__(
39
+ self, console: Console, options: ConsoleOptions
40
+ ) -> RenderResult:
41
+ yield self._cli._render_dashboard()
29
42
 
30
43
 
31
44
  class LogBufferHandler(logging.Handler):
@@ -35,10 +48,12 @@ class LogBufferHandler(logging.Handler):
35
48
 
36
49
  def emit(self, record: logging.LogRecord) -> None:
37
50
  try:
51
+ # Show only the last segment of the logger name to save space
52
+ short_name = record.name.rsplit(".", 1)[-1]
38
53
  entry = {
39
54
  "time": datetime.fromtimestamp(record.created).strftime("%H:%M:%S"),
40
55
  "level": record.levelname,
41
- "logger": record.name,
56
+ "logger": short_name,
42
57
  "message": record.getMessage(),
43
58
  }
44
59
  self._add_entry(entry)
@@ -47,20 +62,27 @@ class LogBufferHandler(logging.Handler):
47
62
 
48
63
 
49
64
  class CLI(AbstractContextManager):
65
+ """Rich-based dashboard. Implements ``RunObserver`` — pass the CLI
66
+ instance straight to ``Runner.run(observer=...)``::
67
+
68
+ with CLI(...) as cli:
69
+ Runner.run(..., observer=cli)
70
+ """
71
+
50
72
  def __init__(
51
73
  self,
52
- runner: Runner,
53
74
  *,
54
75
  log_file: Path | str = Path("output/run.log"),
76
+ session_log_file: Path | str = Path("output/sessions.log"),
55
77
  log_level: int | str = logging.INFO,
56
78
  ) -> None:
57
- self._runner = runner
58
79
  self._log_file = Path(log_file)
80
+ self._session_log_file = Path(session_log_file)
59
81
  self._log_level = self._resolve_log_level(log_level)
60
82
  self._progress_task_id: Optional[TaskID] = None
61
83
  self._log_buffer: deque[dict[str, str]] = deque(maxlen=50)
62
- self._solver_statuses: list[dict[str, Any]] = []
63
- self._checker_statuses: list[dict[str, Any]] = []
84
+ self._solver_statuses: list[SolverStatus] = []
85
+ self._checker_statuses: list[CheckerStatus] = []
64
86
 
65
87
  def __enter__(self) -> CLI:
66
88
  self._setup_logging()
@@ -81,14 +103,13 @@ class CLI(AbstractContextManager):
81
103
  self._log_handler.setLevel(self._log_level)
82
104
  logging.getLogger().addHandler(self._log_handler)
83
105
  self._live = Live(
84
- self._render_dashboard(),
106
+ _DashboardRenderable(self),
85
107
  console=self._console,
86
108
  refresh_per_second=8,
87
- transient=False,
109
+ screen=True,
88
110
  )
89
111
  self._live.start()
90
- self._progress.start()
91
- self._refresh_dashboard()
112
+ self._logger.info("CLI dashboard started")
92
113
  return self
93
114
 
94
115
  def __exit__(
@@ -97,69 +118,54 @@ class CLI(AbstractContextManager):
97
118
  exc: BaseException | None,
98
119
  tb: TracebackType | None,
99
120
  ) -> Literal[False]:
100
- if self._live:
101
- self._live.stop()
102
- if self._progress:
103
- self._progress.stop()
104
- if self._log_handler:
105
- logging.getLogger().removeHandler(self._log_handler)
106
- self._log_handler.close()
121
+ live = getattr(self, "_live", None)
122
+ if live is not None:
123
+ live.stop()
124
+ self._teardown_logging()
107
125
  return False
108
126
 
109
- def run(self, *args: Any, **kwargs: Any) -> Any:
110
- self._logger.info("CLI runner starting")
111
-
112
- forwarded = dict(kwargs)
113
- forwarded.setdefault("progress_hook", self._progress_hook)
114
- forwarded.setdefault("worker_status_hook", self._status_hook)
115
-
116
- result = self._runner.run(*args, **forwarded)
117
-
118
- self._logger.info("CLI runner finished")
119
- return result
127
+ # RunObserver interface
120
128
 
121
- def _progress_hook(self, done: int, total: int) -> None:
122
- # Initialize task on first call
129
+ def on_progress(self, done: int, total: int) -> None:
123
130
  if self._progress_task_id is None:
124
- description = "Processing data"
125
- self._progress_task_id = self._progress.add_task(description, total=total)
126
-
127
- if self._progress_task_id is not None:
128
- self._progress.update(self._progress_task_id, completed=done, total=total)
129
- self._refresh_dashboard()
130
-
131
- def _status_hook(self, snapshot: dict[str, list[dict[str, Any]]]) -> None:
132
- self._solver_statuses = snapshot.get("solvers", [])
133
- self._checker_statuses = snapshot.get("checkers", [])
134
- self._refresh_dashboard()
131
+ self._progress_task_id = self._progress.add_task(
132
+ "Processing data", total=total
133
+ )
134
+ self._progress.update(self._progress_task_id, completed=done, total=total)
135
+
136
+ def on_worker_status(self, snapshot: WorkerSnapshot) -> None:
137
+ self._solver_statuses = list(snapshot.solvers)
138
+ self._checker_statuses = list(snapshot.checkers)
139
+
140
+ # Internal
141
+
142
+ def _teardown_logging(self) -> None:
143
+ root = logging.getLogger()
144
+ log_handler = getattr(self, "_log_handler", None)
145
+ if log_handler is not None:
146
+ root.removeHandler(log_handler)
147
+ log_handler.close()
148
+ file_handler = getattr(self, "_file_handler", None)
149
+ if file_handler is not None:
150
+ root.removeHandler(file_handler)
151
+ file_handler.close()
152
+ teardown_session_log = getattr(self, "_teardown_session_log", None)
153
+ if teardown_session_log is not None:
154
+ teardown_session_log()
135
155
 
136
156
  def _setup_logging(self) -> None:
137
157
  self._log_file.parent.mkdir(parents=True, exist_ok=True)
138
158
 
139
- logging.config.dictConfig(
140
- {
141
- "version": 1,
142
- "disable_existing_loggers": False,
143
- "formatters": {
144
- "default": {
145
- "format": "%(asctime)s %(name)s %(levelname)s %(message)s",
146
- }
147
- },
148
- "handlers": {
149
- "file": {
150
- "class": "logging.FileHandler",
151
- "level": self._log_level,
152
- "formatter": "default",
153
- "filename": str(self._log_file),
154
- "encoding": "utf-8",
155
- }
156
- },
157
- "root": {
158
- "level": self._log_level,
159
- "handlers": ["file"],
160
- },
161
- }
159
+ self._file_handler = logging.FileHandler(self._log_file, encoding="utf-8")
160
+ self._file_handler.setLevel(self._log_level)
161
+ self._file_handler.setFormatter(
162
+ logging.Formatter("%(asctime)s %(name)s %(levelname)s %(message)s")
162
163
  )
164
+ root = logging.getLogger()
165
+ root.setLevel(self._log_level)
166
+ root.addHandler(self._file_handler)
167
+
168
+ self._teardown_session_log = configure_session_log_file(self._session_log_file)
163
169
 
164
170
  @staticmethod
165
171
  def _resolve_log_level(level: int | str) -> int:
@@ -180,14 +186,6 @@ class CLI(AbstractContextManager):
180
186
 
181
187
  def _add_log_entry(self, entry: dict[str, str]) -> None:
182
188
  self._log_buffer.append(entry)
183
- self._refresh_dashboard()
184
-
185
- def _refresh_dashboard(self) -> None:
186
- try:
187
- self._live.update(self._render_dashboard(), refresh=True)
188
- except Exception:
189
- # Avoid crashing the run due to rendering issues.
190
- return
191
189
 
192
190
  def _render_dashboard(self) -> Layout:
193
191
  layout = Layout(name="root")
@@ -199,11 +197,11 @@ class CLI(AbstractContextManager):
199
197
  return layout
200
198
 
201
199
  def _render_progress_panel(self) -> Panel:
202
- if self._progress is None or self._progress_task_id is None:
203
- body: Any = "Waiting for progress updates..."
204
- else:
205
- body = self._progress
206
-
200
+ body: Any = (
201
+ self._progress
202
+ if self._progress_task_id is not None
203
+ else "Waiting for progress updates..."
204
+ )
207
205
  return Panel(body, title="Run Progress", box=box.SIMPLE)
208
206
 
209
207
  def _render_workers_panel(self) -> Panel:
@@ -225,13 +223,13 @@ class CLI(AbstractContextManager):
225
223
  if not self._solver_statuses:
226
224
  solvers.add_row("-", "-", "-", "-", "-")
227
225
  else:
228
- for st in self._solver_statuses:
226
+ for solver_st in self._solver_statuses:
229
227
  solvers.add_row(
230
- str(st.get("id", "-")),
231
- str(st.get("state", "-")),
232
- str(st.get("data_id", "-")),
233
- f"{st.get('curr_run', '-')}/{st.get('total_runs', '-')}",
234
- str(st.get("session_length", "-")),
228
+ str(solver_st.id),
229
+ str(solver_st.state),
230
+ str(solver_st.data_id) if solver_st.data_id is not None else "-",
231
+ f"{solver_st.curr_run}/{solver_st.total_runs}",
232
+ str(solver_st.session_length),
235
233
  )
236
234
 
237
235
  checkers = Table(
@@ -251,12 +249,16 @@ class CLI(AbstractContextManager):
251
249
  if not self._checker_statuses:
252
250
  checkers.add_row("-", "-", "-", "-")
253
251
  else:
254
- for st in self._checker_statuses:
252
+ for checker_st in self._checker_statuses:
255
253
  checkers.add_row(
256
- str(st.get("id", "-")),
257
- str(st.get("state", "-")),
258
- str(st.get("data_id", "-")),
259
- str(st.get("solver_id", "-")),
254
+ str(checker_st.id),
255
+ str(checker_st.state),
256
+ str(checker_st.data_id) if checker_st.data_id is not None else "-",
257
+ (
258
+ str(checker_st.solver_id)
259
+ if checker_st.solver_id is not None
260
+ else "-"
261
+ ),
260
262
  )
261
263
 
262
264
  grid = Table.grid(expand=True)
@@ -276,23 +278,20 @@ class CLI(AbstractContextManager):
276
278
  )
277
279
  table.add_column("Time", style="dim", width=8, no_wrap=True)
278
280
  table.add_column("Level", width=8, no_wrap=True)
279
- table.add_column("Logger", style="dim", width=20, no_wrap=True)
281
+ table.add_column("Logger", style="dim", width=14, no_wrap=True)
280
282
  table.add_column("Message", overflow="fold")
281
283
 
282
284
  if not self._log_buffer:
283
285
  table.add_row("-", "-", "-", "Waiting for logs...")
284
286
  else:
285
- max_rows = 6
286
- if self._console is not None:
287
- max_rows = max(6, min(30, self._console.size.height - 12))
287
+ max_rows = max(6, min(30, self._console.size.height - 12))
288
288
 
289
289
  for entry in reversed(list(self._log_buffer)[-max_rows:]):
290
290
  table.add_row(
291
291
  entry.get("time", ""),
292
292
  entry.get("level", ""),
293
293
  entry.get("logger", ""),
294
- entry.get("message", ""),
294
+ Text(entry.get("message", "")),
295
295
  )
296
- table.add_section()
297
296
 
298
297
  return Panel(table, title="Latest Logs", box=box.SIMPLE, padding=(0, 0))
@@ -8,10 +8,8 @@ from pathlib import Path
8
8
 
9
9
 
10
10
  def _merge_batches(dst: dict, src: dict) -> None:
11
- dst["solutions"].extend(src.get("solutions", []))
12
- dst["solution_runtimes"].extend(src.get("solution_runtimes", []))
13
- dst["solution_total_times"].extend(src.get("solution_total_times", []))
14
- dst["checker_outputs"].extend(src.get("checker_outputs", []))
11
+ dst["_solver_results"].extend(src.get("_solver_results", []))
12
+ dst["_check_results"].extend(src.get("_check_results", []))
15
13
 
16
14
 
17
15
  def main() -> None:
@@ -59,7 +57,10 @@ def main() -> None:
59
57
  continue
60
58
  try:
61
59
  output = json.loads(line)
62
- data_id = output["input_data"]["_Data__id"]
60
+ # _input_data is a jsonpickle-encoded DataModel; its full
61
+ # serialization is the merge key (the schema's id field
62
+ # name is not known at this layer).
63
+ data_id = json.dumps(output["_input_data"], sort_keys=True)
63
64
  if data_id in merged_data:
64
65
  _merge_batches(merged_data[data_id], output)
65
66
  else:
@@ -0,0 +1,10 @@
1
+ from .data import DataModel, id_field
2
+ from .data_provider import CSVDataProvider, DataProvider, JSONLDataProvider
3
+
4
+ __all__ = [
5
+ "DataModel",
6
+ "id_field",
7
+ "DataProvider",
8
+ "CSVDataProvider",
9
+ "JSONLDataProvider",
10
+ ]