chuk-puzzles-gym 0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chuk_puzzles_gym/__init__.py +19 -0
- chuk_puzzles_gym/constants.py +9 -0
- chuk_puzzles_gym/eval.py +763 -0
- chuk_puzzles_gym/export/__init__.py +20 -0
- chuk_puzzles_gym/export/dataset.py +376 -0
- chuk_puzzles_gym/games/__init__.py +94 -0
- chuk_puzzles_gym/games/_base/__init__.py +6 -0
- chuk_puzzles_gym/games/_base/commands.py +91 -0
- chuk_puzzles_gym/games/_base/game.py +337 -0
- chuk_puzzles_gym/games/binary/__init__.py +6 -0
- chuk_puzzles_gym/games/binary/config.py +23 -0
- chuk_puzzles_gym/games/binary/game.py +434 -0
- chuk_puzzles_gym/games/bridges/__init__.py +6 -0
- chuk_puzzles_gym/games/bridges/config.py +24 -0
- chuk_puzzles_gym/games/bridges/game.py +489 -0
- chuk_puzzles_gym/games/einstein/__init__.py +6 -0
- chuk_puzzles_gym/games/einstein/config.py +23 -0
- chuk_puzzles_gym/games/einstein/constants.py +13 -0
- chuk_puzzles_gym/games/einstein/game.py +366 -0
- chuk_puzzles_gym/games/einstein/models.py +35 -0
- chuk_puzzles_gym/games/fillomino/__init__.py +6 -0
- chuk_puzzles_gym/games/fillomino/config.py +24 -0
- chuk_puzzles_gym/games/fillomino/game.py +516 -0
- chuk_puzzles_gym/games/futoshiki/__init__.py +6 -0
- chuk_puzzles_gym/games/futoshiki/config.py +23 -0
- chuk_puzzles_gym/games/futoshiki/game.py +391 -0
- chuk_puzzles_gym/games/hidato/__init__.py +6 -0
- chuk_puzzles_gym/games/hidato/config.py +24 -0
- chuk_puzzles_gym/games/hidato/game.py +403 -0
- chuk_puzzles_gym/games/hitori/__init__.py +6 -0
- chuk_puzzles_gym/games/hitori/config.py +23 -0
- chuk_puzzles_gym/games/hitori/game.py +451 -0
- chuk_puzzles_gym/games/kakuro/__init__.py +6 -0
- chuk_puzzles_gym/games/kakuro/config.py +24 -0
- chuk_puzzles_gym/games/kakuro/game.py +399 -0
- chuk_puzzles_gym/games/kenken/__init__.py +6 -0
- chuk_puzzles_gym/games/kenken/config.py +24 -0
- chuk_puzzles_gym/games/kenken/enums.py +13 -0
- chuk_puzzles_gym/games/kenken/game.py +486 -0
- chuk_puzzles_gym/games/kenken/models.py +15 -0
- chuk_puzzles_gym/games/killer_sudoku/__init__.py +6 -0
- chuk_puzzles_gym/games/killer_sudoku/config.py +23 -0
- chuk_puzzles_gym/games/killer_sudoku/game.py +502 -0
- chuk_puzzles_gym/games/killer_sudoku/models.py +15 -0
- chuk_puzzles_gym/games/knapsack/__init__.py +6 -0
- chuk_puzzles_gym/games/knapsack/config.py +24 -0
- chuk_puzzles_gym/games/knapsack/enums.py +10 -0
- chuk_puzzles_gym/games/knapsack/game.py +340 -0
- chuk_puzzles_gym/games/knapsack/models.py +13 -0
- chuk_puzzles_gym/games/lights_out/__init__.py +6 -0
- chuk_puzzles_gym/games/lights_out/config.py +24 -0
- chuk_puzzles_gym/games/lights_out/game.py +249 -0
- chuk_puzzles_gym/games/logic_grid/__init__.py +6 -0
- chuk_puzzles_gym/games/logic_grid/config.py +24 -0
- chuk_puzzles_gym/games/logic_grid/constants.py +12 -0
- chuk_puzzles_gym/games/logic_grid/game.py +333 -0
- chuk_puzzles_gym/games/logic_grid/models.py +24 -0
- chuk_puzzles_gym/games/mastermind/__init__.py +6 -0
- chuk_puzzles_gym/games/mastermind/config.py +25 -0
- chuk_puzzles_gym/games/mastermind/game.py +297 -0
- chuk_puzzles_gym/games/minesweeper/__init__.py +6 -0
- chuk_puzzles_gym/games/minesweeper/config.py +24 -0
- chuk_puzzles_gym/games/minesweeper/enums.py +12 -0
- chuk_puzzles_gym/games/minesweeper/game.py +432 -0
- chuk_puzzles_gym/games/nonogram/__init__.py +6 -0
- chuk_puzzles_gym/games/nonogram/config.py +23 -0
- chuk_puzzles_gym/games/nonogram/game.py +296 -0
- chuk_puzzles_gym/games/nurikabe/__init__.py +6 -0
- chuk_puzzles_gym/games/nurikabe/config.py +24 -0
- chuk_puzzles_gym/games/nurikabe/enums.py +14 -0
- chuk_puzzles_gym/games/nurikabe/game.py +586 -0
- chuk_puzzles_gym/games/scheduler/__init__.py +6 -0
- chuk_puzzles_gym/games/scheduler/config.py +25 -0
- chuk_puzzles_gym/games/scheduler/constants.py +15 -0
- chuk_puzzles_gym/games/scheduler/enums.py +10 -0
- chuk_puzzles_gym/games/scheduler/game.py +431 -0
- chuk_puzzles_gym/games/scheduler/models.py +14 -0
- chuk_puzzles_gym/games/shikaku/__init__.py +6 -0
- chuk_puzzles_gym/games/shikaku/config.py +24 -0
- chuk_puzzles_gym/games/shikaku/game.py +419 -0
- chuk_puzzles_gym/games/slitherlink/__init__.py +6 -0
- chuk_puzzles_gym/games/slitherlink/config.py +23 -0
- chuk_puzzles_gym/games/slitherlink/game.py +386 -0
- chuk_puzzles_gym/games/sokoban/__init__.py +6 -0
- chuk_puzzles_gym/games/sokoban/config.py +24 -0
- chuk_puzzles_gym/games/sokoban/game.py +671 -0
- chuk_puzzles_gym/games/star_battle/__init__.py +6 -0
- chuk_puzzles_gym/games/star_battle/config.py +24 -0
- chuk_puzzles_gym/games/star_battle/game.py +390 -0
- chuk_puzzles_gym/games/sudoku/__init__.py +7 -0
- chuk_puzzles_gym/games/sudoku/commands.py +96 -0
- chuk_puzzles_gym/games/sudoku/config.py +22 -0
- chuk_puzzles_gym/games/sudoku/game.py +328 -0
- chuk_puzzles_gym/games/tents/__init__.py +6 -0
- chuk_puzzles_gym/games/tents/config.py +24 -0
- chuk_puzzles_gym/games/tents/game.py +416 -0
- chuk_puzzles_gym/gym_env.py +465 -0
- chuk_puzzles_gym/models/__init__.py +47 -0
- chuk_puzzles_gym/models/base.py +30 -0
- chuk_puzzles_gym/models/config.py +11 -0
- chuk_puzzles_gym/models/enums.py +104 -0
- chuk_puzzles_gym/models/evaluation.py +487 -0
- chuk_puzzles_gym/models/games.py +12 -0
- chuk_puzzles_gym/server.py +1171 -0
- chuk_puzzles_gym/trace/__init__.py +10 -0
- chuk_puzzles_gym/trace/generator.py +726 -0
- chuk_puzzles_gym/utils/__init__.py +4 -0
- chuk_puzzles_gym-0.9.dist-info/METADATA +1471 -0
- chuk_puzzles_gym-0.9.dist-info/RECORD +112 -0
- chuk_puzzles_gym-0.9.dist-info/WHEEL +5 -0
- chuk_puzzles_gym-0.9.dist-info/entry_points.txt +4 -0
- chuk_puzzles_gym-0.9.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,487 @@
|
|
|
1
|
+
"""Evaluation and scoring models for the Puzzle Arcade server.
|
|
2
|
+
|
|
3
|
+
These models support the standardised evaluation schema for benchmarking
|
|
4
|
+
agent performance across puzzles.
|
|
5
|
+
|
|
6
|
+
Re-exports core types from chuk-gym-core for convenience.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import time
|
|
11
|
+
import uuid
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, TextIO
|
|
15
|
+
|
|
16
|
+
# Import core types from chuk-gym-core
|
|
17
|
+
from chuk_gym_core import (
|
|
18
|
+
SolverConfig,
|
|
19
|
+
)
|
|
20
|
+
from pydantic import BaseModel, ConfigDict, Field, computed_field
|
|
21
|
+
|
|
22
|
+
from .enums import DifficultyLevel, EpisodeStatus
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class MoveRecord(BaseModel):
    """Record of a single move in an episode for step-level analysis."""

    # frozen=True: instances cannot be mutated after construction.
    model_config = ConfigDict(frozen=True)

    # Position of the move within its episode; validated to be >= 0.
    step: int = Field(
        ge=0,
        description="Step number in episode (0-indexed)",
    )
    # Raw action text as issued by the player/agent.
    action: str = Field(
        description="Action taken (e.g., 'place 1 5 7')",
    )
    # Required flag: no default, so every record must state validity.
    success: bool = Field(
        description="Whether the move was valid",
    )
    # Defaults to True, i.e. a move counts as progress unless flagged otherwise.
    advances_solution: bool = Field(
        default=True,
        description="Whether this move advances toward solution (not a backtrack)",
    )
    # Defaults to False, i.e. a move is assumed to be unaided.
    hint_used: bool = Field(
        default=False,
        description="Whether this move came from a hint",
    )
    # Offset from the start of the episode; 0 when not supplied.
    timestamp_ms: int = Field(
        default=0,
        description="Milliseconds since episode start",
    )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class EpisodeResult(BaseModel):
    """Complete result of a single puzzle episode with normalized metrics.

    This is the core output format that turns Puzzle Arcade into:
    - A benchmark (comparable scores)
    - An RL environment (reward signals)
    - A regression test suite for agents
    """

    # frozen=True: a result is an immutable record once constructed.
    model_config = ConfigDict(frozen=True)

    # Identity
    game: str = Field(description="Game identifier (e.g., 'sudoku')")
    difficulty: DifficultyLevel = Field(description="Difficulty level")
    seed: int = Field(description="Reproducible puzzle seed")

    # Timing
    started_at: datetime = Field(description="Episode start timestamp")
    ended_at: datetime = Field(description="Episode end timestamp")
    wall_time_ms: int = Field(ge=0, description="Total wall clock time in milliseconds")

    # Outcome
    status: EpisodeStatus = Field(description="Final episode status")

    # Raw metrics (all validated to be non-negative)
    steps_taken: int = Field(ge=0, description="Total valid moves made")
    invalid_actions: int = Field(ge=0, description="Rejected/invalid moves")
    hints_used: int = Field(ge=0, description="Solver hints consumed")
    retries: int = Field(
        default=0,
        ge=0,
        description="Attempts on same cell (backtracking indicator)",
    )

    # Ground truth reference (if available); when given it must be >= 1,
    # which keeps the efficiency ratio below well defined.
    optimal_steps: int | None = Field(
        default=None,
        ge=1,
        description="Minimum steps to solve (from solver)",
    )

    # Solver configuration used
    solver_config: SolverConfig = Field(
        default_factory=SolverConfig,
        description="Solver/hint configuration for this episode",
    )

    # Optional step-level trace
    move_history: list[MoveRecord] = Field(
        default_factory=list,
        description="Complete move history for detailed analysis",
    )

    # Computed normalized metrics
    @computed_field
    @property
    def success(self) -> bool:
        """Whether the puzzle was solved."""
        return self.status == EpisodeStatus.SOLVED

    @computed_field
    @property
    def efficiency_score(self) -> float:
        """Ratio of optimal steps to actual steps (1.0 = optimal).

        Returns 0.0 if puzzle not solved or optimal_steps unknown.
        """
        # steps_taken == 0 is also mapped to 0.0 to avoid dividing by zero.
        if not self.success or self.optimal_steps is None or self.steps_taken == 0:
            return 0.0
        # Clamp at 1.0 so beating the reference step count cannot exceed "optimal".
        return min(1.0, self.optimal_steps / self.steps_taken)

    @computed_field
    @property
    def error_rate(self) -> float:
        """Ratio of invalid actions to total actions."""
        total = self.steps_taken + self.invalid_actions
        if total == 0:
            # No actions at all: report no errors rather than dividing by zero.
            return 0.0
        return self.invalid_actions / total

    @computed_field
    @property
    def error_recovery_rate(self) -> float:
        """Ratio of successful corrections after errors.

        Approximated as: if we had errors but still solved, we recovered.
        More accurate tracking requires move_history analysis.
        """
        if self.invalid_actions == 0:
            return 1.0  # No errors to recover from
        if not self.success:
            return 0.0  # Failed to recover
        # Approximation: solved despite errors
        return 1.0 - self.error_rate

    @computed_field
    @property
    def hint_dependency(self) -> float:
        """Ratio of moves that came from hints (tool dependency)."""
        if self.steps_taken == 0:
            return 0.0
        # Clamp at 1.0 in case more hints were consumed than valid moves made.
        return min(1.0, self.hints_used / self.steps_taken)

    @computed_field
    @property
    def adjusted_score(self) -> float:
        """Final score accounting for efficiency and hint penalties.

        Score = efficiency_score * (1 - hint_penalty * hint_dependency)
        """
        base_score = self.efficiency_score
        penalty = self.solver_config.hint_penalty * self.hint_dependency
        # Floor at 0.0 so a penalty factor above 1 cannot yield a negative score.
        return max(0.0, base_score * (1 - penalty))

    def to_summary_dict(self) -> dict[str, Any]:
        """Build a compact summary of the episode for logging/streaming.

        Returns:
            A dict with the keys ``game``, ``seed``, ``difficulty`` (the enum's
            ``.value``), ``success``, ``steps``, ``invalid``, ``hints``,
            ``efficiency`` (rounded to 3 decimals) and ``time_ms``.
        """
        return {
            "game": self.game,
            "seed": self.seed,
            "difficulty": self.difficulty.value,
            "success": self.success,
            "steps": self.steps_taken,
            "invalid": self.invalid_actions,
            "hints": self.hints_used,
            "efficiency": round(self.efficiency_score, 3),
            "time_ms": self.wall_time_ms,
        }

    def to_jsonl(self) -> str:
        """Serialize the summary dict as single-line JSON for streaming output.

        Returns:
            ``json.dumps`` of :meth:`to_summary_dict` (no trailing newline).
        """
        # Uses the module-level ``json`` import; no function-scope re-import needed.
        return json.dumps(self.to_summary_dict())
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class EvaluationSummary(BaseModel):
    """Aggregated summary of multiple episodes for a game/difficulty."""

    # frozen=True: the summary cannot be mutated after construction.
    model_config = ConfigDict(frozen=True)

    game: str
    difficulty: DifficultyLevel
    # Both counters are supplied by the caller and validated to be >= 0;
    # they are not derived from ``episodes``.
    total_episodes: int = Field(ge=0)
    solved_count: int = Field(ge=0)
    episodes: list[EpisodeResult] = Field(default_factory=list)

    @computed_field
    @property
    def solve_rate(self) -> float:
        """Fraction of episodes solved."""
        episode_total = self.total_episodes
        # Zero episodes: report 0.0 instead of dividing by zero.
        return self.solved_count / episode_total if episode_total != 0 else 0.0

    @computed_field
    @property
    def avg_steps(self) -> float:
        """Average steps taken across all episodes."""
        count = len(self.episodes)
        if count == 0:
            return 0.0
        total_steps = sum(episode.steps_taken for episode in self.episodes)
        return total_steps / count

    @computed_field
    @property
    def avg_efficiency(self) -> float:
        """Average efficiency score across solved episodes."""
        # Unsolved episodes are excluded from both numerator and denominator.
        scores = [episode.efficiency_score for episode in self.episodes if episode.success]
        return sum(scores) / len(scores) if scores else 0.0

    @computed_field
    @property
    def avg_time_ms(self) -> float:
        """Average wall time across all episodes."""
        count = len(self.episodes)
        if count == 0:
            return 0.0
        total_ms = sum(episode.wall_time_ms for episode in self.episodes)
        return total_ms / count
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
class TraceEvent(BaseModel):
    """A single event in an episode trace for JSONL logging."""

    # frozen=True: events are immutable once recorded.
    model_config = ConfigDict(frozen=True)

    type: str = Field(
        description="Event type: episode_start, observation, action, hint, episode_end",
    )
    episode_id: str = Field(
        description="Unique episode identifier",
    )
    timestamp_ms: int = Field(
        description="Milliseconds since episode start",
    )
    data: dict[str, Any] = Field(
        default_factory=dict,
        description="Event-specific data",
    )

    def to_jsonl(self) -> str:
        """Render the event as one line of JSON.

        The envelope keys ``type``, ``id`` and ``ts`` come first, followed by
        the entries of ``data`` flattened into the same object. Because
        ``data`` is merged last, an entry named ``type``, ``id`` or ``ts``
        replaces the corresponding envelope value.
        """
        payload: dict[str, Any] = {
            "type": self.type,
            "id": self.episode_id,
            "ts": self.timestamp_ms,
        }
        payload.update(self.data)
        return json.dumps(payload)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class EpisodeTracer:
    """Traces complete episodes in JSONL format for offline analysis.

    Every event is kept in memory (see ``events``) and, when an output
    destination was given, is also written to it as one JSON object per line
    and flushed immediately.

    Usage:
        tracer = EpisodeTracer(output="traces.jsonl")

        # Start episode
        tracer.start_episode(game="sudoku", seed=42, difficulty="medium")

        # Log observations
        tracer.log_observation(grid=[[...]], valid_actions=[...])

        # Log actions
        tracer.log_action(action="place 1 5 7", success=True)

        # Log hints
        tracer.log_hint(hint="Try row 3, column 4")

        # End episode
        tracer.end_episode(status="solved", moves=45, efficiency=0.92)

    Output format (JSONL):
        {"type":"episode_start","id":"ep_abc123","ts":0,"game":"sudoku","seed":42,"difficulty":"medium"}
        {"type":"observation","id":"ep_abc123","ts":100,"grid":[...],"valid_actions":[...]}
        {"type":"action","id":"ep_abc123","ts":150,"action":"place 1 5 7","success":true}
        {"type":"episode_end","id":"ep_abc123","ts":12400,"status":"solved","moves":45,"efficiency":0.92}
    """

    def __init__(self, output: str | Path | TextIO | None = None):
        """Initialize the tracer.

        Args:
            output: Output destination - file path, Path object, file handle, or None for memory-only.
                A path is opened in append mode as UTF-8 and is closed by
                ``close()``; a handle passed in by the caller is never closed
                by the tracer.
        """
        self._output: TextIO | None = None
        self._owns_file = False
        self._events: list[TraceEvent] = []

        if output is not None:
            if isinstance(output, (str, Path)):
                # We opened it, so we are responsible for closing it.
                self._output = open(output, "a", encoding="utf-8")
                self._owns_file = True
            else:
                self._output = output

        self._episode_id: str | None = None
        # Monotonic clock reading taken at episode start; 0 means "no episode running".
        self._start_time_ns: int = 0
        self._game: str = ""
        self._seed: int = 0
        self._difficulty: str = ""

    def __enter__(self) -> "EpisodeTracer":
        """Enter a ``with`` block; returns the tracer itself."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Leave a ``with`` block, closing the output if the tracer owns it."""
        self.close()

    def close(self) -> None:
        """Close the output file if we own it.

        Safe to call more than once; caller-supplied handles are left open.
        """
        if self._owns_file and self._output is not None:
            self._output.close()
            self._output = None

    def _elapsed_ms(self) -> int:
        """Get milliseconds since episode start (0 when no episode is running)."""
        if self._start_time_ns == 0:
            return 0
        # Monotonic clock: immune to wall-clock adjustments, so never negative.
        return (time.monotonic_ns() - self._start_time_ns) // 1_000_000

    def _emit(self, event: TraceEvent) -> None:
        """Emit an event to output and memory."""
        self._events.append(event)
        if self._output is not None:
            self._output.write(event.to_jsonl() + "\n")
            # Flush per event so a crash loses at most the event being written.
            self._output.flush()

    def start_episode(
        self,
        game: str,
        seed: int,
        difficulty: str,
        solver_config: SolverConfig | None = None,
        **extra: Any,
    ) -> str:
        """Start tracing a new episode.

        Generates a fresh episode ID, restarts the elapsed-time clock and
        clears the in-memory event list before emitting ``episode_start``.

        Args:
            game: Game identifier
            seed: Puzzle seed
            difficulty: Difficulty level
            solver_config: Solver configuration; when given, its
                ``solver_allowed``, ``hint_budget`` and ``hint_penalty`` are
                recorded under the ``solver_config`` key
            **extra: Additional metadata to include (merged last, so it can
                override the keys above)

        Returns:
            Episode ID for reference
        """
        self._episode_id = f"ep_{uuid.uuid4().hex[:12]}"
        self._start_time_ns = time.monotonic_ns()
        self._game = game
        self._seed = seed
        self._difficulty = difficulty
        self._events = []

        data: dict[str, Any] = {
            "game": game,
            "seed": seed,
            "difficulty": difficulty,
        }
        if solver_config is not None:
            data["solver_config"] = {
                "solver_allowed": solver_config.solver_allowed,
                "hint_budget": solver_config.hint_budget,
                "hint_penalty": solver_config.hint_penalty,
            }
        data.update(extra)

        # The start event is always stamped at t=0 by definition.
        event = TraceEvent(type="episode_start", episode_id=self._episode_id, timestamp_ms=0, data=data)
        self._emit(event)

        return self._episode_id

    def log_observation(self, grid: Any = None, valid_actions: list[str] | None = None, **extra: Any) -> None:
        """Log a state observation.

        Does nothing when no episode is in progress.

        Args:
            grid: Current grid state (omitted from the event when None)
            valid_actions: List of valid actions (omitted from the event when None)
            **extra: Additional observation data
        """
        if not self._episode_id:
            return

        data: dict[str, Any] = {}
        if grid is not None:
            data["grid"] = grid
        if valid_actions is not None:
            data["valid_actions"] = valid_actions
        data.update(extra)

        event = TraceEvent(type="observation", episode_id=self._episode_id, timestamp_ms=self._elapsed_ms(), data=data)
        self._emit(event)

    def log_action(self, action: str, success: bool, **extra: Any) -> None:
        """Log an action taken.

        Does nothing when no episode is in progress.

        Args:
            action: Action string (e.g., 'place 1 5 7')
            success: Whether the action was valid
            **extra: Additional action data (e.g., result message)
        """
        if not self._episode_id:
            return

        data: dict[str, Any] = {"action": action, "success": success}
        data.update(extra)

        event = TraceEvent(type="action", episode_id=self._episode_id, timestamp_ms=self._elapsed_ms(), data=data)
        self._emit(event)

    def log_hint(self, hint: str, hints_remaining: int | None = None, **extra: Any) -> None:
        """Log a hint request.

        Does nothing when no episode is in progress.

        Args:
            hint: Hint content
            hints_remaining: Remaining hint budget (omitted from the event when None)
            **extra: Additional hint data
        """
        if not self._episode_id:
            return

        data: dict[str, Any] = {"hint": hint}
        if hints_remaining is not None:
            data["hints_remaining"] = hints_remaining
        data.update(extra)

        event = TraceEvent(type="hint", episode_id=self._episode_id, timestamp_ms=self._elapsed_ms(), data=data)
        self._emit(event)

    def log_reasoning(self, thought: str, **extra: Any) -> None:
        """Log agent reasoning/thought.

        Does nothing when no episode is in progress.

        Args:
            thought: Reasoning content
            **extra: Additional data
        """
        if not self._episode_id:
            return

        data: dict[str, Any] = {"thought": thought}
        data.update(extra)

        event = TraceEvent(type="reasoning", episode_id=self._episode_id, timestamp_ms=self._elapsed_ms(), data=data)
        self._emit(event)

    def end_episode(
        self,
        status: str | EpisodeStatus,
        moves: int = 0,
        invalid_moves: int = 0,
        hints_used: int = 0,
        optimal_steps: int | None = None,
        **extra: Any,
    ) -> None:
        """End the current episode.

        Emits an ``episode_end`` event and then clears the current episode ID
        and clock. Does nothing when no episode is in progress. The recorded
        events remain available through ``events`` until the next
        ``start_episode`` call.

        Args:
            status: Final status (solved, failed, timeout, abandoned); an
                ``EpisodeStatus`` is converted to its ``.value``
            moves: Total valid moves made
            invalid_moves: Total invalid attempts
            hints_used: Total hints consumed
            optimal_steps: Minimum steps (if known)
            **extra: Additional final data (merged last, so it can override
                the computed keys, including ``efficiency``)
        """
        if not self._episode_id:
            return

        if isinstance(status, EpisodeStatus):
            status = status.value

        elapsed = self._elapsed_ms()
        # Efficiency is only meaningful for a solved episode with a known,
        # non-zero optimum and at least one move; otherwise it stays 0.0.
        efficiency = 0.0
        if status == "solved" and optimal_steps and moves > 0:
            efficiency = min(1.0, optimal_steps / moves)

        data: dict[str, Any] = {
            "status": status,
            "moves": moves,
            "invalid_moves": invalid_moves,
            "hints_used": hints_used,
            "wall_time_ms": elapsed,
        }
        if optimal_steps is not None:
            data["optimal_steps"] = optimal_steps
        data["efficiency"] = round(efficiency, 3)
        data.update(extra)

        event = TraceEvent(type="episode_end", episode_id=self._episode_id, timestamp_ms=elapsed, data=data)
        self._emit(event)

        # Reset state
        self._episode_id = None
        self._start_time_ns = 0

    @property
    def events(self) -> list[TraceEvent]:
        """Get all events for current/last episode.

        Returns a shallow copy, so mutating the result does not affect the tracer.
        """
        return self._events.copy()

    @property
    def current_episode_id(self) -> str | None:
        """Get current episode ID, or None if not in episode."""
        return self._episode_id
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Game-related Pydantic models.
|
|
2
|
+
|
|
3
|
+
Note: Game-specific models have been moved to their respective game folders:
|
|
4
|
+
- games/scheduler/models.py - Task
|
|
5
|
+
- games/knapsack/models.py - Item
|
|
6
|
+
- games/kenken/models.py - Cage
|
|
7
|
+
- games/killer_sudoku/models.py - Cage
|
|
8
|
+
- games/einstein/models.py - HouseAssignment
|
|
9
|
+
- games/logic_grid/models.py - LogicGridCategories, PersonAttributes
|
|
10
|
+
|
|
11
|
+
This file is kept for backwards compatibility but is essentially empty.
|
|
12
|
+
"""
|