chuk-puzzles-gym 0.9__py3-none-any.whl → 0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chuk_puzzles_gym/eval.py +21 -0
- chuk_puzzles_gym/games/__init__.py +22 -0
- chuk_puzzles_gym/games/cryptarithmetic/__init__.py +7 -0
- chuk_puzzles_gym/games/cryptarithmetic/commands.py +75 -0
- chuk_puzzles_gym/games/cryptarithmetic/config.py +23 -0
- chuk_puzzles_gym/games/cryptarithmetic/game.py +383 -0
- chuk_puzzles_gym/games/graph_coloring/__init__.py +7 -0
- chuk_puzzles_gym/games/graph_coloring/commands.py +79 -0
- chuk_puzzles_gym/games/graph_coloring/config.py +24 -0
- chuk_puzzles_gym/games/graph_coloring/game.py +309 -0
- chuk_puzzles_gym/games/nqueens/__init__.py +6 -0
- chuk_puzzles_gym/games/nqueens/config.py +23 -0
- chuk_puzzles_gym/games/nqueens/game.py +316 -0
- chuk_puzzles_gym/games/numberlink/__init__.py +6 -0
- chuk_puzzles_gym/games/numberlink/config.py +23 -0
- chuk_puzzles_gym/games/numberlink/game.py +338 -0
- chuk_puzzles_gym/games/rush_hour/__init__.py +8 -0
- chuk_puzzles_gym/games/rush_hour/commands.py +57 -0
- chuk_puzzles_gym/games/rush_hour/config.py +25 -0
- chuk_puzzles_gym/games/rush_hour/game.py +475 -0
- chuk_puzzles_gym/games/rush_hour/models.py +15 -0
- chuk_puzzles_gym/games/skyscrapers/__init__.py +6 -0
- chuk_puzzles_gym/games/skyscrapers/config.py +22 -0
- chuk_puzzles_gym/games/skyscrapers/game.py +277 -0
- chuk_puzzles_gym/server.py +1 -1
- chuk_puzzles_gym/trace/generator.py +87 -0
- {chuk_puzzles_gym-0.9.dist-info → chuk_puzzles_gym-0.10.dist-info}/METADATA +60 -19
- {chuk_puzzles_gym-0.9.dist-info → chuk_puzzles_gym-0.10.dist-info}/RECORD +31 -9
- {chuk_puzzles_gym-0.9.dist-info → chuk_puzzles_gym-0.10.dist-info}/WHEEL +1 -1
- {chuk_puzzles_gym-0.9.dist-info → chuk_puzzles_gym-0.10.dist-info}/entry_points.txt +0 -0
- {chuk_puzzles_gym-0.9.dist-info → chuk_puzzles_gym-0.10.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""Skyscrapers puzzle game implementation."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from ...models import DifficultyLevel, DifficultyProfile, MoveResult
|
|
6
|
+
from .._base import PuzzleGame
|
|
7
|
+
from .config import SkyscrapersConfig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SkyscrapersGame(PuzzleGame):
    """Skyscrapers puzzle - fill a Latin square with visibility clues.

    Rules:
    - Fill an NxN grid with numbers 1 to N
    - Each row and column must contain each number exactly once (Latin square)
    - Numbers represent building heights
    - Clues around the border indicate how many buildings are visible from that direction
    - A taller building hides all shorter buildings behind it
    """

    def __init__(self, difficulty: str = "easy", seed: int | None = None, **kwargs):
        """Create an empty game; call ``generate_puzzle`` to populate it.

        Args:
            difficulty: Difficulty name, forwarded to the base class.
            seed: Optional seed, forwarded to the base class.
            **kwargs: Extra options forwarded unchanged to the base class.
        """
        super().__init__(difficulty, seed, **kwargs)
        self.config = SkyscrapersConfig.from_difficulty(self.difficulty)
        self.size = self.config.size
        # Working grid edited by moves; 0 marks an empty cell.
        self.grid: list[list[int]] = []
        # Fully filled grid that the border clues were computed from.
        self.solution: list[list[int]] = []
        # Snapshot of the grid as dealt; its non-zero cells cannot be modified.
        self.initial_grid: list[list[int]] = []
        # Border clues: one entry per column for top/bottom, per row for left/right.
        self.clues: dict[str, list[int]] = {"top": [], "bottom": [], "left": [], "right": []}

    @property
    def name(self) -> str:
        """Display name of the game."""
        return "Skyscrapers"

    @property
    def description(self) -> str:
        """One-line summary of the game."""
        return "Fill the grid with building heights using visibility clues"

    @property
    def constraint_types(self) -> list[str]:
        """Tags naming the kinds of constraint this puzzle uses."""
        return ["all_different", "visibility", "ordering", "boundary_clues"]

    @property
    def business_analogies(self) -> list[str]:
        """Tags naming real-world problems this puzzle is compared to."""
        return ["urban_planning", "line_of_sight_analysis", "signal_visibility"]

    @property
    def complexity_profile(self) -> dict[str, str]:
        """Qualitative complexity labels for this puzzle."""
        return {
            "reasoning_type": "deductive",
            "search_space": "medium",
            "constraint_density": "dense",
        }

    @property
    def complexity_metrics(self) -> dict[str, int | float]:
        """Numeric complexity figures derived from the board size and current grid."""
        empty = sum(1 for row in self.grid for cell in row if cell == 0)
        return {
            "variable_count": self.size * self.size,
            # One uniqueness constraint per row and per column (2N),
            # plus one visibility clue per border position (4N).
            "constraint_count": 2 * self.size + 4 * self.size,
            "domain_size": self.size,
            "branching_factor": self.size / 2.0,
            "empty_cells": empty,
        }

    @property
    def difficulty_profile(self) -> DifficultyProfile:
        """Difficulty characteristics for the current difficulty level.

        Raises:
            KeyError: If ``self.difficulty`` is not EASY, MEDIUM or HARD.
        """
        profiles = {
            DifficultyLevel.EASY: DifficultyProfile(
                logic_depth=2, branching_factor=2.0, state_observability=1.0, constraint_density=0.7
            ),
            DifficultyLevel.MEDIUM: DifficultyProfile(
                logic_depth=4, branching_factor=3.0, state_observability=1.0, constraint_density=0.6
            ),
            DifficultyLevel.HARD: DifficultyProfile(
                logic_depth=6, branching_factor=4.0, state_observability=1.0, constraint_density=0.5
            ),
        }
        return profiles[self.difficulty]

    def _compute_visibility(self, line: list[int]) -> int:
        """Count how many buildings are visible from the start of a line.

        A building is visible when it is strictly taller than every building
        before it. Empty cells (0) are never counted.
        """
        count = 0
        max_height = 0
        for h in line:
            if h > max_height:
                count += 1
                max_height = h
        return count

    def _generate_latin_square(self) -> list[list[int]]:
        """Generate a random NxN Latin square.

        Starts from a cyclic shifted-row square, then shuffles rows, columns
        and value labels; each of those operations keeps the Latin property.
        The three shuffles use ``self._rng`` in a fixed order (rows, columns,
        values), so the result depends only on the generator state.
        """
        n = self.size
        # Start with a shifted-row construction
        base = list(range(1, n + 1))
        grid = []
        for r in range(n):
            row = [(base[(r + c) % n]) for c in range(n)]
            grid.append(row)

        # Shuffle rows
        rows = list(range(n))
        self._rng.shuffle(rows)
        grid = [grid[r] for r in rows]

        # Shuffle columns
        cols = list(range(n))
        self._rng.shuffle(cols)
        grid = [[row[c] for c in cols] for row in grid]

        # Shuffle values (relabel)
        perm = list(range(1, n + 1))
        self._rng.shuffle(perm)
        mapping = {i + 1: perm[i] for i in range(n)}
        grid = [[mapping[cell] for cell in row] for row in grid]

        return grid

    def _compute_all_clues(self, grid: list[list[int]]) -> dict[str, list[int]]:
        """Compute visibility clues from all 4 directions.

        Returns:
            A dict with keys ``top``/``bottom`` (one clue per column, left to
            right) and ``left``/``right`` (one clue per row, top to bottom).
        """
        n = self.size
        clues: dict[str, list[int]] = {"top": [], "bottom": [], "left": [], "right": []}

        for c in range(n):
            col = [grid[r][c] for r in range(n)]
            clues["top"].append(self._compute_visibility(col))
            clues["bottom"].append(self._compute_visibility(col[::-1]))

        for r in range(n):
            clues["left"].append(self._compute_visibility(grid[r]))
            clues["right"].append(self._compute_visibility(grid[r][::-1]))

        return clues

    async def generate_puzzle(self) -> None:
        """Generate a Skyscrapers puzzle.

        Builds a solution grid, derives all border clues from it, then blanks
        a difficulty-dependent share of randomly chosen cells. No uniqueness
        check is made on the resulting puzzle.

        Raises:
            KeyError: If ``self.difficulty`` is not EASY, MEDIUM or HARD.
        """
        self.solution = self._generate_latin_square()
        self.clues = self._compute_all_clues(self.solution)

        # Copy solution to grid, then remove cells based on difficulty
        self.grid = [row[:] for row in self.solution]

        # Determine cells to remove
        n = self.size
        total_cells = n * n
        remove_map = {
            DifficultyLevel.EASY: int(total_cells * 0.45),
            DifficultyLevel.MEDIUM: int(total_cells * 0.60),
            DifficultyLevel.HARD: int(total_cells * 0.75),
        }
        cells_to_remove = remove_map[self.difficulty]

        # Randomly remove cells
        all_cells = [(r, c) for r in range(n) for c in range(n)]
        self._rng.shuffle(all_cells)
        for r, c in all_cells[:cells_to_remove]:
            self.grid[r][c] = 0

        self.initial_grid = [row[:] for row in self.grid]
        self.game_started = True

    async def validate_move(self, row: int, col: int, num: int) -> MoveResult:
        """Validate placing a height value.

        Every attempt, accepted or rejected, is reported to ``record_move``.
        Only bounds, locked cells, value range and row/column uniqueness are
        checked here; visibility clues are not.

        Args:
            row: 1-indexed row
            col: 1-indexed column
            num: Height value (1-N) or 0 to clear

        Returns:
            A ``MoveResult`` describing success or the reason for rejection.
        """
        n = self.size
        r, c = row - 1, col - 1

        if not (0 <= r < n and 0 <= c < n):
            self.record_move((row, col), False)
            return MoveResult(success=False, message=f"Position ({row}, {col}) is out of bounds.")

        if self.initial_grid[r][c] != 0:
            self.record_move((row, col), False)
            return MoveResult(success=False, message="Cannot modify an initial cell.")

        if num == 0:
            self.grid[r][c] = 0
            self.record_move((row, col), True)
            return MoveResult(success=True, message=f"Cleared cell ({row}, {col}).", state_changed=True)

        if not (1 <= num <= n):
            self.record_move((row, col), False)
            return MoveResult(success=False, message=f"Value must be between 1 and {n}.")

        # Check row uniqueness (the target cell itself is ignored)
        if any(self.grid[r][cc] == num for cc in range(n) if cc != c):
            self.record_move((row, col), False)
            return MoveResult(
                success=False,
                message=f"Value {num} already exists in row {row}.",
            )

        # Check column uniqueness (the target cell itself is ignored)
        if any(self.grid[rr][c] == num for rr in range(n) if rr != r):
            self.record_move((row, col), False)
            return MoveResult(
                success=False,
                message=f"Value {num} already exists in column {col}.",
            )

        self.grid[r][c] = num
        self.record_move((row, col), True)
        return MoveResult(
            success=True,
            message=f"Placed {num} at ({row}, {col}).",
            state_changed=True,
        )

    def _is_valid_solution(self, grid: list[list[int]]) -> bool:
        """Return True if ``grid`` is a full Latin square that matches every clue."""
        n = self.size
        if len(grid) != n or any(len(row) != n for row in grid):
            return False

        expected = set(range(1, n + 1))
        # A row/column of length n whose value set equals 1..n is a permutation.
        if any(set(row) != expected for row in grid):
            return False
        if any({grid[r][c] for r in range(n)} != expected for c in range(n)):
            return False

        actual = self._compute_all_clues(grid)
        for side, wanted in self.clues.items():
            if len(wanted) != n:
                return False
            # Clues <= 0 are rendered as blanks, so treat them as "no clue".
            if any(w > 0 and w != got for w, got in zip(wanted, actual[side])):
                return False
        return True

    def is_complete(self) -> bool:
        """Check if the puzzle is solved correctly.

        A game that has not been generated yet (empty grid) is never complete.
        Otherwise the grid is solved when it equals the stored solution or is
        any other fill that satisfies the Latin-square rule and all clues;
        puzzle generation does not guarantee the stored solution is unique.
        """
        if not self.grid:
            return False
        if self.grid == self.solution:
            return True
        return self._is_valid_solution(self.grid)

    async def get_hint(self) -> tuple[Any, str] | None:
        """Get a hint - suggest a cell to fill.

        Returns:
            ``((row, col, value), message)`` for the first empty cell in
            row-major order, using 1-indexed coordinates and the stored
            solution's value, or ``None`` when hints are not allowed or no
            cell is empty.
        """
        if not self.can_use_hint():
            return None
        n = self.size
        for r in range(n):
            for c in range(n):
                if self.grid[r][c] == 0:
                    val = self.solution[r][c]
                    return (
                        (r + 1, c + 1, val),
                        f"Try placing {val} at row {r + 1}, column {c + 1}.",
                    )
        return None

    def render_grid(self) -> str:
        """Render the puzzle with visibility clues.

        Every cell occupies a three-character column so that the top and
        bottom clues sit directly above and below their cells and the row
        pipes line up with the ``+`` corners of the border. Empty cells are
        drawn as ``.`` and clues that are not positive are drawn as blanks.
        """
        n = self.size

        def clue_text(value: int) -> str:
            return str(value) if value > 0 else " "

        # The " L |" gutter is four characters wide, so the border starts at
        # column 3 and the first cell (and its top/bottom clue) at column 5.
        border = "   +" + "---" * n + "+"
        clue_indent = "     "

        lines = [
            clue_indent + "  ".join(clue_text(c) for c in self.clues["top"]),
            border,
        ]

        # Grid rows with left/right clues
        for r in range(n):
            left = clue_text(self.clues["left"][r])
            right = clue_text(self.clues["right"][r])
            cells = "  ".join(str(v) if v != 0 else "." for v in self.grid[r])
            lines.append(f" {left} | {cells} | {right}")

        # Bottom border and clues
        lines.append(border)
        lines.append(clue_indent + "  ".join(clue_text(c) for c in self.clues["bottom"]))

        return "\n".join(lines)

    def get_rules(self) -> str:
        """Return the rules text, including the current board size."""
        return (
            f"SKYSCRAPERS ({self.size}x{self.size})\n"
            f"Fill the grid with numbers 1 to {self.size}.\n"
            "Each row and column must contain each number exactly once.\n"
            "Numbers represent building heights.\n"
            "Clues around the border show how many buildings are visible from that direction.\n"
            "A taller building hides all shorter ones behind it."
        )

    def get_commands(self) -> str:
        """Return the help text listing the commands available in this game."""
        return (
            "Commands:\n"
            f" place <row> <col> <height> - Place a height (1-{self.size})\n"
            " clear <row> <col> - Clear a cell\n"
            " hint - Get a hint\n"
            " check - Check if solved\n"
            " show - Show current state\n"
            " menu - Return to menu"
        )
|
chuk_puzzles_gym/server.py
CHANGED
|
@@ -202,7 +202,7 @@ class ArcadeHandler(TelnetHandler):
|
|
|
202
202
|
# Set up command handler if available for this game
|
|
203
203
|
handler_class = GAME_COMMAND_HANDLERS.get(game_id.lower())
|
|
204
204
|
if handler_class:
|
|
205
|
-
self.game_handler = handler_class(self.current_game)
|
|
205
|
+
self.game_handler = handler_class(self.current_game) # type: ignore[abstract]
|
|
206
206
|
else:
|
|
207
207
|
self.game_handler = None
|
|
208
208
|
|
|
@@ -711,6 +711,93 @@ class TraceGenerator:
|
|
|
711
711
|
"""Generate trace for Einstein's Puzzle (handles 'einstein's puzzle' -> 'einsteins_puzzle')."""
|
|
712
712
|
return self._generate_einstein(game)
|
|
713
713
|
|
|
714
|
+
def _generate_graph_coloring(self, game: PuzzleGame) -> Trace:
    """Generate trace for Graph Coloring puzzle.

    Emits one PLACE step, in sorted node order, for every node of the game's
    ``solution`` that is not already present in its ``initial_coloring``.
    Either attribute is treated as empty when the game does not define it.
    """
    trace_id = f"graph_coloring_{game.difficulty.value}_{game.seed}"
    final_colors = getattr(game, "solution", {})
    preset_colors = getattr(game, "initial_coloring", {})

    steps: list[Step] = []
    for node in sorted(final_colors.keys()):
        # Nodes colored from the start need no step of their own.
        if node in preset_colors:
            continue
        color = final_colors[node]
        steps.append(
            Step(
                index=len(steps),  # steps are numbered consecutively from 0
                operation=StepOperation.PLACE,
                before_state=f"node({node})=uncolored",
                after_state=f"node({node})=color{color}",
                output_value=color,
                position=(node,),
                rule_applied="graph_coloring_constraint",
                explanation=f"Color node {node} with color {color}, avoiding conflicts with adjacent nodes.",
            )
        )

    checkpoints = self._identify_checkpoints(steps)
    return Trace(problem_id=trace_id, steps=steps, checkpoints=checkpoints)
|
|
744
|
+
|
|
745
|
+
def _generate_cryptarithmetic(self, game: PuzzleGame) -> Trace:
    """Generate trace for Cryptarithmetic puzzle.

    Emits one DEDUCE step, in sorted letter order, for every letter of the
    game's ``letter_mapping`` that is not already present in its
    ``initial_mapping``. Either attribute is treated as empty when the game
    does not define it.
    """
    trace_id = f"cryptarithmetic_{game.difficulty.value}_{game.seed}"
    full_mapping = getattr(game, "letter_mapping", {})
    given_mapping = getattr(game, "initial_mapping", {})

    steps: list[Step] = []
    for letter in sorted(full_mapping.keys()):
        # Letters revealed from the start need no step of their own.
        if letter in given_mapping:
            continue
        digit = full_mapping[letter]
        steps.append(
            Step(
                index=len(steps),  # steps are numbered consecutively from 0
                operation=StepOperation.DEDUCE,
                before_state=f"letter({letter})=unknown",
                after_state=f"letter({letter})={digit}",
                output_value=digit,
                rule_applied="arithmetic_constraint",
                explanation=f"Assign digit {digit} to letter {letter} to satisfy the equation.",
            )
        )

    checkpoints = self._identify_checkpoints(steps)
    return Trace(problem_id=trace_id, steps=steps, checkpoints=checkpoints)
|
|
774
|
+
|
|
775
|
+
def _generate_rush_hour(self, game: PuzzleGame) -> Trace:
    """Generate trace for Rush Hour puzzle.

    The trace does not list individual moves. When the game has a non-empty
    ``vehicles`` attribute it contains a single summary DEDUCE step that
    quotes ``min_solution_moves`` (or ``?`` if the game lacks it); otherwise
    it contains no steps.
    """
    trace_id = f"rush_hour_{game.difficulty.value}_{game.seed}"
    steps: list[Step] = []

    if getattr(game, "vehicles", {}):
        min_moves = getattr(game, "min_solution_moves", "?")
        summary = (
            "Slide vehicles to clear a path for car X to reach the exit. "
            f"Minimum solution: {min_moves} moves."
        )
        steps.append(
            Step(
                index=0,
                operation=StepOperation.DEDUCE,
                before_state="target_car=blocked",
                after_state="target_car=exit",
                output_value="solve",
                rule_applied="sequential_planning",
                explanation=summary,
            )
        )

    checkpoints = self._identify_checkpoints(steps)
    return Trace(problem_id=trace_id, steps=steps, checkpoints=checkpoints)
|
|
800
|
+
|
|
714
801
|
|
|
715
802
|
def generate_trace(game: PuzzleGame) -> Trace:
|
|
716
803
|
"""
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: chuk-puzzles-gym
|
|
3
|
-
Version: 0.9
|
|
4
|
-
Summary: Multi-game puzzle gym for LLM training and benchmarking - 24 constraint puzzles with synthetic data generation
|
|
3
|
+
Version: 0.10
|
|
4
|
+
Summary: Multi-game puzzle gym for LLM training and benchmarking - 30 constraint puzzles with synthetic data generation
|
|
5
5
|
Author: Chris Hay
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/chrishayuk/chuk-puzzles-gym
|
|
@@ -39,7 +39,7 @@ Requires-Dist: ipython>=8.14.0; extra == "dev"
|
|
|
39
39
|
[Pydantic v2](https://docs.pydantic.dev/)
|
|
40
40
|
[mypy](http://mypy-lang.org/)
|
|
41
41
|
|
|
42
|
-
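A **multi-game puzzle gym** for **LLM training and benchmarking**, hosting 24 different logic puzzle types with synthetic data generation. Built using [chuk-gym-core](https://github.com/chrishayuk/chuk-gym-core) and [chuk-protocol-server](https://github.com/chrishayuk/chuk-protocol-server).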
|
|
42
|
+
A **multi-game puzzle gym** for **LLM training and benchmarking**, hosting 30 different logic puzzle types with synthetic data generation. Built using [chuk-gym-core](https://github.com/chrishayuk/chuk-gym-core) and [chuk-protocol-server](https://github.com/chrishayuk/chuk-protocol-server).
|
|
43
43
|
|
|
44
44
|
**Perfect for:**
|
|
45
45
|
- 🤖 **LLM Agent Testing** - Benchmark reasoning capabilities across constraint types
|
|
@@ -82,12 +82,13 @@ Once connected, type `help` to see available games, or `sudoku easy` to start pl
|
|
|
82
82
|
|
|
83
83
|
## Features
|
|
84
84
|
|
|
85
|
-
- **24 Puzzle Games** with three difficulty levels each (easy, medium, hard)
|
|
85
|
+
- **30 Puzzle Games** with three difficulty levels each (easy, medium, hard)
|
|
86
86
|
- **7 Classic Logic Puzzles** - Sudoku, KenKen, Kakuro, Binary, Futoshiki, Nonogram, Logic Grid
|
|
87
87
|
- **7 Advanced CP-SAT Puzzles** - Killer Sudoku, Lights Out, Mastermind, Slitherlink, Bridges, Hitori, Shikaku
|
|
88
88
|
- **5 Specialized Constraint Puzzles** - Hidato, Tents and Trees, Fillomino, Star Battle, Sokoban
|
|
89
89
|
- **2 Optimization Challenges** - Knapsack, Task Scheduler
|
|
90
90
|
- **3 Advanced Reasoning Puzzles** - Nurikabe, Einstein's Puzzle, Minesweeper
|
|
91
|
+
- **6 Combinatorial & Search Puzzles** - Skyscrapers, N-Queens, Numberlink, Graph Coloring, Cryptarithmetic, Rush Hour
|
|
91
92
|
- **Agent-Friendly Mode** - Structured output with clear markers for AI agents and tools
|
|
92
93
|
- Enable with `mode agent` command
|
|
93
94
|
- Machine-parseable grid format with clear start/end markers
|
|
@@ -113,7 +114,7 @@ Once connected, type `help` to see available games, or `sudoku easy` to start pl
|
|
|
113
114
|
- **Clean ASCII art grids** - perfectly aligned for easy parsing
|
|
114
115
|
- **Deterministic seeding** - Replay any puzzle with the same seed
|
|
115
116
|
- **Gymnasium-compatible RL Environment** (`PuzzleEnv`) for training agents
|
|
116
|
-
- **Comprehensive test suite** (
|
|
117
|
+
- **Comprehensive test suite** (1323 tests, 94% coverage)
|
|
117
118
|
- **Modern Python best practices:**
|
|
118
119
|
- **Pydantic v2 native** - All models use ConfigDict for type safety
|
|
119
120
|
- **Async native** - Full async/await support throughout
|
|
@@ -173,6 +174,17 @@ Once connected, type `help` to see available games, or `sudoku easy` to start pl
|
|
|
173
174
|
| **Einstein's Puzzle** | 5 houses × 5 attributes | Multi-attribute deduction + Logic chains | ✅ Complete |
|
|
174
175
|
| **Minesweeper** | 6×6 to 10×10 | Probabilistic reasoning + Safe deduction | ✅ Complete |
|
|
175
176
|
|
|
177
|
+
### Combinatorial & Search Puzzles
|
|
178
|
+
|
|
179
|
+
| Game | Grid Size | Constraint Types | Status |
|
|
180
|
+
|------|-----------|------------------|--------|
|
|
181
|
+
| **Skyscrapers** | 4×4 to 6×6 | Latin square + Visibility clues from 4 borders | ✅ Complete |
|
|
182
|
+
| **N-Queens** | 6×6 to 12×12 | Placement + Row/Column/Diagonal attack avoidance | ✅ Complete |
|
|
183
|
+
| **Numberlink** | 5×5 to 9×9 | Path connectivity + Non-crossing + Space filling | ✅ Complete |
|
|
184
|
+
| **Graph Coloring** | 6-15 nodes | Graph coloring + Inequality + Global constraint | ✅ Complete |
|
|
185
|
+
| **Cryptarithmetic** | 3-5 digit words | Arithmetic + AllDifferent + Carry propagation | ✅ Complete |
|
|
186
|
+
| **Rush Hour** | 6×6 | Sequential planning + Spatial blocking + Search | ✅ Complete |
|
|
187
|
+
|
|
176
188
|
## Solver Profiles & Business Mapping
|
|
177
189
|
|
|
178
190
|
Each game includes metadata for **constraint types**, **business analogies**, and **complexity profiles**, making it easy to:
|
|
@@ -216,7 +228,13 @@ resource_games = [
|
|
|
216
228
|
| **Global Loop** | Slitherlink | Circuit design, Path finding |
|
|
217
229
|
| **Boolean SAT** | Lights Out | Feature dependencies, Toggle systems |
|
|
218
230
|
| **Cage Sums** | Killer Sudoku, Kakuro | Team budgets, Grouped constraints |
|
|
219
|
-
| **AllDifferent** | Sudoku, KenKen | Resource uniqueness, Assignment problems |
|
|
231
|
+
| **AllDifferent** | Sudoku, KenKen, Skyscrapers | Resource uniqueness, Assignment problems |
|
|
232
|
+
| **Visibility/Ordering** | Skyscrapers | Priority ranking, Stack-based processing |
|
|
233
|
+
| **Attack Avoidance** | N-Queens, Star Battle | Non-conflicting resource placement |
|
|
234
|
+
| **Path Connectivity** | Numberlink, Nurikabe | Network routing, Cable layout |
|
|
235
|
+
| **Graph Coloring** | Graph Coloring | Frequency assignment, Register allocation, Scheduling |
|
|
236
|
+
| **Arithmetic Deduction** | Cryptarithmetic, KenKen | Code breaking, Constraint propagation |
|
|
237
|
+
| **Sequential Planning** | Rush Hour, Sokoban | Logistics planning, Deadlock resolution |
|
|
220
238
|
|
|
221
239
|
## Quick Start
|
|
222
240
|
|
|
@@ -397,6 +415,14 @@ ADVANCED REASONING PUZZLES:
|
|
|
397
415
|
23) Einstein's Puzzle - Who owns the fish? Multi-attribute deduction
|
|
398
416
|
24) Minesweeper - Find all mines using logical deduction
|
|
399
417
|
|
|
418
|
+
COMBINATORIAL & SEARCH PUZZLES:
|
|
419
|
+
25) Skyscrapers - Latin square with visibility clues from borders
|
|
420
|
+
26) N-Queens - Place queens with no row/column/diagonal conflicts
|
|
421
|
+
27) Numberlink - Connect pairs with non-crossing paths filling the grid
|
|
422
|
+
28) Graph Coloring - Color nodes so no adjacent pair shares a color
|
|
423
|
+
29) Cryptarithmetic - Assign digits to letters to satisfy an equation
|
|
424
|
+
30) Rush Hour - Slide vehicles to free the target car to the exit
|
|
425
|
+
|
|
400
426
|
Commands:
|
|
401
427
|
<number> - Select game by number
|
|
402
428
|
<name> - Select game by name (e.g., 'sudoku')
|
|
@@ -453,7 +479,7 @@ The project includes a **Gymnasium-compatible environment** for training reinfor
|
|
|
453
479
|
```python
|
|
454
480
|
from chuk_puzzles_gym.gym_env import PuzzleEnv
|
|
455
481
|
|
|
456
|
-
# Create environment for any of the 24 games
|
|
482
|
+
# Create environment for any of the 30 games
|
|
457
483
|
env = PuzzleEnv("sudoku", difficulty="easy", seed=42)
|
|
458
484
|
|
|
459
485
|
# Reset to start a new episode
|
|
@@ -472,7 +498,7 @@ games = PuzzleEnv.available_games()
|
|
|
472
498
|
|
|
473
499
|
### Features
|
|
474
500
|
|
|
475
|
-
- **All 24 games** accessible through unified API
|
|
501
|
+
- **All 30 games** accessible through unified API
|
|
476
502
|
- **Configurable rewards** for correct moves, invalid attempts, completion bonuses
|
|
477
503
|
- **Hint system** with optional budget limits
|
|
478
504
|
- **Solver-free mode** for pure reasoning benchmarks
|
|
@@ -586,7 +612,7 @@ Generate synthetic puzzle datasets for training and benchmarking LLMs and constr
|
|
|
586
612
|
### CLI Usage
|
|
587
613
|
|
|
588
614
|
```bash
|
|
589
|
-
# Generate 100 puzzles per game/difficulty for all 24 games
|
|
615
|
+
# Generate 100 puzzles per game/difficulty for all 30 games
|
|
590
616
|
chuk-puzzles-export -o puzzles.jsonl
|
|
591
617
|
|
|
592
618
|
# Specific games only
|
|
@@ -744,6 +770,9 @@ When `include_trace=True` (default), each problem includes step-by-step solution
|
|
|
744
770
|
| Hitori | `duplicate_elimination` |
|
|
745
771
|
| Bridges | `connectivity_constraint` |
|
|
746
772
|
| Slitherlink | `loop_constraint` |
|
|
773
|
+
| Graph Coloring | `graph_coloring_constraint` |
|
|
774
|
+
| Cryptarithmetic | `arithmetic_constraint` |
|
|
775
|
+
| Rush Hour | `sequential_planning` |
|
|
747
776
|
| Others | `constraint_propagation` |
|
|
748
777
|
|
|
749
778
|
### Example: Generate Training Data
|
|
@@ -772,9 +801,9 @@ With default settings (`-n 100` per game/difficulty):
|
|
|
772
801
|
|
|
773
802
|
| Configuration | Problems Generated |
|
|
774
803
|
|--------------|-------------------|
|
|
775
|
-
| All games, all difficulties | 24 games × 3 difficulties × 100 = 7,200 |
|
|
804
|
+
| All games, all difficulties | 30 games × 3 difficulties × 100 = 9,000 |
|
|
776
805
|
| Single game, all difficulties | 1 × 3 × 100 = 300 |
|
|
777
|
-
| All games, single difficulty | 24 × 1 × 100 = 2,400 |
|
|
806
|
+
| All games, single difficulty | 30 × 1 × 100 = 3,000 |
|
|
778
807
|
|
|
779
808
|
### Integration with chuk-gym-core
|
|
780
809
|
|
|
@@ -983,7 +1012,7 @@ pip install -e ".[dev]"
|
|
|
983
1012
|
|
|
984
1013
|
### Testing
|
|
985
1014
|
|
|
986
|
-
The project has comprehensive test coverage (94%,
|
|
1015
|
+
The project has comprehensive test coverage (94%, 1323 tests):
|
|
987
1016
|
|
|
988
1017
|
```bash
|
|
989
1018
|
# Run all tests
|
|
@@ -1034,7 +1063,7 @@ The project follows modern Python best practices with a **9.8/10 compliance scor
|
|
|
1034
1063
|
- ✅ **Test Coverage** (9.5/10) - 94% overall, most files ≥90%
|
|
1035
1064
|
|
|
1036
1065
|
#### Quality Metrics
|
|
1037
|
-
- **
|
|
1066
|
+
- **1323 tests** - All passing ✅
|
|
1038
1067
|
- **94% coverage** - Exceeds 90% threshold ✅
|
|
1039
1068
|
- **Zero linting errors** - Clean codebase ✅
|
|
1040
1069
|
- **Full type safety** - MyPy passes ✅
|
|
@@ -1181,7 +1210,7 @@ chuk-puzzles-gym/
|
|
|
1181
1210
|
│ │ ├── __init__.py
|
|
1182
1211
|
│ │ ├── game.py
|
|
1183
1212
|
│ │ └── config.py
|
|
1184
|
-
│ └── ... (24 games total)
|
|
1213
|
+
│ └── ... (30 games total)
|
|
1185
1214
|
├── tests/
|
|
1186
1215
|
│ ├── test_puzzle_game.py # Base class tests
|
|
1187
1216
|
│ ├── test_deterministic_seeding.py # Seeding tests
|
|
@@ -1191,6 +1220,12 @@ chuk-puzzles-gym/
|
|
|
1191
1220
|
├── examples/
|
|
1192
1221
|
│ ├── simple_client.py # Telnet client example
|
|
1193
1222
|
│ ├── websocket_client.py # WebSocket client example
|
|
1223
|
+
│ ├── example_skyscrapers.py # Skyscrapers game logic demo
|
|
1224
|
+
│ ├── example_nqueens.py # N-Queens game logic demo
|
|
1225
|
+
│ ├── example_numberlink.py # Numberlink game logic demo
|
|
1226
|
+
│ ├── example_graph_coloring.py # Graph Coloring game logic demo
|
|
1227
|
+
│ ├── example_cryptarithmetic.py# Cryptarithmetic game logic demo
|
|
1228
|
+
│ ├── example_rush_hour.py # Rush Hour game logic demo
|
|
1194
1229
|
│ └── README.md # Example usage guide
|
|
1195
1230
|
├── .github/workflows/ # CI/CD workflows
|
|
1196
1231
|
├── pyproject.toml # Modern Python project config
|
|
@@ -1203,14 +1238,15 @@ chuk-puzzles-gym/
|
|
|
1203
1238
|
|
|
1204
1239
|
### Key Statistics
|
|
1205
1240
|
|
|
1206
|
-
- **Test Coverage**: 94% overall (
|
|
1241
|
+
- **Test Coverage**: 94% overall (1323 tests, all passing)
|
|
1207
1242
|
- **Code Quality Score**: 9.8/10 (near perfect compliance)
|
|
1208
|
-
- **Games Implemented**: 24 complete puzzle types
|
|
1243
|
+
- **Games Implemented**: 30 complete puzzle types
|
|
1209
1244
|
- 7 Classic Logic Puzzles
|
|
1210
1245
|
- 7 Advanced CP-SAT Puzzles
|
|
1211
1246
|
- 5 Specialized Constraint Puzzles
|
|
1212
1247
|
- 2 Optimization Challenges
|
|
1213
1248
|
- 3 Advanced Reasoning Puzzles
|
|
1249
|
+
- 6 Combinatorial & Search Puzzles
|
|
1214
1250
|
- **Supported Transports**: 4 (Telnet, TCP, WebSocket, WS-Telnet)
|
|
1215
1251
|
- **Agent-Friendly Mode**: Structured output for AI tools
|
|
1216
1252
|
- **Gymnasium API**: RL-compatible environment for all games
|
|
@@ -1243,7 +1279,7 @@ Test the generality of constraint solvers (like MCP solvers):
|
|
|
1243
1279
|
|
|
1244
1280
|
Learn about constraint satisfaction problems:
|
|
1245
1281
|
|
|
1246
|
-
- **24 different puzzle types** demonstrating various constraint types:
|
|
1282
|
+
- **30 different puzzle types** demonstrating various constraint types:
|
|
1247
1283
|
- AllDifferent constraints (Sudoku, KenKen, Futoshiki)
|
|
1248
1284
|
- Arithmetic constraints (KenKen, Kakuro, Killer Sudoku)
|
|
1249
1285
|
- Boolean/SAT constraints (Lights Out, Binary Puzzle)
|
|
@@ -1253,9 +1289,14 @@ Learn about constraint satisfaction problems:
|
|
|
1253
1289
|
- Temporal reasoning (Task Scheduler)
|
|
1254
1290
|
- Connectivity constraints (Nurikabe, Slitherlink)
|
|
1255
1291
|
- Probabilistic reasoning (Minesweeper)
|
|
1256
|
-
-
|
|
1292
|
+
- Graph coloring (Graph Coloring)
|
|
1293
|
+
- Arithmetic deduction (Cryptarithmetic)
|
|
1294
|
+
- Sequential planning (Rush Hour)
|
|
1295
|
+
- Visibility constraints (Skyscrapers)
|
|
1296
|
+
- Attack avoidance (N-Queens)
|
|
1297
|
+
- Path connectivity (Numberlink)
|
|
1257
1298
|
- **Well-documented code** showing puzzle generation algorithms
|
|
1258
|
-
- **Comprehensive tests** (
|
|
1299
|
+
- **Comprehensive tests** (1323 tests, 94% coverage) demonstrating validation
|
|
1259
1300
|
- **Deterministic seeding** - Reproduce any puzzle for debugging/testing
|
|
1260
1301
|
- **Production-ready** - 9.8/10 code quality score
|
|
1261
1302
|
- **Type-safe** - Full Pydantic v2 and MyPy compliance
|