chuk-puzzles-gym 0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chuk_puzzles_gym/__init__.py +19 -0
- chuk_puzzles_gym/constants.py +9 -0
- chuk_puzzles_gym/eval.py +763 -0
- chuk_puzzles_gym/export/__init__.py +20 -0
- chuk_puzzles_gym/export/dataset.py +376 -0
- chuk_puzzles_gym/games/__init__.py +94 -0
- chuk_puzzles_gym/games/_base/__init__.py +6 -0
- chuk_puzzles_gym/games/_base/commands.py +91 -0
- chuk_puzzles_gym/games/_base/game.py +337 -0
- chuk_puzzles_gym/games/binary/__init__.py +6 -0
- chuk_puzzles_gym/games/binary/config.py +23 -0
- chuk_puzzles_gym/games/binary/game.py +434 -0
- chuk_puzzles_gym/games/bridges/__init__.py +6 -0
- chuk_puzzles_gym/games/bridges/config.py +24 -0
- chuk_puzzles_gym/games/bridges/game.py +489 -0
- chuk_puzzles_gym/games/einstein/__init__.py +6 -0
- chuk_puzzles_gym/games/einstein/config.py +23 -0
- chuk_puzzles_gym/games/einstein/constants.py +13 -0
- chuk_puzzles_gym/games/einstein/game.py +366 -0
- chuk_puzzles_gym/games/einstein/models.py +35 -0
- chuk_puzzles_gym/games/fillomino/__init__.py +6 -0
- chuk_puzzles_gym/games/fillomino/config.py +24 -0
- chuk_puzzles_gym/games/fillomino/game.py +516 -0
- chuk_puzzles_gym/games/futoshiki/__init__.py +6 -0
- chuk_puzzles_gym/games/futoshiki/config.py +23 -0
- chuk_puzzles_gym/games/futoshiki/game.py +391 -0
- chuk_puzzles_gym/games/hidato/__init__.py +6 -0
- chuk_puzzles_gym/games/hidato/config.py +24 -0
- chuk_puzzles_gym/games/hidato/game.py +403 -0
- chuk_puzzles_gym/games/hitori/__init__.py +6 -0
- chuk_puzzles_gym/games/hitori/config.py +23 -0
- chuk_puzzles_gym/games/hitori/game.py +451 -0
- chuk_puzzles_gym/games/kakuro/__init__.py +6 -0
- chuk_puzzles_gym/games/kakuro/config.py +24 -0
- chuk_puzzles_gym/games/kakuro/game.py +399 -0
- chuk_puzzles_gym/games/kenken/__init__.py +6 -0
- chuk_puzzles_gym/games/kenken/config.py +24 -0
- chuk_puzzles_gym/games/kenken/enums.py +13 -0
- chuk_puzzles_gym/games/kenken/game.py +486 -0
- chuk_puzzles_gym/games/kenken/models.py +15 -0
- chuk_puzzles_gym/games/killer_sudoku/__init__.py +6 -0
- chuk_puzzles_gym/games/killer_sudoku/config.py +23 -0
- chuk_puzzles_gym/games/killer_sudoku/game.py +502 -0
- chuk_puzzles_gym/games/killer_sudoku/models.py +15 -0
- chuk_puzzles_gym/games/knapsack/__init__.py +6 -0
- chuk_puzzles_gym/games/knapsack/config.py +24 -0
- chuk_puzzles_gym/games/knapsack/enums.py +10 -0
- chuk_puzzles_gym/games/knapsack/game.py +340 -0
- chuk_puzzles_gym/games/knapsack/models.py +13 -0
- chuk_puzzles_gym/games/lights_out/__init__.py +6 -0
- chuk_puzzles_gym/games/lights_out/config.py +24 -0
- chuk_puzzles_gym/games/lights_out/game.py +249 -0
- chuk_puzzles_gym/games/logic_grid/__init__.py +6 -0
- chuk_puzzles_gym/games/logic_grid/config.py +24 -0
- chuk_puzzles_gym/games/logic_grid/constants.py +12 -0
- chuk_puzzles_gym/games/logic_grid/game.py +333 -0
- chuk_puzzles_gym/games/logic_grid/models.py +24 -0
- chuk_puzzles_gym/games/mastermind/__init__.py +6 -0
- chuk_puzzles_gym/games/mastermind/config.py +25 -0
- chuk_puzzles_gym/games/mastermind/game.py +297 -0
- chuk_puzzles_gym/games/minesweeper/__init__.py +6 -0
- chuk_puzzles_gym/games/minesweeper/config.py +24 -0
- chuk_puzzles_gym/games/minesweeper/enums.py +12 -0
- chuk_puzzles_gym/games/minesweeper/game.py +432 -0
- chuk_puzzles_gym/games/nonogram/__init__.py +6 -0
- chuk_puzzles_gym/games/nonogram/config.py +23 -0
- chuk_puzzles_gym/games/nonogram/game.py +296 -0
- chuk_puzzles_gym/games/nurikabe/__init__.py +6 -0
- chuk_puzzles_gym/games/nurikabe/config.py +24 -0
- chuk_puzzles_gym/games/nurikabe/enums.py +14 -0
- chuk_puzzles_gym/games/nurikabe/game.py +586 -0
- chuk_puzzles_gym/games/scheduler/__init__.py +6 -0
- chuk_puzzles_gym/games/scheduler/config.py +25 -0
- chuk_puzzles_gym/games/scheduler/constants.py +15 -0
- chuk_puzzles_gym/games/scheduler/enums.py +10 -0
- chuk_puzzles_gym/games/scheduler/game.py +431 -0
- chuk_puzzles_gym/games/scheduler/models.py +14 -0
- chuk_puzzles_gym/games/shikaku/__init__.py +6 -0
- chuk_puzzles_gym/games/shikaku/config.py +24 -0
- chuk_puzzles_gym/games/shikaku/game.py +419 -0
- chuk_puzzles_gym/games/slitherlink/__init__.py +6 -0
- chuk_puzzles_gym/games/slitherlink/config.py +23 -0
- chuk_puzzles_gym/games/slitherlink/game.py +386 -0
- chuk_puzzles_gym/games/sokoban/__init__.py +6 -0
- chuk_puzzles_gym/games/sokoban/config.py +24 -0
- chuk_puzzles_gym/games/sokoban/game.py +671 -0
- chuk_puzzles_gym/games/star_battle/__init__.py +6 -0
- chuk_puzzles_gym/games/star_battle/config.py +24 -0
- chuk_puzzles_gym/games/star_battle/game.py +390 -0
- chuk_puzzles_gym/games/sudoku/__init__.py +7 -0
- chuk_puzzles_gym/games/sudoku/commands.py +96 -0
- chuk_puzzles_gym/games/sudoku/config.py +22 -0
- chuk_puzzles_gym/games/sudoku/game.py +328 -0
- chuk_puzzles_gym/games/tents/__init__.py +6 -0
- chuk_puzzles_gym/games/tents/config.py +24 -0
- chuk_puzzles_gym/games/tents/game.py +416 -0
- chuk_puzzles_gym/gym_env.py +465 -0
- chuk_puzzles_gym/models/__init__.py +47 -0
- chuk_puzzles_gym/models/base.py +30 -0
- chuk_puzzles_gym/models/config.py +11 -0
- chuk_puzzles_gym/models/enums.py +104 -0
- chuk_puzzles_gym/models/evaluation.py +487 -0
- chuk_puzzles_gym/models/games.py +12 -0
- chuk_puzzles_gym/server.py +1171 -0
- chuk_puzzles_gym/trace/__init__.py +10 -0
- chuk_puzzles_gym/trace/generator.py +726 -0
- chuk_puzzles_gym/utils/__init__.py +4 -0
- chuk_puzzles_gym-0.9.dist-info/METADATA +1471 -0
- chuk_puzzles_gym-0.9.dist-info/RECORD +112 -0
- chuk_puzzles_gym-0.9.dist-info/WHEEL +5 -0
- chuk_puzzles_gym-0.9.dist-info/entry_points.txt +4 -0
- chuk_puzzles_gym-0.9.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,465 @@
|
|
|
1
|
+
"""Gymnasium-compatible environment API for Puzzle Arcade.
|
|
2
|
+
|
|
3
|
+
This module provides a standard RL environment interface for puzzle games,
|
|
4
|
+
compatible with Gymnasium (the maintained fork of OpenAI Gym).
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from chuk_puzzles_gym.gym_env import PuzzleEnv
|
|
8
|
+
|
|
9
|
+
env = PuzzleEnv("sudoku", difficulty="medium", seed=42)
|
|
10
|
+
obs, info = await env.reset()  # reset() is a coroutine
|
|
11
|
+
|
|
12
|
+
while not done:
|
|
13
|
+
action = agent.decide(obs)
|
|
14
|
+
obs, reward, terminated, truncated, info = await env.step(action)  # step() is a coroutine
|
|
15
|
+
|
|
16
|
+
env.close()
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from typing import Any, SupportsFloat
|
|
20
|
+
|
|
21
|
+
from .games import AVAILABLE_GAMES
|
|
22
|
+
from .games._base import PuzzleGame
|
|
23
|
+
from .models import SolverConfig
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class PuzzleEnv:
    """Gymnasium-style environment wrapping a single puzzle game.

    The environment exposes the usual ``reset`` / ``step`` / ``render`` /
    ``close`` surface. ``reset`` and ``step`` are coroutines and must be
    awaited. Actions are text commands (or sequences of command parts)
    that are dispatched to the underlying game's ``validate_move``.

    Attributes:
        game_name: Name of the puzzle game (a key of ``AVAILABLE_GAMES``).
        difficulty: Difficulty level used when a puzzle is generated.
        solver_config: Configuration for hint/solver usage.
        reward_config: ``DEFAULT_REWARDS`` merged with caller overrides.
        max_steps: Number of ``step`` calls after which the episode is
            reported as truncated.
    """

    # Reward configuration defaults
    DEFAULT_REWARDS = {
        "correct_placement": 1.0,
        "invalid_attempt": -0.5,
        "hint_penalty": -0.1,
        "completion_bonus": 10.0,
        "efficiency_multiplier": 1.0,
    }

    def __init__(
        self,
        game_name: str,
        difficulty: str = "easy",
        seed: int | None = None,
        solver_config: SolverConfig | None = None,
        reward_config: dict[str, float] | None = None,
        max_steps: int = 1000,
    ):
        """Initialize the puzzle environment.

        Args:
            game_name: Name of the puzzle game (e.g., 'sudoku', 'kenken')
            difficulty: Difficulty level ('easy', 'medium', 'hard')
            seed: Random seed for reproducible puzzles
            solver_config: Solver/hint configuration
            reward_config: Custom reward values (merged with defaults)
            max_steps: Maximum steps before truncation

        Raises:
            ValueError: If ``game_name`` is not a key of ``AVAILABLE_GAMES``.
        """
        if game_name not in AVAILABLE_GAMES:
            available = ", ".join(sorted(AVAILABLE_GAMES.keys()))
            raise ValueError(f"Unknown game '{game_name}'. Available: {available}")

        self.game_name = game_name
        self.difficulty = difficulty
        self._initial_seed = seed
        self.solver_config = solver_config or SolverConfig()
        self.max_steps = max_steps

        # Merge reward config with defaults; copy so the class-level dict
        # is never mutated by per-instance overrides.
        self.reward_config = self.DEFAULT_REWARDS.copy()
        if reward_config:
            self.reward_config.update(reward_config)

        # Game state (no game exists until reset() is awaited)
        self._game: PuzzleGame | None = None
        self._step_count = 0
        # AVAILABLE_GAMES contains concrete subclasses of PuzzleGame
        self._game_class: type[PuzzleGame] = AVAILABLE_GAMES[game_name]  # type: ignore[type-abstract]

    @property
    def unwrapped(self) -> "PuzzleEnv":
        """Return the base environment (Gymnasium compatibility)."""
        return self

    async def reset(
        self,
        seed: int | None = None,
        options: dict[str, Any] | None = None,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Reset the environment to a new puzzle.

        Args:
            seed: Optional seed for this episode (overrides init seed)
            options: Additional options; a ``"difficulty"`` key overrides
                the difficulty for this episode only.

        Returns:
            Tuple of (observation, info)
        """
        # Use the provided seed, otherwise fall back to the constructor seed
        # (which may itself be None; the game class then decides).
        episode_seed = seed if seed is not None else self._initial_seed

        # Allow difficulty override in options
        difficulty = self.difficulty
        if options and "difficulty" in options:
            difficulty = options["difficulty"]

        # Create and initialize the game
        self._game = self._game_class(
            difficulty=difficulty,
            seed=episode_seed,
            solver_config=self.solver_config,
        )
        await self._game.generate_puzzle()
        self._step_count = 0

        return self._get_observation(), self._get_info()

    async def step(
        self,
        action: str | tuple[str, ...] | list[Any],
    ) -> tuple[dict[str, Any], SupportsFloat, bool, bool, dict[str, Any]]:
        """Take a step in the environment.

        Every call that reaches command parsing consumes one unit of the
        ``max_steps`` budget, including empty actions, hint requests and
        actions that raise while executing. This guarantees that an agent
        emitting only invalid actions is still truncated.

        Args:
            action: Action to take. Can be:
                - str: Full command string (e.g., "place 1 5 7")
                - tuple/list: Command parts (e.g., ("place", 1, 5, 7))

        Returns:
            Tuple of (observation, reward, terminated, truncated, info)

        Raises:
            RuntimeError: If ``reset()`` has not been awaited yet.
        """
        if self._game is None:
            raise RuntimeError("Environment not initialized. Call reset() first.")

        # Parse action into a single command string
        if isinstance(action, str):
            action_str = action
        else:
            action_str = " ".join(str(x) for x in action)

        # Count the step up front so every exit path below sees the same
        # truncation state.
        self._step_count += 1
        truncated = self._step_count >= self.max_steps

        parts = action_str.strip().split()
        if not parts:
            # Empty action
            self._game.invalid_moves += 1
            return (
                self._get_observation(),
                self.reward_config["invalid_attempt"],
                False,
                truncated,
                {"action": action_str, "success": False, "message": "Empty action"},
            )

        # Only lowercase the command, preserve case for arguments
        # (important for games like logic/einstein with named values)
        cmd = parts[0].lower()
        args = parts[1:]

        # Handle hint request
        if cmd == "hint":
            if self._game.record_hint():
                hint_result = await self._game.get_hint()
                if hint_result:
                    hint_data, hint_message = hint_result
                    return (
                        self._get_observation(),
                        self.reward_config["hint_penalty"],
                        False,
                        truncated,
                        {"action": action_str, "success": True, "hint": hint_message, "hint_data": hint_data},
                    )
            # Either the hint was refused or the game produced no hint.
            return (
                self._get_observation(),
                self.reward_config["invalid_attempt"],
                False,
                truncated,
                {"action": action_str, "success": False, "message": "Hint not available"},
            )

        # Execute game-specific action. Malformed commands (bad arity,
        # non-numeric coordinates, ...) surface as exceptions here and are
        # deliberately reported to the agent as an invalid attempt instead
        # of crashing the episode.
        try:
            result = await self._execute_action(cmd, args)
        except Exception as e:
            self._game.invalid_moves += 1
            return (
                self._get_observation(),
                self.reward_config["invalid_attempt"],
                False,
                truncated,
                {"action": action_str, "success": False, "error": str(e)},
            )

        # Calculate reward
        if result.success:
            reward = self.reward_config["correct_placement"]

            # Check for completion
            terminated = self._game.is_complete()
            if terminated:
                # Add completion bonus with efficiency multiplier; the
                # efficiency factor is capped at 1.0.
                optimal = self._game.optimal_steps
                if optimal and self._game.moves_made > 0:
                    efficiency = min(1.0, optimal / self._game.moves_made)
                else:
                    efficiency = 1.0
                reward += (
                    self.reward_config["completion_bonus"] * efficiency * self.reward_config["efficiency_multiplier"]
                )
        else:
            reward = self.reward_config["invalid_attempt"]
            self._game.invalid_moves += 1
            terminated = False

        info = {
            "action": action_str,
            "success": result.success,
            "message": result.message,
            "moves": self._game.moves_made,
            "invalid_moves": self._game.invalid_moves,
            "hints_used": self._game.hints_used,
        }

        return self._get_observation(), reward, terminated, truncated, info

    async def _execute_action(self, cmd: str, args: list[str]) -> Any:
        """Execute a game-specific action.

        This maps commands to game methods based on the game type.

        Args:
            cmd: Lower-cased command word.
            args: Remaining command tokens, case preserved.

        Returns:
            Whatever the game's ``validate_move`` returns (``step`` reads
            ``.success`` and ``.message`` from it).

        Raises:
            RuntimeError: If no game has been created yet.
            Exception: Conversion and indexing errors from malformed
                arguments propagate to the caller (``step`` catches them).
        """
        if self._game is None:
            raise RuntimeError("No game initialized")

        game_name = self.game_name.lower()

        # Common commands for grid-based puzzles
        if cmd in ("place", "p"):
            if len(args) >= 3:
                row, col, val = int(args[0]), int(args[1]), int(args[2])
                return await self._game.validate_move(row, col, val)

        if cmd in ("clear", "c"):
            if len(args) >= 2:
                row, col = int(args[0]), int(args[1])
                # In nonogram, 0 is what "mark" writes and -1 is what
                # "clear" is meant to write; every other game clears with 0.
                # Handling this here keeps "clear" from silently acting as
                # "mark" for nonogram.
                cleared_value = -1 if game_name == "nonogram" else 0
                return await self._game.validate_move(row, col, cleared_value)

        # Game-specific commands
        if game_name == "lights_out" and cmd == "press":
            row, col = int(args[0]), int(args[1])
            return await self._game.validate_move(row, col)

        if game_name == "sokoban" and cmd == "move":
            return await self._game.validate_move(args[0])

        if game_name == "minesweeper":
            if cmd == "reveal":
                row, col = int(args[0]), int(args[1])
                return await self._game.validate_move("reveal", row, col)
            if cmd == "flag":
                row, col = int(args[0]), int(args[1])
                return await self._game.validate_move("flag", row, col)

        if game_name == "mastermind" and cmd == "guess":
            guess = [int(x) for x in args]
            return await self._game.validate_move(*guess)

        if game_name == "einstein":
            # Einstein uses assign <house> <attribute> <value>
            # Value may contain spaces (e.g., "Blue Master")
            if cmd == "assign":
                house, attribute = int(args[0]), args[1]
                value = " ".join(args[2:])  # Join remaining args for multi-word values
                return await self._game.validate_move(house, attribute, value)

        if game_name == "logic":
            # Logic grid uses connect <person_name> <category> <value> (3 args)
            # which maps to validate_move("person", person_name, category, value, True)
            # or connect <cat1> <val1> <cat2> <val2> (4 args)
            if cmd == "connect":
                if len(args) == 3:
                    # Shorthand: connect <person_name> <category> <value>
                    # e.g., connect Alice color Blue -> ("person", "Alice", "color", "Blue", True)
                    return await self._game.validate_move("person", args[0], args[1], args[2], True)
                elif len(args) >= 4:
                    return await self._game.validate_move(args[0], args[1], args[2], args[3], True)
            if cmd == "exclude":
                if len(args) == 3:
                    return await self._game.validate_move("person", args[0], args[1], args[2], False)
                elif len(args) >= 4:
                    return await self._game.validate_move(args[0], args[1], args[2], args[3], False)

        if game_name == "bridges" and cmd == "bridge":
            r1, c1, r2, c2, count = int(args[0]), int(args[1]), int(args[2]), int(args[3]), int(args[4])
            return await self._game.validate_move(r1, c1, r2, c2, count)

        if game_name == "shikaku" and cmd == "rect":
            # Shikaku uses rect <r1> <c1> <r2> <c2>
            r1, c1, r2, c2 = int(args[0]), int(args[1]), int(args[2]), int(args[3])
            return await self._game.validate_move(r1, c1, r2, c2)

        if game_name == "nonogram":
            # Nonogram uses fill/mark <row> <col> [value]; "clear" is
            # handled by the common clear branch above (value -1).
            if cmd == "fill":
                row, col = int(args[0]), int(args[1])
                return await self._game.validate_move(row, col, 1)
            if cmd == "mark":
                row, col = int(args[0]), int(args[1])
                return await self._game.validate_move(row, col, 0)
            if cmd == "set":
                row, col, cell_val = int(args[0]), int(args[1]), int(args[2])
                return await self._game.validate_move(row, col, cell_val)

        if game_name == "slither" and cmd == "set":
            edge_type, row, col, state = args[0], int(args[1]), int(args[2]), int(args[3])
            return await self._game.validate_move(edge_type, row, col, state)

        if game_name == "nurikabe" and cmd == "mark":
            row, col, color = int(args[0]), int(args[1]), args[2]
            return await self._game.validate_move(row, col, color)

        if game_name == "hitori" and cmd == "shade":
            row, col = int(args[0]), int(args[1])
            return await self._game.validate_move(row, col, "shade")

        if game_name == "knapsack":
            if cmd == "select":
                return await self._game.validate_move("select", int(args[0]))
            if cmd == "deselect":
                return await self._game.validate_move("deselect", int(args[0]))

        if game_name == "scheduler":
            if cmd == "assign":
                task, worker, start = int(args[0]), int(args[1]), int(args[2])
                return await self._game.validate_move(task, worker, start)
            if cmd == "unassign":
                return await self._game.validate_move(int(args[0]), 0, -1)

        # Default: try validate_move with parsed args (ints where the token
        # parses as an int, the raw string otherwise). The command word
        # itself is not forwarded.
        parsed_args: list[int | str] = []
        for arg in args:
            try:
                parsed_args.append(int(arg))
            except ValueError:
                parsed_args.append(arg)

        return await self._game.validate_move(*parsed_args)

    def _get_observation(self) -> dict[str, Any]:
        """Get the current observation.

        Returns:
            ``{"error": "no_game"}`` before the first reset; otherwise a
            dict of game metadata, counters, the rendered grid and, when
            the game object has a ``grid`` attribute, the raw grid.
        """
        if self._game is None:
            return {"error": "no_game"}

        obs = {
            "game": self._game.name,
            "difficulty": self._game.difficulty.value,
            "seed": self._game.seed,
            "moves": self._game.moves_made,
            "invalid_moves": self._game.invalid_moves,
            "hints_used": self._game.hints_used,
            "hints_remaining": self._game.hints_remaining,
            "is_complete": self._game.is_complete(),
        }

        # Add grid if available
        if hasattr(self._game, "grid"):
            obs["grid"] = self._game.grid

        # Add rendered view
        obs["render"] = self._game.render_grid()

        return obs

    def _get_info(self) -> dict[str, Any]:
        """Get additional info about the environment state.

        Returns:
            An empty dict before the first reset; otherwise the game's
            optimal step count, difficulty profile, constraint types and
            the active solver configuration.
        """
        if self._game is None:
            return {}

        profile = self._game.difficulty_profile
        return {
            "optimal_steps": self._game.optimal_steps,
            "difficulty_profile": {
                "logic_depth": profile.logic_depth,
                "branching_factor": profile.branching_factor,
                "state_observability": profile.state_observability,
                "constraint_density": profile.constraint_density,
            },
            "constraint_types": self._game.constraint_types,
            "solver_config": {
                "solver_allowed": self.solver_config.solver_allowed,
                "hint_budget": self.solver_config.hint_budget,
                "hint_penalty": self.solver_config.hint_penalty,
            },
        }

    def render(self, mode: str = "ansi") -> str | None:
        """Render the environment.

        Args:
            mode: Render mode ('ansi' for text output)

        Returns:
            Rendered string if mode is 'ansi' and a game exists, None otherwise
        """
        if self._game is None:
            return None

        if mode == "ansi":
            return self._game.render_grid()
        return None

    def close(self) -> None:
        """Clean up environment resources by dropping the current game."""
        self._game = None

    @property
    def game(self) -> PuzzleGame | None:
        """Access the underlying game instance (None before reset/after close)."""
        return self._game

    @classmethod
    def available_games(cls) -> list[str]:
        """Get the sorted list of available game names."""
        return sorted(AVAILABLE_GAMES.keys())

    @classmethod
    def make(
        cls,
        game_name: str,
        difficulty: str = "easy",
        seed: int | None = None,
        **kwargs: Any,
    ) -> "PuzzleEnv":
        """Factory method to create an environment (Gymnasium-style).

        Args:
            game_name: Name of the puzzle game
            difficulty: Difficulty level
            seed: Random seed
            **kwargs: Additional arguments passed to __init__

        Returns:
            Configured PuzzleEnv instance
        """
        return cls(game_name, difficulty=difficulty, seed=seed, **kwargs)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Pydantic models and enums for the Puzzle Arcade server.
|
|
2
|
+
|
|
3
|
+
Game-specific models and enums have been moved to their respective game folders.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from chuk_gym_core import DifficultyProfile
|
|
7
|
+
|
|
8
|
+
from .base import GridPosition, MoveResult
|
|
9
|
+
from .config import GameConfig
|
|
10
|
+
from .enums import (
|
|
11
|
+
CellState,
|
|
12
|
+
ConnectionState,
|
|
13
|
+
DifficultyLevel,
|
|
14
|
+
EpisodeStatus,
|
|
15
|
+
GameCommand,
|
|
16
|
+
OutputMode,
|
|
17
|
+
)
|
|
18
|
+
from .evaluation import (
|
|
19
|
+
EpisodeResult,
|
|
20
|
+
EpisodeTracer,
|
|
21
|
+
EvaluationSummary,
|
|
22
|
+
MoveRecord,
|
|
23
|
+
SolverConfig,
|
|
24
|
+
TraceEvent,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# Public names re-exported by the models package.
__all__ = [
    # --- enums ---
    "CellState",
    "ConnectionState",
    "DifficultyLevel",
    "EpisodeStatus",
    "GameCommand",
    "OutputMode",
    # --- base / config models ---
    "MoveResult",
    "GridPosition",
    "GameConfig",
    # --- evaluation models ---
    "DifficultyProfile",
    "EpisodeResult",
    "EpisodeTracer",
    "EvaluationSummary",
    "MoveRecord",
    "SolverConfig",
    "TraceEvent",
]
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Base Pydantic models for the Puzzle Arcade server."""
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class GridPosition(BaseModel):
    """Cell coordinate as shown to users: rows and columns both start at 1."""

    # Both fields are validated to be >= 1.
    row: int = Field(ge=1, description="Row number (1-indexed)")
    col: int = Field(ge=1, description="Column number (1-indexed)")

    def to_zero_indexed(self) -> tuple[int, int]:
        """Return ``(row, col)`` shifted down by one for 0-based indexing."""
        zero_row = self.row - 1
        zero_col = self.col - 1
        return (zero_row, zero_col)

    @classmethod
    def from_zero_indexed(cls, row: int, col: int) -> "GridPosition":
        """Build a position from 0-based coordinates by shifting each up by one."""
        one_based_row = row + 1
        one_based_col = col + 1
        return cls(row=one_based_row, col=one_based_col)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class MoveResult(BaseModel):
    """Outcome reported after a move or other game action."""

    # Instances are frozen: fields cannot be reassigned after creation.
    model_config = ConfigDict(frozen=True)

    # Required fields
    success: bool = Field(
        description="Whether the move was successful",
    )
    message: str = Field(
        description="Message to display to the user",
    )

    # Optional flags, both False unless stated otherwise
    state_changed: bool = Field(
        default=False,
        description="Whether game state was modified",
    )
    game_over: bool = Field(
        default=False,
        description="Whether the game has ended",
    )
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Base configuration models for games."""
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
|
|
5
|
+
from .enums import DifficultyLevel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class GameConfig(BaseModel):
    """Configuration shared by every game; currently only the difficulty."""

    # Defaults to the EASY level when not supplied.
    difficulty: DifficultyLevel = Field(
        default=DifficultyLevel.EASY,
        description="Game difficulty level",
    )
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Enums for the Puzzle Arcade server."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum, IntEnum
|
|
4
|
+
|
|
5
|
+
# Import DifficultyLevel from chuk-gym-core for unified difficulty levels
|
|
6
|
+
from chuk_gym_core import DifficultyLevel
|
|
7
|
+
|
|
8
|
+
# Re-export for backwards compatibility
|
|
9
|
+
# Public names of this module; DifficultyLevel is imported from
# chuk_gym_core and re-exported for backwards compatibility.
__all__ = [
    "DifficultyLevel",
    "GameCommand",
    "CellState",
    "ConnectionState",
    "OutputMode",
    "EpisodeStatus",
]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class GameCommand(str, Enum):
    """Command words recognised while a game is being played.

    Members are also ``str`` instances, so they compare equal to their
    plain-text value.
    """

    # --- session control (with single-letter shortcuts) ---
    QUIT = "quit"
    EXIT = "exit"
    Q = "q"
    HELP = "help"
    H = "h"
    SHOW = "show"
    S = "s"

    # --- puzzle assistance and lifecycle ---
    HINT = "hint"
    CHECK = "check"
    SOLVE = "solve"
    RESET = "reset"
    MENU = "menu"
    M = "m"
    MODE = "mode"
    SEED = "seed"
    COMPARE = "compare"
    STATS = "stats"

    # --- game-specific commands (kept here for server command parsing) ---
    PLACE = "place"
    CLEAR = "clear"
    PRESS = "press"
    CONNECT = "connect"
    EXCLUDE = "exclude"
    REVEAL = "reveal"
    FLAG = "flag"
    SELECT = "select"
    DESELECT = "deselect"
    ASSIGN = "assign"
    UNASSIGN = "unassign"
    MARK = "mark"
    GUESS = "guess"
    SET = "set"
    SHADE = "shade"
    BRIDGE = "bridge"
    MOVE = "move"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class CellState(IntEnum):
    """Integer cell states for grid-based games.

    Only three distinct values exist. Each value has a second name that
    is an alias of the first name defined for it.
    """

    EMPTY = 0
    UNREVEALED = EMPTY  # alias of EMPTY (same value, 0)
    FILLED = 1
    REVEALED = FILLED  # alias of FILLED (same value, 1)
    FLAGGED = 2
    MARKED = FLAGGED  # alias of FLAGGED (same value, 2)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class ConnectionState(IntEnum):
    """State of a pairing between two values in a logic grid puzzle."""

    UNKNOWN = 0  # nothing recorded yet
    DISCONNECTED = 1
    CONNECTED = 2
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class OutputMode(str, Enum):
    """How the server formats what it sends back.

    Modes:
        NORMAL: human-friendly output with explanations and formatting.
        AGENT: structured output with clear markers for AI agents.
        COMPACT: minimal output for bandwidth-constrained connections.
        STRICT: fixed grammar, symbolic inputs, machine-verifiable
            (for RL/benchmarks).
        NATURAL: conversational; accepts ambiguous/paraphrased inputs
            (robustness testing).
        JSON: full JSON protocol for RL integration (gym-style
            observations/actions).
    """

    NORMAL = "normal"
    AGENT = "agent"
    COMPACT = "compact"
    STRICT = "strict"
    NATURAL = "natural"
    JSON = "json"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class EpisodeStatus(str, Enum):
    """Lifecycle state of one puzzle episode; members are also plain strings."""

    IN_PROGRESS = "in_progress"
    SOLVED = "solved"
    FAILED = "failed"
    TIMEOUT = "timeout"
    ABANDONED = "abandoned"
|