chuk-puzzles-gym 0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chuk_puzzles_gym/__init__.py +19 -0
- chuk_puzzles_gym/constants.py +9 -0
- chuk_puzzles_gym/eval.py +763 -0
- chuk_puzzles_gym/export/__init__.py +20 -0
- chuk_puzzles_gym/export/dataset.py +376 -0
- chuk_puzzles_gym/games/__init__.py +94 -0
- chuk_puzzles_gym/games/_base/__init__.py +6 -0
- chuk_puzzles_gym/games/_base/commands.py +91 -0
- chuk_puzzles_gym/games/_base/game.py +337 -0
- chuk_puzzles_gym/games/binary/__init__.py +6 -0
- chuk_puzzles_gym/games/binary/config.py +23 -0
- chuk_puzzles_gym/games/binary/game.py +434 -0
- chuk_puzzles_gym/games/bridges/__init__.py +6 -0
- chuk_puzzles_gym/games/bridges/config.py +24 -0
- chuk_puzzles_gym/games/bridges/game.py +489 -0
- chuk_puzzles_gym/games/einstein/__init__.py +6 -0
- chuk_puzzles_gym/games/einstein/config.py +23 -0
- chuk_puzzles_gym/games/einstein/constants.py +13 -0
- chuk_puzzles_gym/games/einstein/game.py +366 -0
- chuk_puzzles_gym/games/einstein/models.py +35 -0
- chuk_puzzles_gym/games/fillomino/__init__.py +6 -0
- chuk_puzzles_gym/games/fillomino/config.py +24 -0
- chuk_puzzles_gym/games/fillomino/game.py +516 -0
- chuk_puzzles_gym/games/futoshiki/__init__.py +6 -0
- chuk_puzzles_gym/games/futoshiki/config.py +23 -0
- chuk_puzzles_gym/games/futoshiki/game.py +391 -0
- chuk_puzzles_gym/games/hidato/__init__.py +6 -0
- chuk_puzzles_gym/games/hidato/config.py +24 -0
- chuk_puzzles_gym/games/hidato/game.py +403 -0
- chuk_puzzles_gym/games/hitori/__init__.py +6 -0
- chuk_puzzles_gym/games/hitori/config.py +23 -0
- chuk_puzzles_gym/games/hitori/game.py +451 -0
- chuk_puzzles_gym/games/kakuro/__init__.py +6 -0
- chuk_puzzles_gym/games/kakuro/config.py +24 -0
- chuk_puzzles_gym/games/kakuro/game.py +399 -0
- chuk_puzzles_gym/games/kenken/__init__.py +6 -0
- chuk_puzzles_gym/games/kenken/config.py +24 -0
- chuk_puzzles_gym/games/kenken/enums.py +13 -0
- chuk_puzzles_gym/games/kenken/game.py +486 -0
- chuk_puzzles_gym/games/kenken/models.py +15 -0
- chuk_puzzles_gym/games/killer_sudoku/__init__.py +6 -0
- chuk_puzzles_gym/games/killer_sudoku/config.py +23 -0
- chuk_puzzles_gym/games/killer_sudoku/game.py +502 -0
- chuk_puzzles_gym/games/killer_sudoku/models.py +15 -0
- chuk_puzzles_gym/games/knapsack/__init__.py +6 -0
- chuk_puzzles_gym/games/knapsack/config.py +24 -0
- chuk_puzzles_gym/games/knapsack/enums.py +10 -0
- chuk_puzzles_gym/games/knapsack/game.py +340 -0
- chuk_puzzles_gym/games/knapsack/models.py +13 -0
- chuk_puzzles_gym/games/lights_out/__init__.py +6 -0
- chuk_puzzles_gym/games/lights_out/config.py +24 -0
- chuk_puzzles_gym/games/lights_out/game.py +249 -0
- chuk_puzzles_gym/games/logic_grid/__init__.py +6 -0
- chuk_puzzles_gym/games/logic_grid/config.py +24 -0
- chuk_puzzles_gym/games/logic_grid/constants.py +12 -0
- chuk_puzzles_gym/games/logic_grid/game.py +333 -0
- chuk_puzzles_gym/games/logic_grid/models.py +24 -0
- chuk_puzzles_gym/games/mastermind/__init__.py +6 -0
- chuk_puzzles_gym/games/mastermind/config.py +25 -0
- chuk_puzzles_gym/games/mastermind/game.py +297 -0
- chuk_puzzles_gym/games/minesweeper/__init__.py +6 -0
- chuk_puzzles_gym/games/minesweeper/config.py +24 -0
- chuk_puzzles_gym/games/minesweeper/enums.py +12 -0
- chuk_puzzles_gym/games/minesweeper/game.py +432 -0
- chuk_puzzles_gym/games/nonogram/__init__.py +6 -0
- chuk_puzzles_gym/games/nonogram/config.py +23 -0
- chuk_puzzles_gym/games/nonogram/game.py +296 -0
- chuk_puzzles_gym/games/nurikabe/__init__.py +6 -0
- chuk_puzzles_gym/games/nurikabe/config.py +24 -0
- chuk_puzzles_gym/games/nurikabe/enums.py +14 -0
- chuk_puzzles_gym/games/nurikabe/game.py +586 -0
- chuk_puzzles_gym/games/scheduler/__init__.py +6 -0
- chuk_puzzles_gym/games/scheduler/config.py +25 -0
- chuk_puzzles_gym/games/scheduler/constants.py +15 -0
- chuk_puzzles_gym/games/scheduler/enums.py +10 -0
- chuk_puzzles_gym/games/scheduler/game.py +431 -0
- chuk_puzzles_gym/games/scheduler/models.py +14 -0
- chuk_puzzles_gym/games/shikaku/__init__.py +6 -0
- chuk_puzzles_gym/games/shikaku/config.py +24 -0
- chuk_puzzles_gym/games/shikaku/game.py +419 -0
- chuk_puzzles_gym/games/slitherlink/__init__.py +6 -0
- chuk_puzzles_gym/games/slitherlink/config.py +23 -0
- chuk_puzzles_gym/games/slitherlink/game.py +386 -0
- chuk_puzzles_gym/games/sokoban/__init__.py +6 -0
- chuk_puzzles_gym/games/sokoban/config.py +24 -0
- chuk_puzzles_gym/games/sokoban/game.py +671 -0
- chuk_puzzles_gym/games/star_battle/__init__.py +6 -0
- chuk_puzzles_gym/games/star_battle/config.py +24 -0
- chuk_puzzles_gym/games/star_battle/game.py +390 -0
- chuk_puzzles_gym/games/sudoku/__init__.py +7 -0
- chuk_puzzles_gym/games/sudoku/commands.py +96 -0
- chuk_puzzles_gym/games/sudoku/config.py +22 -0
- chuk_puzzles_gym/games/sudoku/game.py +328 -0
- chuk_puzzles_gym/games/tents/__init__.py +6 -0
- chuk_puzzles_gym/games/tents/config.py +24 -0
- chuk_puzzles_gym/games/tents/game.py +416 -0
- chuk_puzzles_gym/gym_env.py +465 -0
- chuk_puzzles_gym/models/__init__.py +47 -0
- chuk_puzzles_gym/models/base.py +30 -0
- chuk_puzzles_gym/models/config.py +11 -0
- chuk_puzzles_gym/models/enums.py +104 -0
- chuk_puzzles_gym/models/evaluation.py +487 -0
- chuk_puzzles_gym/models/games.py +12 -0
- chuk_puzzles_gym/server.py +1171 -0
- chuk_puzzles_gym/trace/__init__.py +10 -0
- chuk_puzzles_gym/trace/generator.py +726 -0
- chuk_puzzles_gym/utils/__init__.py +4 -0
- chuk_puzzles_gym-0.9.dist-info/METADATA +1471 -0
- chuk_puzzles_gym-0.9.dist-info/RECORD +112 -0
- chuk_puzzles_gym-0.9.dist-info/WHEEL +5 -0
- chuk_puzzles_gym-0.9.dist-info/entry_points.txt +4 -0
- chuk_puzzles_gym-0.9.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1471 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: chuk-puzzles-gym
|
|
3
|
+
Version: 0.9
|
|
4
|
+
Summary: Multi-game puzzle gym for LLM training and benchmarking - 24 constraint puzzles with synthetic data generation
|
|
5
|
+
Author: Chris Hay
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/chrishayuk/chuk-puzzles-gym
|
|
8
|
+
Project-URL: Repository, https://github.com/chrishayuk/chuk-puzzles-gym
|
|
9
|
+
Project-URL: Documentation, https://github.com/chrishayuk/chuk-puzzles-gym#readme
|
|
10
|
+
Keywords: puzzle,gym,llm,training,benchmark,sudoku,kenken,constraint-satisfaction,reasoning,mcp
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
Requires-Dist: chuk-gym-core>=0.1.1
|
|
19
|
+
Requires-Dist: chuk-protocol-server>=0.1.0
|
|
20
|
+
Requires-Dist: pydantic>=2.0.0
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
23
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
24
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
25
|
+
Requires-Dist: pytest-watch>=4.2.0; extra == "dev"
|
|
26
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
27
|
+
Requires-Dist: mypy>=1.4.0; extra == "dev"
|
|
28
|
+
Requires-Dist: bandit>=1.7.5; extra == "dev"
|
|
29
|
+
Requires-Dist: websockets>=11.0.0; extra == "dev"
|
|
30
|
+
Requires-Dist: ipython>=8.14.0; extra == "dev"
|
|
31
|
+
|
|
32
|
+
# chuk-puzzles-gym
|
|
33
|
+
|
|
34
|
+
[PyPI](https://pypi.org/project/chuk-puzzles-gym/)
|
|
35
|
+
[Tests](https://github.com/chrishayuk/chuk-puzzles-gym/actions)
|
|
36
|
+
[Coverage](htmlcov/index.html)
|
|
37
|
+
[Python 3.11+](https://www.python.org/downloads/)
|
|
38
|
+
[Code style: Ruff](https://github.com/astral-sh/ruff)
|
|
39
|
+
[Pydantic v2](https://docs.pydantic.dev/)
|
|
40
|
+
[Type checked: mypy](https://mypy-lang.org/)
|
|
41
|
+
|
|
42
|
+
A **multi-game puzzle gym** for **LLM training and benchmarking**, hosting 24 different logic puzzle types with synthetic data generation. Built using [chuk-gym-core](https://github.com/chrishayuk/chuk-gym-core) and [chuk-protocol-server](https://github.com/chrishayuk/chuk-protocol-server).
|
|
43
|
+
|
|
44
|
+
**Perfect for:**
|
|
45
|
+
- 🤖 **LLM Agent Testing** - Benchmark reasoning capabilities across constraint types
|
|
46
|
+
- 🎯 **CP-SAT Education** - Learn constraint programming through progressive puzzles
|
|
47
|
+
- 💼 **Business Demos** - Map puzzle patterns to real scheduling, optimization, and allocation problems
|
|
48
|
+
- 🔧 **MCP Tool Integration** - Showcase CHUK + constraint solver workflows
|
|
49
|
+
|
|
50
|
+
Each puzzle demonstrates specific **constraint patterns** (AllDifferent, Optimization, Connectivity, Boolean SAT, etc.) and maps to **business use cases** (scheduling, resource allocation, routing, etc.).
|
|
51
|
+
|
|
52
|
+
## Try It Now
|
|
53
|
+
|
|
54
|
+
### Run Locally with uvx
|
|
55
|
+
|
|
56
|
+
No installation required - run directly with [uvx](https://docs.astral.sh/uv/guides/tools/):
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
# Start the puzzle server
|
|
60
|
+
uvx chuk-puzzles-gym
|
|
61
|
+
|
|
62
|
+
# Generate training datasets
|
|
63
|
+
uvx --from chuk-puzzles-gym chuk-puzzles-export -g sudoku -n 100 -o data.jsonl
|
|
64
|
+
|
|
65
|
+
# Benchmark an agent
|
|
66
|
+
uvx --from chuk-puzzles-gym chuk-puzzles-eval sudoku -n 10
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Connect to Live Demo
|
|
70
|
+
|
|
71
|
+
A live demo server is running on Fly.io:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# Connect via Telnet (IPv6)
|
|
75
|
+
telnet 2a09:8280:1::b8:79f4:0 8023
|
|
76
|
+
|
|
77
|
+
# WebSocket connections
|
|
78
|
+
ws://chuk-puzzles-gym.fly.dev:8025/ws
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Once connected, type `help` to see available games, or `sudoku easy` to start playing!
|
|
82
|
+
|
|
83
|
+
## Features
|
|
84
|
+
|
|
85
|
+
- **24 Puzzle Games** with three difficulty levels each (easy, medium, hard)
|
|
86
|
+
- **7 Classic Logic Puzzles** - Sudoku, KenKen, Kakuro, Binary, Futoshiki, Nonogram, Logic Grid
|
|
87
|
+
- **7 Advanced CP-SAT Puzzles** - Killer Sudoku, Lights Out, Mastermind, Slitherlink, Bridges, Hitori, Shikaku
|
|
88
|
+
- **5 Specialized Constraint Puzzles** - Hidato, Tents and Trees, Fillomino, Star Battle, Sokoban
|
|
89
|
+
- **2 Optimization Challenges** - Knapsack, Task Scheduler
|
|
90
|
+
- **3 Advanced Reasoning Puzzles** - Nurikabe, Einstein's Puzzle, Minesweeper
|
|
91
|
+
- **Agent-Friendly Mode** - Structured output with clear markers for AI agents and tools
|
|
92
|
+
- Enable with `mode agent` command
|
|
93
|
+
- Machine-parseable grid format with clear start/end markers
|
|
94
|
+
- Compact output optimized for LLM tool integration
|
|
95
|
+
- **Evaluation Harness** (`chuk-puzzles-eval`) - Built-in benchmarking CLI
|
|
96
|
+
- Batch evaluation with configurable episodes
|
|
97
|
+
- Multiple output formats (JSON, CSV, Markdown)
|
|
98
|
+
- Metrics: moves, invalid moves, hints, solve time
|
|
99
|
+
- Reproducible with deterministic seeds
|
|
100
|
+
- **Dataset Export** (`chuk-puzzles-export`) - Synthetic data generation for LLM training
|
|
101
|
+
- JSONL output with complete problem definitions and solutions
|
|
102
|
+
- Step-by-step reasoning traces for teacher-forcing
|
|
103
|
+
- Constraint metadata and difficulty profiles
|
|
104
|
+
- Compatible with chuk-gym-core schema
|
|
105
|
+
- **Multiple transport protocols:**
|
|
106
|
+
- **Telnet** (port 8023) - Classic telnet protocol
|
|
107
|
+
- **TCP** (port 8024) - Raw TCP connections
|
|
108
|
+
- **WebSocket** (port 8025) - Modern WebSocket protocol
|
|
109
|
+
- **WebSocket-Telnet** (port 8026) - WebSocket with telnet negotiation
|
|
110
|
+
- **Interactive menu-driven interface** with game selection
|
|
111
|
+
- **Hint system** for when you're stuck
|
|
112
|
+
- **Solution checker** and auto-solver for all games
|
|
113
|
+
- **Clean ASCII art grids** - perfectly aligned for easy parsing
|
|
114
|
+
- **Deterministic seeding** - Replay any puzzle with the same seed
|
|
115
|
+
- **Gymnasium-compatible RL Environment** (`PuzzleEnv`) for training agents
|
|
116
|
+
- **Comprehensive test suite** (1067 tests, 94% coverage)
|
|
117
|
+
- **Modern Python best practices:**
|
|
118
|
+
- **Pydantic v2 native** - All models use ConfigDict for type safety
|
|
119
|
+
- **Async native** - Full async/await support throughout
|
|
120
|
+
- **Type-safe** - No `dict["key"]` patterns, only typed models
|
|
121
|
+
- **Enum-based** - No magic strings, proper enum constants
|
|
122
|
+
- **Modern Python packaging** with pyproject.toml
|
|
123
|
+
- **Docker and Fly.io deployment** ready
|
|
124
|
+
|
|
125
|
+
## Available Games
|
|
126
|
+
|
|
127
|
+
### Classic Logic Puzzles
|
|
128
|
+
|
|
129
|
+
| Game | Grid Size | Constraint Types | Status |
|
|
130
|
+
|------|-----------|------------------|--------|
|
|
131
|
+
| **Sudoku** | 9×9 | AllDifferent (rows, cols, boxes) | ✅ Complete |
|
|
132
|
+
| **KenKen** | 4×4 to 6×6 | Arithmetic cages + AllDifferent | ✅ Complete |
|
|
133
|
+
| **Kakuro** | 5×5 to 8×8 | Sum constraints + AllDifferent | ✅ Complete |
|
|
134
|
+
| **Binary Puzzle** | 6×6 to 10×10 | Adjacency limits + Equal counts | ✅ Complete |
|
|
135
|
+
| **Futoshiki** | 4×4 to 6×6 | Inequalities + AllDifferent | ✅ Complete |
|
|
136
|
+
| **Nonogram** | 5×5 to 10×10 | Line sum constraints + Blocks | ✅ Complete |
|
|
137
|
+
| **Logic Grid** | Variable | Category associations + Logic | ✅ Complete |
|
|
138
|
+
|
|
139
|
+
### Advanced CP-SAT Puzzles
|
|
140
|
+
|
|
141
|
+
| Game | Grid Size | Constraint Types | Status |
|
|
142
|
+
|------|-----------|------------------|--------|
|
|
143
|
+
| **Killer Sudoku** | 9×9 | Linear constraints + AllDifferent + Cages | ✅ Complete |
|
|
144
|
+
| **Lights Out** | 5×5 to 7×7 | Boolean XOR constraints (SAT) | ✅ Complete |
|
|
145
|
+
| **Mastermind** | 4-6 pegs | Deduction + Feedback constraints | ✅ Complete |
|
|
146
|
+
| **Slitherlink** | 5×5 to 10×10 | Global loop + Edge constraints | ✅ Complete |
|
|
147
|
+
| **Bridges** | 7×7 to 11×11 | Connectivity + Degree constraints | ✅ Complete |
|
|
148
|
+
| **Hitori** | 5×5 to 9×9 | AllDifferent + Adjacency + Connectivity | ✅ Complete |
|
|
149
|
+
| **Shikaku** | 6×6 to 10×10 | Area partitioning + Rectangle covering | ✅ Complete |
|
|
150
|
+
|
|
151
|
+
### Specialized Constraint Puzzles
|
|
152
|
+
|
|
153
|
+
| Game | Grid Size | Constraint Types | Status |
|
|
154
|
+
|------|-----------|------------------|--------|
|
|
155
|
+
| **Hidato** | 5×5 to 9×9 | Sequential adjacency + Hamiltonian path | ✅ Complete |
|
|
156
|
+
| **Tents and Trees** | 6×6 to 10×10 | Bipartite matching + Adjacency avoidance | ✅ Complete |
|
|
157
|
+
| **Fillomino** | 6×6 to 10×10 | Region growth + Self-referential constraints | ✅ Complete |
|
|
158
|
+
| **Star Battle** | 6×6 to 10×10 | Multi-region placement + Adjacency avoidance | ✅ Complete |
|
|
159
|
+
| **Sokoban** | 6×6 to 10×10 | Spatial planning + Irreversible actions (optimization) | ✅ Complete |
|
|
160
|
+
|
|
161
|
+
### Optimization Challenges
|
|
162
|
+
|
|
163
|
+
| Game | Problem Size | Constraint Types | Status |
|
|
164
|
+
|------|-------------|------------------|--------|
|
|
165
|
+
| **Knapsack** | 5-12 items | Value maximization + Capacity constraint | ✅ Complete |
|
|
166
|
+
| **Task Scheduler** | 4-8 tasks | Makespan minimization + Dependencies + Resources | ✅ Complete |
|
|
167
|
+
|
|
168
|
+
### Advanced Reasoning Puzzles
|
|
169
|
+
|
|
170
|
+
| Game | Grid Size | Constraint Types | Status |
|
|
171
|
+
|------|-----------|------------------|--------|
|
|
172
|
+
| **Nurikabe** | 6×6 to 10×10 | Connectivity + Island sizes + No 2×2 blocks | ✅ Complete |
|
|
173
|
+
| **Einstein's Puzzle** | 5 houses × 5 attributes | Multi-attribute deduction + Logic chains | ✅ Complete |
|
|
174
|
+
| **Minesweeper** | 6×6 to 10×10 | Probabilistic reasoning + Safe deduction | ✅ Complete |
|
|
175
|
+
|
|
176
|
+
## Solver Profiles & Business Mapping
|
|
177
|
+
|
|
178
|
+
Each game includes metadata for **constraint types**, **business analogies**, and **complexity profiles**, making it easy to:
|
|
179
|
+
|
|
180
|
+
- **Select puzzles by constraint pattern** - Need to demonstrate Boolean SAT? → Lights Out
|
|
181
|
+
- **Map to business use cases** - Task Scheduler → Sprint Planning, Knapsack → Portfolio Selection
|
|
182
|
+
- **Benchmark LLM reasoning** - Compare model performance across different constraint densities
|
|
183
|
+
|
|
184
|
+
### Example: Query Games by Profile
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
from chuk_puzzles_gym.games import AVAILABLE_GAMES
|
|
188
|
+
|
|
189
|
+
# Find all optimization problems
|
|
190
|
+
optimization_games = [
|
|
191
|
+
name for name, game_class in AVAILABLE_GAMES.items()
|
|
192
|
+
if "optimization" in game_class().constraint_types
|
|
193
|
+
]
|
|
194
|
+
# → ['knapsack', 'scheduler']
|
|
195
|
+
|
|
196
|
+
# Find games that model resource allocation
|
|
197
|
+
resource_games = [
|
|
198
|
+
name for name, game_class in AVAILABLE_GAMES.items()
|
|
199
|
+
if "resource_allocation" in game_class().business_analogies
|
|
200
|
+
]
|
|
201
|
+
# → ['knapsack', 'scheduler']
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Quick Reference: Constraint Types to Business Problems
|
|
205
|
+
|
|
206
|
+
| Constraint Pattern | Puzzle Examples | Business Use Cases |
|
|
207
|
+
|-------------------|-----------------|-------------------|
|
|
208
|
+
| **Optimization** | Knapsack, Scheduler | Portfolio selection, Sprint planning, Budget allocation |
|
|
209
|
+
| **Precedence** | Scheduler | Project dependencies, Workflow sequencing |
|
|
210
|
+
| **Sequential Adjacency** | Hidato | Path planning, Route sequencing, Tour optimization |
|
|
211
|
+
| **Hamiltonian Path** | Hidato | Traveling salesman, Circuit design |
|
|
212
|
+
| **Bipartite Matching** | Tents and Trees | Job assignment, Resource pairing |
|
|
213
|
+
| **Region Growth** | Fillomino | Territory expansion, Cluster formation |
|
|
214
|
+
| **Spatial Planning** | Sokoban | Warehouse logistics, Movement planning |
|
|
215
|
+
| **Connectivity** | Nurikabe, Slitherlink | Network design, Routing, Zone planning |
|
|
216
|
+
| **Global Loop** | Slitherlink | Circuit design, Path finding |
|
|
217
|
+
| **Boolean SAT** | Lights Out | Feature dependencies, Toggle systems |
|
|
218
|
+
| **Cage Sums** | Killer Sudoku, Kakuro | Team budgets, Grouped constraints |
|
|
219
|
+
| **AllDifferent** | Sudoku, KenKen | Resource uniqueness, Assignment problems |
|
|
220
|
+
|
|
221
|
+
## Quick Start
|
|
222
|
+
|
|
223
|
+
### Prerequisites
|
|
224
|
+
|
|
225
|
+
- Python 3.11 or higher
|
|
226
|
+
- [UV](https://github.com/astral-sh/uv) (recommended) or pip
|
|
227
|
+
|
|
228
|
+
### Installation
|
|
229
|
+
|
|
230
|
+
#### Using uvx (No Installation Required)
|
|
231
|
+
|
|
232
|
+
Run directly without installing using [uvx](https://docs.astral.sh/uv/guides/tools/):
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
# Run the puzzle server
|
|
236
|
+
uvx chuk-puzzles-gym
|
|
237
|
+
|
|
238
|
+
# Generate synthetic datasets
|
|
239
|
+
uvx --from chuk-puzzles-gym chuk-puzzles-export -o puzzles.jsonl
|
|
240
|
+
|
|
241
|
+
# Run evaluation harness
|
|
242
|
+
uvx --from chuk-puzzles-gym chuk-puzzles-eval sudoku -n 10
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
#### From PyPI
|
|
246
|
+
|
|
247
|
+
```bash
|
|
248
|
+
# Install with pip
|
|
249
|
+
pip install chuk-puzzles-gym
|
|
250
|
+
|
|
251
|
+
# Or with uv
|
|
252
|
+
uv pip install chuk-puzzles-gym
|
|
253
|
+
|
|
254
|
+
# Then run commands directly
|
|
255
|
+
chuk-puzzles-server # Start the server
|
|
256
|
+
chuk-puzzles-export # Generate datasets
|
|
257
|
+
chuk-puzzles-eval # Run evaluation
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
#### From Source (Development)
|
|
261
|
+
|
|
262
|
+
##### Using UV (Recommended)
|
|
263
|
+
|
|
264
|
+
```bash
|
|
265
|
+
# Clone the repository
|
|
266
|
+
git clone https://github.com/chrishayuk/chuk-puzzles-gym.git
|
|
267
|
+
cd chuk-puzzles-gym
|
|
268
|
+
|
|
269
|
+
# Install UV if you haven't already
|
|
270
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
271
|
+
|
|
272
|
+
# Install development dependencies
|
|
273
|
+
make dev-install
|
|
274
|
+
|
|
275
|
+
# Run the server
|
|
276
|
+
make run
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
##### Using pip
|
|
280
|
+
|
|
281
|
+
```bash
|
|
282
|
+
# Clone the repository
|
|
283
|
+
git clone https://github.com/chrishayuk/chuk-puzzles-gym.git
|
|
284
|
+
cd chuk-puzzles-gym
|
|
285
|
+
|
|
286
|
+
# Install in development mode with dev dependencies
|
|
287
|
+
pip install -e ".[dev]"
|
|
288
|
+
|
|
289
|
+
# Run the server
|
|
290
|
+
PYTHONPATH=. uv run --with chuk-protocol-server chuk-protocol-server server-launcher -c config.yaml
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
### Using Make (All Commands)
|
|
294
|
+
|
|
295
|
+
```bash
|
|
296
|
+
# See all available commands
|
|
297
|
+
make help
|
|
298
|
+
|
|
299
|
+
# Development workflow
|
|
300
|
+
make dev-install # Install dev dependencies
|
|
301
|
+
make run # Run the server
|
|
302
|
+
make test # Run tests
|
|
303
|
+
make test-cov # Run tests with coverage report
|
|
304
|
+
make check # Run linting and type checking
|
|
305
|
+
make format # Format code with ruff
|
|
306
|
+
make security # Run security checks
|
|
307
|
+
|
|
308
|
+
# Docker workflow
|
|
309
|
+
make docker-build # Build Docker image
|
|
310
|
+
make docker-run # Run in Docker container
|
|
311
|
+
|
|
312
|
+
# Examples
|
|
313
|
+
make example-telnet # Browse games via telnet
|
|
314
|
+
make example-telnet-sudoku # Sudoku demo
|
|
315
|
+
make example-telnet-kenken # KenKen demo
|
|
316
|
+
make example-ws # WebSocket tour
|
|
317
|
+
make example-ws-interactive # Interactive WebSocket mode
|
|
318
|
+
|
|
319
|
+
# Deployment
|
|
320
|
+
make fly-deploy # Deploy to Fly.io
|
|
321
|
+
make fly-logs # View Fly.io logs
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
### Docker Setup
|
|
325
|
+
|
|
326
|
+
Build and run with Docker:
|
|
327
|
+
|
|
328
|
+
```bash
|
|
329
|
+
# Using Make
|
|
330
|
+
make docker-run
|
|
331
|
+
|
|
332
|
+
# Or manually
|
|
333
|
+
docker build -t chuk-puzzles-gym .
|
|
334
|
+
docker run -p 8023:8023 -p 8024:8024 -p 8025:8025 -p 8026:8026 chuk-puzzles-gym
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
## Connecting to the Server
|
|
338
|
+
|
|
339
|
+
### Local Development
|
|
340
|
+
|
|
341
|
+
**Via Telnet:**
|
|
342
|
+
```bash
|
|
343
|
+
telnet localhost 8023
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
**Via Netcat (TCP):**
|
|
347
|
+
```bash
|
|
348
|
+
nc localhost 8024
|
|
349
|
+
```
|
|
350
|
+
|
|
351
|
+
**Via WebSocket:**
|
|
352
|
+
```
|
|
353
|
+
ws://localhost:8025/ws
|
|
354
|
+
ws://localhost:8026/ws
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
## Game Menu
|
|
358
|
+
|
|
359
|
+
When you connect, you'll see the main menu:
|
|
360
|
+
|
|
361
|
+
```
|
|
362
|
+
==================================================
|
|
363
|
+
WELCOME TO THE PUZZLE ARCADE!
|
|
364
|
+
==================================================
|
|
365
|
+
|
|
366
|
+
CLASSIC LOGIC PUZZLES:
|
|
367
|
+
1) Sudoku - Classic logic puzzle - fill 9x9 grid with digits 1-9
|
|
368
|
+
2) KenKen - Arithmetic cage puzzle - combine math and logic
|
|
369
|
+
3) Kakuro - Crossword math puzzle - fill runs with unique digits that sum to clues
|
|
370
|
+
4) Binary Puzzle - Fill grid with 0s and 1s - no three in a row, equal counts
|
|
371
|
+
5) Futoshiki - Inequality number puzzle - fill grid with constraints
|
|
372
|
+
6) Nonogram - Picture logic puzzle - reveal image from number clues
|
|
373
|
+
7) Logic Grid - Deductive reasoning puzzle - match attributes using logic
|
|
374
|
+
|
|
375
|
+
ADVANCED CP-SAT PUZZLES:
|
|
376
|
+
8) Killer Sudoku - Sudoku + Kakuro - regions must sum to targets
|
|
377
|
+
9) Lights Out - Toggle lights to turn all off - XOR constraint puzzle
|
|
378
|
+
10) Mastermind - Code-breaking with logical deduction and feedback
|
|
379
|
+
11) Slitherlink - Draw a single loop - numbers show edge counts
|
|
380
|
+
12) Bridges - Connect islands with bridges - satisfy all numbers
|
|
381
|
+
13) Hitori - Shade cells to eliminate duplicates - no adjacent shading
|
|
382
|
+
14) Shikaku - Divide grid into rectangles matching areas
|
|
383
|
+
|
|
384
|
+
SPECIALIZED CONSTRAINT PUZZLES:
|
|
385
|
+
15) Hidato - Sequential path puzzle - connect numbers adjacently
|
|
386
|
+
16) Tents - Place tents next to trees - bipartite matching puzzle
|
|
387
|
+
17) Fillomino - Fill regions with numbers matching region size
|
|
388
|
+
18) Star Battle - Place stars avoiding adjacency - multi-region placement
|
|
389
|
+
19) Sokoban - Push boxes to targets - spatial planning puzzle
|
|
390
|
+
|
|
391
|
+
OPTIMIZATION CHALLENGES:
|
|
392
|
+
20) Knapsack - Maximize value within capacity constraints
|
|
393
|
+
21) Task Scheduler - Minimize makespan with dependencies and resources
|
|
394
|
+
|
|
395
|
+
ADVANCED REASONING PUZZLES:
|
|
396
|
+
22) Nurikabe - Island and sea puzzle - connectivity constraints
|
|
397
|
+
23) Einstein's Puzzle - Who owns the fish? Multi-attribute deduction
|
|
398
|
+
24) Minesweeper - Find all mines using logical deduction
|
|
399
|
+
|
|
400
|
+
Commands:
|
|
401
|
+
<number> - Select game by number
|
|
402
|
+
<name> - Select game by name (e.g., 'sudoku')
|
|
403
|
+
help - Show this menu again
|
|
404
|
+
quit - Exit the server
|
|
405
|
+
==================================================
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
## Agent-Friendly Mode
|
|
409
|
+
|
|
410
|
+
The server includes a special **agent mode** designed for AI tools and LLM integration:
|
|
411
|
+
|
|
412
|
+
### Enabling Agent Mode
|
|
413
|
+
|
|
414
|
+
```
|
|
415
|
+
> mode agent
|
|
416
|
+
Output mode set to: agent
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
### Agent Mode Features
|
|
420
|
+
|
|
421
|
+
**Structured Output** - Grid data is wrapped with clear start/end markers:
|
|
422
|
+
```
|
|
423
|
+
---GAME-START---
|
|
424
|
+
GAME: Sudoku
|
|
425
|
+
DIFFICULTY: medium
|
|
426
|
+
MOVES: 3
|
|
427
|
+
---GRID-START---
|
|
428
|
+
| 1 2 3 | 4 5 6 | 7 8 9 |
|
|
429
|
+
-------------------------
|
|
430
|
+
1 | . . 3 | . 2 . | 6 . . |
|
|
431
|
+
...
|
|
432
|
+
---GRID-END---
|
|
433
|
+
---GAME-END---
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
**Benefits for AI Agents:**
|
|
437
|
+
- Easy parsing with regex: `---GRID-START---(.*?)---GRID-END---` (enable DOTALL mode, e.g. `re.DOTALL`, so `.` matches the newlines inside the multi-line grid)
|
|
438
|
+
- Consistent metadata format (GAME, DIFFICULTY, MOVES)
|
|
439
|
+
- No decorative text or banners to filter out
|
|
440
|
+
- Minimal token usage compared to normal mode
|
|
441
|
+
|
|
442
|
+
**Switching Modes:**
|
|
443
|
+
- `mode normal` - Human-friendly output (default)
|
|
444
|
+
- `mode agent` - Machine-parseable structured output
|
|
445
|
+
- `mode compact` - Reserved for future use
|
|
446
|
+
|
|
447
|
+
## Gymnasium-Compatible RL Environment
|
|
448
|
+
|
|
449
|
+
The project includes a **Gymnasium-compatible environment** for training reinforcement learning agents:
|
|
450
|
+
|
|
451
|
+
### Quick Start
|
|
452
|
+
|
|
453
|
+
```python
|
|
454
|
+
from chuk_puzzles_gym.gym_env import PuzzleEnv
|
|
455
|
+
|
|
456
|
+
# Create environment for any of the 24 games
|
|
457
|
+
env = PuzzleEnv("sudoku", difficulty="easy", seed=42)
|
|
458
|
+
|
|
459
|
+
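# Reset to start a new episode (the API is async: run these calls inside an `async def` driven by asyncio.run(), or in an asyncio-aware REPL)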
|
|
460
|
+
obs, info = await env.reset()
|
|
461
|
+
|
|
462
|
+
# Take actions (text commands or tuples)
|
|
463
|
+
obs, reward, terminated, truncated, info = await env.step("place 1 1 5")
|
|
464
|
+
|
|
465
|
+
# Or use tuple format
|
|
466
|
+
obs, reward, terminated, truncated, info = await env.step(("place", 1, 1, 5))
|
|
467
|
+
|
|
468
|
+
# Get available games
|
|
469
|
+
games = PuzzleEnv.available_games()
|
|
470
|
+
# → ['sudoku', 'kenken', 'minesweeper', ...]
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
### Features
|
|
474
|
+
|
|
475
|
+
- **All 24 games** accessible through unified API
|
|
476
|
+
- **Configurable rewards** for correct moves, invalid attempts, completion bonuses
|
|
477
|
+
- **Hint system** with optional budget limits
|
|
478
|
+
- **Solver-free mode** for pure reasoning benchmarks
|
|
479
|
+
- **Efficiency scoring** based on optimal step counts
|
|
480
|
+
- **Deterministic seeding** for reproducible experiments
|
|
481
|
+
|
|
482
|
+
### Observation Space
|
|
483
|
+
|
|
484
|
+
```python
|
|
485
|
+
obs = {
|
|
486
|
+
"game": "sudoku",
|
|
487
|
+
"difficulty": "easy",
|
|
488
|
+
"seed": 42,
|
|
489
|
+
"moves": 5,
|
|
490
|
+
"invalid_moves": 1,
|
|
491
|
+
"hints_used": 2,
|
|
492
|
+
"is_complete": False,
|
|
493
|
+
"grid": [[4, 0, 8, ...], ...] # Game-specific state
|
|
494
|
+
}
|
|
495
|
+
```
|
|
496
|
+
|
|
497
|
+
### Reward Configuration
|
|
498
|
+
|
|
499
|
+
```python
|
|
500
|
+
env = PuzzleEnv("kenken", reward_config={
|
|
501
|
+
"correct_placement": 1.0, # Reward for valid moves
|
|
502
|
+
"invalid_attempt": -0.5, # Penalty for invalid moves
|
|
503
|
+
"completion_bonus": 10.0, # Bonus for solving
|
|
504
|
+
"hint_penalty": -0.1, # Penalty for using hints
|
|
505
|
+
"efficiency_multiplier": 2.0, # Scales completion bonus by efficiency
|
|
506
|
+
})
|
|
507
|
+
```
|
|
508
|
+
|
|
509
|
+
### Solver Configuration
|
|
510
|
+
|
|
511
|
+
```python
|
|
512
|
+
from chuk_puzzles_gym.models import SolverConfig
|
|
513
|
+
|
|
514
|
+
# Solver-free mode (no hints allowed)
|
|
515
|
+
config = SolverConfig.solver_free()
|
|
516
|
+
env = PuzzleEnv("sudoku", solver_config=config)
|
|
517
|
+
|
|
518
|
+
# Limited hints
|
|
519
|
+
config = SolverConfig(hint_budget=5, hint_penalty=0.1)
|
|
520
|
+
env = PuzzleEnv("sudoku", solver_config=config)
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
## Evaluation Harness
|
|
524
|
+
|
|
525
|
+
The project includes a built-in **evaluation harness** for benchmarking puzzle-solving agents:
|
|
526
|
+
|
|
527
|
+
### Quick Start
|
|
528
|
+
|
|
529
|
+
```bash
|
|
530
|
+
# List all available games
|
|
531
|
+
chuk-puzzles-eval --list-games
|
|
532
|
+
|
|
533
|
+
# Evaluate a specific game (10 episodes, medium difficulty)
|
|
534
|
+
chuk-puzzles-eval sudoku -d medium -n 10 -v
|
|
535
|
+
|
|
536
|
+
# Evaluate all games (5 episodes each)
|
|
537
|
+
chuk-puzzles-eval --all -d easy -n 5
|
|
538
|
+
|
|
539
|
+
# Output as JSON for analysis
|
|
540
|
+
chuk-puzzles-eval sudoku -n 20 -o json > results.json
|
|
541
|
+
```
|
|
542
|
+
|
|
543
|
+
### Using Make Targets
|
|
544
|
+
|
|
545
|
+
```bash
|
|
546
|
+
make eval # Quick evaluation (3 episodes per game)
|
|
547
|
+
make eval-sudoku # Evaluate Sudoku (10 episodes)
|
|
548
|
+
make eval-all # Evaluate all games (10 episodes each)
|
|
549
|
+
make eval-json # Output as JSON
|
|
550
|
+
make list-games # List available games
|
|
551
|
+
```
|
|
552
|
+
|
|
553
|
+
### Sample Output
|
|
554
|
+
|
|
555
|
+
```
|
|
556
|
+
Sudoku Medium Evaluation (10 episodes)
|
|
557
|
+
==================================================
|
|
558
|
+
Solved: 10/10 (100.0%)
|
|
559
|
+
Avg Moves: 45.3
|
|
560
|
+
Avg Invalid: 0.0
|
|
561
|
+
Avg Time: 12ms
|
|
562
|
+
```
|
|
563
|
+
|
|
564
|
+
### Output Formats
|
|
565
|
+
|
|
566
|
+
- **text** (default) - Human-readable summary
|
|
567
|
+
- **json** - Structured JSON for programmatic analysis
|
|
568
|
+
- **csv** - Spreadsheet-compatible format
|
|
569
|
+
- **markdown** - Documentation-ready tables
|
|
570
|
+
|
|
571
|
+
### Metrics Collected
|
|
572
|
+
|
|
573
|
+
| Metric | Description |
|
|
574
|
+
|--------|-------------|
|
|
575
|
+
| `solved` | Whether the puzzle was solved |
|
|
576
|
+
| `moves_made` | Number of valid moves |
|
|
577
|
+
| `invalid_moves` | Number of rejected moves |
|
|
578
|
+
| `hints_used` | Number of hints requested |
|
|
579
|
+
| `wall_time_ms` | Time to solve in milliseconds |
|
|
580
|
+
| `seed` | Puzzle seed for reproducibility |
|
|
581
|
+
|
|
582
|
+
## Dataset Export
|
|
583
|
+
|
|
584
|
+
Generate synthetic puzzle datasets for training and benchmarking LLMs and constraint solvers. The export system produces JSONL files with complete problem definitions, solutions, and step-by-step reasoning traces.
|
|
585
|
+
|
|
586
|
+
### CLI Usage
|
|
587
|
+
|
|
588
|
+
```bash
|
|
589
|
+
# Generate 100 puzzles per game/difficulty for all 24 games
|
|
590
|
+
chuk-puzzles-export -o puzzles.jsonl
|
|
591
|
+
|
|
592
|
+
# Specific games only
|
|
593
|
+
chuk-puzzles-export -g sudoku kenken einstein -n 100 -o selected.jsonl
|
|
594
|
+
|
|
595
|
+
# Single difficulty level
|
|
596
|
+
chuk-puzzles-export -d easy -n 50 -o easy_puzzles.jsonl
|
|
597
|
+
|
|
598
|
+
# Multiple difficulties
|
|
599
|
+
chuk-puzzles-export -d easy medium -n 100 -o train_data.jsonl
|
|
600
|
+
|
|
601
|
+
# Reproducible generation with seed
|
|
602
|
+
chuk-puzzles-export -g sudoku -s 0 -n 1000 -o sudoku_seed0.jsonl
|
|
603
|
+
|
|
604
|
+
# Without step-by-step traces (smaller files)
|
|
605
|
+
chuk-puzzles-export --no-trace -n 500 -o compact.jsonl
|
|
606
|
+
|
|
607
|
+
# List all available games
|
|
608
|
+
chuk-puzzles-export --list-games
|
|
609
|
+
```
|
|
610
|
+
|
|
611
|
+
### CLI Options
|
|
612
|
+
|
|
613
|
+
| Option | Description | Default |
|
|
614
|
+
|--------|-------------|---------|
|
|
615
|
+
| `-o, --output` | Output file path | `puzzles.jsonl` |
|
|
616
|
+
| `-g, --games` | Games to include (space-separated) | All games |
|
|
617
|
+
| `-n, --count` | Problems per game/difficulty combo | 100 |
|
|
618
|
+
| `-d, --difficulties` | Difficulty levels to include | easy, medium, hard |
|
|
619
|
+
| `-s, --seed` | Starting seed for reproducibility | 0 |
|
|
620
|
+
| `--no-trace` | Exclude step-by-step solution traces | False |
|
|
621
|
+
| `--list-games` | List available games and exit | - |
|
|
622
|
+
|
|
623
|
+
### Python API
|
|
624
|
+
|
|
625
|
+
```python
|
|
626
|
+
import asyncio
|
|
627
|
+
from chuk_puzzles_gym.export import DatasetExporter, generate_dataset
|
|
628
|
+
from chuk_gym_core import DifficultyLevel
|
|
629
|
+
|
|
630
|
+
# Quick generation with async function
|
|
631
|
+
async def generate():
|
|
632
|
+
total = await generate_dataset(
|
|
633
|
+
output_path="data.jsonl",
|
|
634
|
+
games=["sudoku", "kenken", "einstein"],
|
|
635
|
+
count_per_game=100,
|
|
636
|
+
difficulties=["easy", "medium", "hard"],
|
|
637
|
+
include_trace=True,
|
|
638
|
+
)
|
|
639
|
+
print(f"Generated {total} problems")
|
|
640
|
+
|
|
641
|
+
asyncio.run(generate())
|
|
642
|
+
|
|
643
|
+
# Fine-grained control with context manager
|
|
644
|
+
async def export_custom():
|
|
645
|
+
with DatasetExporter("puzzles.jsonl", include_trace=True) as exporter:
|
|
646
|
+
# Export specific game
|
|
647
|
+
await exporter.export_game(
|
|
648
|
+
game_name="sudoku",
|
|
649
|
+
count=500,
|
|
650
|
+
difficulty=DifficultyLevel.MEDIUM,
|
|
651
|
+
start_seed=0,
|
|
652
|
+
)
|
|
653
|
+
|
|
654
|
+
# Export all games
|
|
655
|
+
await exporter.export_all_games(
|
|
656
|
+
count_per_game=50,
|
|
657
|
+
difficulties=[DifficultyLevel.EASY, DifficultyLevel.HARD],
|
|
658
|
+
)
|
|
659
|
+
|
|
660
|
+
print(f"Total exported: {exporter.count}")
|
|
661
|
+
|
|
662
|
+
asyncio.run(export_custom())
|
|
663
|
+
```
|
|
664
|
+
|
|
665
|
+
### Output Format
|
|
666
|
+
|
|
667
|
+
Each line in the JSONL file contains a complete problem definition:
|
|
668
|
+
|
|
669
|
+
```json
|
|
670
|
+
{
|
|
671
|
+
"id": "sudoku_medium_42",
|
|
672
|
+
"seed": 42,
|
|
673
|
+
"domain": "sudoku",
|
|
674
|
+
"difficulty": "medium",
|
|
675
|
+
"prompt": "Sudoku: Classic 9x9 logic puzzle...\n\nRULES:\n...\n\n[grid]",
|
|
676
|
+
"initial_state": [[0,0,3,...], ...],
|
|
677
|
+
"gold_answer": "[[4,8,3,...], ...]",
|
|
678
|
+
"constraint_types": ["all_different_rows", "all_different_columns", "all_different_boxes"],
|
|
679
|
+
"business_analogies": ["resource_allocation", "scheduling", "assignment_problems"],
|
|
680
|
+
"difficulty_profile": {
|
|
681
|
+
"logic_depth": 45,
|
|
682
|
+
"branching_factor": 3.2,
|
|
683
|
+
"state_observability": 0.88,
|
|
684
|
+
"constraint_density": 0.75
|
|
685
|
+
},
|
|
686
|
+
"operation_count": 47,
|
|
687
|
+
"tags": ["sudoku", "medium"]
|
|
688
|
+
}
|
|
689
|
+
```
|
|
690
|
+
|
|
691
|
+
### Solution Traces
|
|
692
|
+
|
|
693
|
+
When `include_trace=True` (default), each problem includes step-by-step solution traces for teacher-forcing training:
|
|
694
|
+
|
|
695
|
+
```json
|
|
696
|
+
{
|
|
697
|
+
"problem": { ... },
|
|
698
|
+
"trace": {
|
|
699
|
+
"problem_id": "sudoku_medium_42",
|
|
700
|
+
"steps": [
|
|
701
|
+
{
|
|
702
|
+
"index": 0,
|
|
703
|
+
"operation": "PLACE",
|
|
704
|
+
"before_state": "cell(r1,c1)=empty",
|
|
705
|
+
"after_state": "cell(r1,c1)=4",
|
|
706
|
+
"output_value": 4,
|
|
707
|
+
"position": [1, 1],
|
|
708
|
+
"rule_applied": "naked_single_row",
|
|
709
|
+
"explanation": "Place 4 at row 1, column 1. This is the only valid digit considering row 1, column 1, and box 1 constraints."
|
|
710
|
+
},
|
|
711
|
+
{
|
|
712
|
+
"index": 1,
|
|
713
|
+
"operation": "PLACE",
|
|
714
|
+
"before_state": "cell(r1,c3)=empty",
|
|
715
|
+
"after_state": "cell(r1,c3)=7",
|
|
716
|
+
"output_value": 7,
|
|
717
|
+
"position": [1, 3],
|
|
718
|
+
"rule_applied": "naked_single_box",
|
|
719
|
+
"explanation": "Place 7 at row 1, column 3..."
|
|
720
|
+
}
|
|
721
|
+
],
|
|
722
|
+
"checkpoints": [0, 12, 24, 47]
|
|
723
|
+
}
|
|
724
|
+
}
|
|
725
|
+
```
|
|
726
|
+
|
|
727
|
+
### Trace Operations
|
|
728
|
+
|
|
729
|
+
| Operation | Description | Used By |
|
|
730
|
+
|-----------|-------------|---------|
|
|
731
|
+
| `PLACE` | Place a value in a cell | Sudoku, KenKen, Nonogram, etc. |
|
|
732
|
+
| `ELIMINATE` | Mark a cell as excluded/shaded | Hitori, Minesweeper |
|
|
733
|
+
| `DEDUCE` | Logical deduction step | Einstein, Logic Grid, Mastermind |
|
|
734
|
+
|
|
735
|
+
### Rule Types by Game
|
|
736
|
+
|
|
737
|
+
| Game | Rules Applied |
|
|
738
|
+
|------|--------------|
|
|
739
|
+
| Sudoku | `naked_single_row`, `naked_single_column`, `naked_single_box`, `elimination` |
|
|
740
|
+
| Binary | `balance_constraint` |
|
|
741
|
+
| KenKen/Kakuro | `arithmetic_constraint` |
|
|
742
|
+
| Nonogram | `line_constraint` |
|
|
743
|
+
| Einstein | `logical_deduction` |
|
|
744
|
+
| Hitori | `duplicate_elimination` |
|
|
745
|
+
| Bridges | `connectivity_constraint` |
|
|
746
|
+
| Slitherlink | `loop_constraint` |
|
|
747
|
+
| Others | `constraint_propagation` |
|
|
748
|
+
|
|
749
|
+
### Example: Generate Training Data
|
|
750
|
+
|
|
751
|
+
```bash
|
|
752
|
+
# Generate large training dataset
|
|
753
|
+
chuk-puzzles-export \
|
|
754
|
+
-g sudoku kenken kakuro binary futoshiki \
|
|
755
|
+
-n 1000 \
|
|
756
|
+
-d easy medium hard \
|
|
757
|
+
-s 0 \
|
|
758
|
+
-o training_data.jsonl
|
|
759
|
+
|
|
760
|
+
# Generate evaluation set (different seed range)
|
|
761
|
+
chuk-puzzles-export \
|
|
762
|
+
-g sudoku kenken kakuro binary futoshiki \
|
|
763
|
+
-n 100 \
|
|
764
|
+
-d easy medium hard \
|
|
765
|
+
-s 100000 \
|
|
766
|
+
-o eval_data.jsonl
|
|
767
|
+
```
|
|
768
|
+
|
|
769
|
+
### Dataset Statistics
|
|
770
|
+
|
|
771
|
+
With default settings (`-n 100` per game/difficulty):
|
|
772
|
+
|
|
773
|
+
| Configuration | Problems Generated |
|
|
774
|
+
|--------------|-------------------|
|
|
775
|
+
| All games, all difficulties | 24 games × 3 difficulties × 100 = 7,200 |
|
|
776
|
+
| Single game, all difficulties | 1 × 3 × 100 = 300 |
|
|
777
|
+
| All games, single difficulty | 24 × 1 × 100 = 2,400 |
|
|
778
|
+
|
|
779
|
+
### Integration with chuk-gym-core
|
|
780
|
+
|
|
781
|
+
The export system uses [chuk-gym-core](https://pypi.org/project/chuk-gym-core/) for a consistent output format, compatible with:
|
|
782
|
+
|
|
783
|
+
- **chuk-math-gym** - Mathematical reasoning datasets
|
|
784
|
+
- **Teacher-forcing training** - Step-by-step trace supervision
|
|
785
|
+
- **Evaluation pipelines** - Standardized problem/solution schema
|
|
786
|
+
|
|
787
|
+
## Universal Game Commands
|
|
788
|
+
|
|
789
|
+
All games support these commands:
|
|
790
|
+
|
|
791
|
+
### Starting and Managing Games
|
|
792
|
+
- `<number> [difficulty]` - Select game by number (e.g., `1 medium`)
|
|
793
|
+
- `<name> [difficulty]` - Select game by name (e.g., `sudoku hard`)
|
|
794
|
+
- `show` - Display the current grid
|
|
795
|
+
- `mode <normal|agent|compact>` - Set output mode
|
|
796
|
+
- `help` - Show game-specific commands and rules
|
|
797
|
+
- `menu` - Return to main menu
|
|
798
|
+
- `quit` - Exit the server
|
|
799
|
+
|
|
800
|
+
### Playing Games
|
|
801
|
+
- `place <row> <col> <value>` - Place a number/value on the grid
|
|
802
|
+
- Example: `place 1 5 7` (places 7 at row 1, column 5)
|
|
803
|
+
- `clear <row> <col>` - Clear a cell you've filled
|
|
804
|
+
- `hint` - Get a hint for the next move
|
|
805
|
+
- `check` - Check your progress
|
|
806
|
+
- `solve` - Show the solution (ends current game)
|
|
807
|
+
|
|
808
|
+
### Special Commands (Game-Specific)
|
|
809
|
+
- **Logic Grid**: `connect` and `exclude` commands for associations
|
|
810
|
+
- See in-game `help` for game-specific commands
|
|
811
|
+
|
|
812
|
+
## Example Gameplay Sessions
|
|
813
|
+
|
|
814
|
+
### Sudoku
|
|
815
|
+
|
|
816
|
+
```
|
|
817
|
+
> sudoku medium
|
|
818
|
+
|
|
819
|
+
==================================================
|
|
820
|
+
SUDOKU - MEDIUM MODE
|
|
821
|
+
==================================================
|
|
822
|
+
Fill the grid so that every row, column, and 3x3 box
|
|
823
|
+
contains the digits 1-9 without repetition.
|
|
824
|
+
|
|
825
|
+
Type 'help' for commands or 'hint' for a clue.
|
|
826
|
+
==================================================
|
|
827
|
+
|
|
828
|
+
| 1 2 3 | 4 5 6 | 7 8 9 |
|
|
829
|
+
-------------------------
|
|
830
|
+
1 | . . 3 | . 2 . | 6 . . |
|
|
831
|
+
2 | 9 . . | 3 . 5 | . . 1 |
|
|
832
|
+
3 | . . 1 | 8 . 6 | 4 . . |
|
|
833
|
+
-------------------------
|
|
834
|
+
4 | . . 8 | 1 . 2 | 9 . . |
|
|
835
|
+
5 | 7 . . | . . . | . . 8 |
|
|
836
|
+
6 | . . 6 | 7 . 8 | 2 . . |
|
|
837
|
+
-------------------------
|
|
838
|
+
7 | . . 2 | 6 . 9 | 5 . . |
|
|
839
|
+
8 | 8 . . | 2 . 3 | . . 9 |
|
|
840
|
+
9 | . . 5 | . 1 . | 3 . . |
|
|
841
|
+
-------------------------
|
|
842
|
+
Moves made: 0
|
|
843
|
+
==================================================
|
|
844
|
+
|
|
845
|
+
> hint
|
|
846
|
+
Hint: Try placing 4 at row 1, column 1
|
|
847
|
+
|
|
848
|
+
> place 1 1 4
|
|
849
|
+
Number placed successfully!
|
|
850
|
+
|
|
851
|
+
> check
|
|
852
|
+
Puzzle not yet complete. Keep going!
|
|
853
|
+
Moves made: 1
|
|
854
|
+
```
|
|
855
|
+
|
|
856
|
+
### KenKen
|
|
857
|
+
|
|
858
|
+
```
|
|
859
|
+
> kenken easy
|
|
860
|
+
|
|
861
|
+
==================================================
|
|
862
|
+
KENKEN - EASY MODE
|
|
863
|
+
==================================================
|
|
864
|
+
KENKEN RULES:
|
|
865
|
+
- Fill 4x4 grid with 1-4
|
|
866
|
+
- No repeats in rows or columns
|
|
867
|
+
- Satisfy cage arithmetic constraints
|
|
868
|
+
- Operations: + - * /
|
|
869
|
+
==================================================
|
|
870
|
+
|
|
871
|
+
| 1 | 2 | 3 | 4 |
|
|
872
|
+
+----+----+----+----+
|
|
873
|
+
1 | .8+| . | .3 | .2 |
|
|
874
|
+
+----+----+----+----+
|
|
875
|
+
2 | . | .6+| . | .3-|
|
|
876
|
+
+----+----+----+----+
|
|
877
|
+
3 | .2 | .6+| .8+| . |
|
|
878
|
+
+----+----+----+----+
|
|
879
|
+
4 | . | . | . | . |
|
|
880
|
+
+----+----+----+----+
|
|
881
|
+
|
|
882
|
+
Cages:
|
|
883
|
+
8+: (1,1), (1,2), (2,1)
|
|
884
|
+
3: (1,3)
|
|
885
|
+
2: (1,4)
|
|
886
|
+
...
|
|
887
|
+
|
|
888
|
+
> place 1 3 3
|
|
889
|
+
Number placed successfully!
|
|
890
|
+
```
|
|
891
|
+
|
|
892
|
+
## Architecture
|
|
893
|
+
|
|
894
|
+
This server is built on the [chuk-protocol-server](https://github.com/chrishayuk/chuk-protocol-server) framework, which provides:
|
|
895
|
+
|
|
896
|
+
- Multiple transport protocol support (Telnet, TCP, WebSocket, WS-Telnet)
|
|
897
|
+
- Telnet protocol negotiation (IAC, WILL, WONT, DO, DONT)
|
|
898
|
+
- WebSocket handling with ping/pong keepalive
|
|
899
|
+
- Connection management and monitoring
|
|
900
|
+
- Asynchronous I/O with Python asyncio
|
|
901
|
+
|
|
902
|
+
### Game Architecture
|
|
903
|
+
|
|
904
|
+
Each game is a **self-contained module** with all logic co-located:
|
|
905
|
+
|
|
906
|
+
```
|
|
907
|
+
games/
|
|
908
|
+
├── _base/ # Base classes
|
|
909
|
+
│ ├── game.py # PuzzleGame ABC
|
|
910
|
+
│ └── commands.py # GameCommandHandler ABC
|
|
911
|
+
├── sudoku/
|
|
912
|
+
│ ├── __init__.py # Exports SudokuGame
|
|
913
|
+
│ ├── game.py # Game logic
|
|
914
|
+
│ ├── config.py # SudokuConfig
|
|
915
|
+
│ └── commands.py # Command handler
|
|
916
|
+
├── minesweeper/
|
|
917
|
+
│ ├── __init__.py
|
|
918
|
+
│ ├── game.py
|
|
919
|
+
│ └── config.py
|
|
920
|
+
└── ... (24 games total)
|
|
921
|
+
```
|
|
922
|
+
|
|
923
|
+
All games extend the `PuzzleGame` abstract base class with **deterministic seeding**:
|
|
924
|
+
|
|
925
|
+
```python
|
|
926
|
+
from chuk_puzzles_gym.games._base import PuzzleGame
|
|
927
|
+
|
|
928
|
+
class PuzzleGame(ABC):
|
|
929
|
+
def __init__(self, difficulty: str = "easy", seed: int | None = None):
|
|
930
|
+
self.seed = seed if seed is not None else random.randint(0, 2**32 - 1)
|
|
931
|
+
self._rng = random.Random(self.seed) # Deterministic RNG
|
|
932
|
+
# ...
|
|
933
|
+
|
|
934
|
+
@property
|
|
935
|
+
@abstractmethod
|
|
936
|
+
def name(self) -> str: ...
|
|
937
|
+
|
|
938
|
+
@property
|
|
939
|
+
@abstractmethod
|
|
940
|
+
def constraint_types(self) -> list[str]: ...
|
|
941
|
+
|
|
942
|
+
@property
|
|
943
|
+
@abstractmethod
|
|
944
|
+
def business_analogies(self) -> list[str]: ...
|
|
945
|
+
|
|
946
|
+
@abstractmethod
|
|
947
|
+
async def generate_puzzle(self) -> None: ...
|
|
948
|
+
|
|
949
|
+
@abstractmethod
|
|
950
|
+
async def validate_move(self, *args) -> MoveResult: ...
|
|
951
|
+
|
|
952
|
+
@abstractmethod
|
|
953
|
+
def is_complete(self) -> bool: ...
|
|
954
|
+
|
|
955
|
+
@abstractmethod
|
|
956
|
+
def render_grid(self) -> str: ...
|
|
957
|
+
```
|
|
958
|
+
|
|
959
|
+
### Handler Architecture
|
|
960
|
+
|
|
961
|
+
The `ArcadeHandler` class manages:
|
|
962
|
+
- Menu-driven game selection
|
|
963
|
+
- Command parsing and routing (delegating to game-specific handlers)
|
|
964
|
+
- Grid display with proper formatting
|
|
965
|
+
- Game state management per connection
|
|
966
|
+
- Multi-game support
|
|
967
|
+
|
|
968
|
+
## Development
|
|
969
|
+
|
|
970
|
+
### Setup Development Environment
|
|
971
|
+
|
|
972
|
+
```bash
|
|
973
|
+
# Clone the repository
|
|
974
|
+
git clone https://github.com/chrishayuk/chuk-puzzles-gym.git
|
|
975
|
+
cd chuk-puzzles-gym
|
|
976
|
+
|
|
977
|
+
# Install development dependencies (with UV)
|
|
978
|
+
make dev-install
|
|
979
|
+
|
|
980
|
+
# Or with pip
|
|
981
|
+
pip install -e ".[dev]"
|
|
982
|
+
```
|
|
983
|
+
|
|
984
|
+
### Testing
|
|
985
|
+
|
|
986
|
+
The project has comprehensive test coverage (94%, 1067 tests):
|
|
987
|
+
|
|
988
|
+
```bash
|
|
989
|
+
# Run all tests
|
|
990
|
+
make test
|
|
991
|
+
|
|
992
|
+
# Run tests with coverage report
|
|
993
|
+
make test-cov
|
|
994
|
+
|
|
995
|
+
# Run tests in watch mode
|
|
996
|
+
make test-watch
|
|
997
|
+
|
|
998
|
+
# View coverage report in browser
|
|
999
|
+
make serve-coverage
|
|
1000
|
+
```
|
|
1001
|
+
|
|
1002
|
+
### Coverage by Module
|
|
1003
|
+
|
|
1004
|
+
```
|
|
1005
|
+
src/chuk_puzzles_gym/games/_base/ 86% # Base classes (abstract defaults)
|
|
1006
|
+
src/chuk_puzzles_gym/games/sudoku/ 92% # Sudoku module
|
|
1007
|
+
src/chuk_puzzles_gym/games/kenken/ 90% # KenKen module
|
|
1008
|
+
src/chuk_puzzles_gym/games/minesweeper/ 96% # Minesweeper module
|
|
1009
|
+
src/chuk_puzzles_gym/games/sokoban/ 83% # Sokoban (complex pathfinding)
|
|
1010
|
+
src/chuk_puzzles_gym/games/.../ 90%+ # All other games
|
|
1011
|
+
src/chuk_puzzles_gym/gym_env.py 90% # Gymnasium environment
|
|
1012
|
+
src/chuk_puzzles_gym/models/ 90%+ # Pydantic models
|
|
1013
|
+
------------------------------------------------------
|
|
1014
|
+
TOTAL 94% 🎯
|
|
1015
|
+
```
|
|
1016
|
+
|
|
1017
|
+
**Most modules meet the 90%+ coverage threshold.** The remaining gaps are in abstract base class defaults and complex pathfinding algorithms.
|
|
1018
|
+
|
|
1019
|
+
### Code Quality
|
|
1020
|
+
|
|
1021
|
+
The project follows modern Python best practices with a **9.8/10 compliance score**:
|
|
1022
|
+
|
|
1023
|
+
#### Tooling
|
|
1024
|
+
- **Ruff**: Fast linter and formatter (replaces black + flake8)
|
|
1025
|
+
- **MyPy**: Static type checking
|
|
1026
|
+
- **Pytest**: Testing framework with async support
|
|
1027
|
+
- **Bandit**: Security vulnerability scanning
|
|
1028
|
+
|
|
1029
|
+
#### Code Standards
|
|
1030
|
+
- ✅ **Pydantic v2 Native** (10/10) - All models use `ConfigDict`, zero deprecation warnings
|
|
1031
|
+
- ✅ **Async Native** (9.5/10) - All I/O operations use async/await properly
|
|
1032
|
+
- ✅ **Type-Safe** (10/10) - No `dict["key"]` patterns, only typed Pydantic models
|
|
1033
|
+
- ✅ **No Magic Strings** (10/10) - All constants use enums or typed constants
|
|
1034
|
+
- ✅ **Test Coverage** (9.5/10) - 94% overall, most files ≥90%
|
|
1035
|
+
|
|
1036
|
+
#### Quality Metrics
|
|
1037
|
+
- **1067 tests** - All passing ✅
|
|
1038
|
+
- **94% coverage** - Exceeds 90% threshold ✅
|
|
1039
|
+
- **Zero linting errors** - Clean codebase ✅
|
|
1040
|
+
- **Full type safety** - MyPy passes ✅
|
|
1041
|
+
- **Deterministic seeding** - Reproducible puzzles ✅
|
|
1042
|
+
|
|
1043
|
+
```bash
|
|
1044
|
+
# Run all checks (lint + typecheck + test + security)
|
|
1045
|
+
make check
|
|
1046
|
+
|
|
1047
|
+
# Run linter
|
|
1048
|
+
make lint
|
|
1049
|
+
|
|
1050
|
+
# Format code
|
|
1051
|
+
make format
|
|
1052
|
+
|
|
1053
|
+
# Type checking
|
|
1054
|
+
make typecheck
|
|
1055
|
+
|
|
1056
|
+
# Security scanning
|
|
1057
|
+
make security
|
|
1058
|
+
```
|
|
1059
|
+
|
|
1060
|
+
### Running Example Clients
|
|
1061
|
+
|
|
1062
|
+
```bash
|
|
1063
|
+
# Telnet client examples
|
|
1064
|
+
make example-telnet # Browse all games
|
|
1065
|
+
make example-telnet-sudoku # Sudoku demo
|
|
1066
|
+
make example-telnet-kenken # KenKen demo
|
|
1067
|
+
make example-telnet-interactive # Interactive mode
|
|
1068
|
+
|
|
1069
|
+
# WebSocket client examples
|
|
1070
|
+
make example-ws # Tour all games
|
|
1071
|
+
make example-ws-sudoku # Sudoku demo
|
|
1072
|
+
make example-ws-binary # Binary puzzle demo
|
|
1073
|
+
make example-ws-solve # Solve with hints
|
|
1074
|
+
make example-ws-interactive # Interactive mode
|
|
1075
|
+
```
|
|
1076
|
+
|
|
1077
|
+
### CI/CD
|
|
1078
|
+
|
|
1079
|
+
The project includes GitHub Actions workflows:
|
|
1080
|
+
|
|
1081
|
+
- **test.yml**: Runs tests on Ubuntu, Windows, macOS with Python 3.11, 3.12, 3.13
|
|
1082
|
+
- **publish.yml**: Publishes to PyPI on release
|
|
1083
|
+
- **release.yml**: Creates GitHub releases
|
|
1084
|
+
- **fly-deploy.yml**: Auto-deploys to Fly.io on main branch push
|
|
1085
|
+
|
|
1086
|
+
Coverage threshold is set to 90% - builds fail if coverage drops below this.
|
|
1087
|
+
|
|
1088
|
+
## Deployment to Fly.io
|
|
1089
|
+
|
|
1090
|
+
### Using Make (Recommended)
|
|
1091
|
+
|
|
1092
|
+
```bash
|
|
1093
|
+
# Deploy to Fly.io
|
|
1094
|
+
make fly-deploy
|
|
1095
|
+
|
|
1096
|
+
# Check status
|
|
1097
|
+
make fly-status
|
|
1098
|
+
|
|
1099
|
+
# View logs
|
|
1100
|
+
make fly-logs
|
|
1101
|
+
```
|
|
1102
|
+
|
|
1103
|
+
### Manual Deployment
|
|
1104
|
+
|
|
1105
|
+
1. Install the Fly CLI: https://fly.io/docs/hands-on/install-flyctl/
|
|
1106
|
+
|
|
1107
|
+
2. Log in to Fly:
|
|
1108
|
+
```bash
|
|
1109
|
+
fly auth login
|
|
1110
|
+
```
|
|
1111
|
+
|
|
1112
|
+
3. Create and deploy the app:
|
|
1113
|
+
```bash
|
|
1114
|
+
# First deployment (creates the app)
|
|
1115
|
+
fly launch --config fly.toml --now
|
|
1116
|
+
|
|
1117
|
+
# Subsequent deployments
|
|
1118
|
+
fly deploy
|
|
1119
|
+
```
|
|
1120
|
+
|
|
1121
|
+
4. **Important:** Allocate a public IPv6 address for TCP services:
|
|
1122
|
+
```bash
|
|
1123
|
+
# Allocate IPv6 (free)
|
|
1124
|
+
fly ips allocate-v6
|
|
1125
|
+
|
|
1126
|
+
# Verify IP is allocated
|
|
1127
|
+
fly ips list
|
|
1128
|
+
```
|
|
1129
|
+
|
|
1130
|
+
5. Check the status:
|
|
1131
|
+
```bash
|
|
1132
|
+
fly status
|
|
1133
|
+
```
|
|
1134
|
+
|
|
1135
|
+
6. View logs:
|
|
1136
|
+
```bash
|
|
1137
|
+
fly logs
|
|
1138
|
+
```
|
|
1139
|
+
|
|
1140
|
+
7. Connect to your Puzzle Arcade server:
|
|
1141
|
+
```bash
|
|
1142
|
+
# Get your app's IPv6 address
|
|
1143
|
+
fly ips list
|
|
1144
|
+
|
|
1145
|
+
# Connect via telnet using IPv6 (free tier)
|
|
1146
|
+
telnet <your-ipv6> 8023
|
|
1147
|
+
|
|
1148
|
+
# WebSocket connections work with hostname
|
|
1149
|
+
# ws://<your-app>.fly.dev:8025/ws
|
|
1150
|
+
```
|
|
1151
|
+
|
|
1152
|
+
**Note:** TCP services (Telnet, raw TCP) require a public IP address on Fly.io. We use IPv6 which is free. IPv4 costs $2/month and is not needed for most users.
|
|
1153
|
+
|
|
1154
|
+
## Project Structure
|
|
1155
|
+
|
|
1156
|
+
```
|
|
1157
|
+
chuk-puzzles-gym/
|
|
1158
|
+
├── src/
|
|
1159
|
+
│ └── chuk_puzzles_gym/
|
|
1160
|
+
│ ├── __init__.py # Package initialization
|
|
1161
|
+
│ ├── server.py # Main arcade handler
|
|
1162
|
+
│ ├── constants.py # Game constants
|
|
1163
|
+
│ ├── models/ # Pydantic models
|
|
1164
|
+
│ │ ├── __init__.py
|
|
1165
|
+
│ │ ├── base.py # GridPosition, MoveResult
|
|
1166
|
+
│ │ ├── config.py # Base GameConfig
|
|
1167
|
+
│ │ ├── enums.py # DifficultyLevel, GameCommand, etc.
|
|
1168
|
+
│ │ └── games.py # Game-specific models (Cage, Task, etc.)
|
|
1169
|
+
│ └── games/ # Self-contained game modules
|
|
1170
|
+
│ ├── __init__.py # AVAILABLE_GAMES registry
|
|
1171
|
+
│ ├── _base/ # Base classes
|
|
1172
|
+
│ │ ├── __init__.py
|
|
1173
|
+
│ │ ├── game.py # PuzzleGame ABC
|
|
1174
|
+
│ │ └── commands.py # GameCommandHandler ABC
|
|
1175
|
+
│ ├── sudoku/ # Example game module
|
|
1176
|
+
│ │ ├── __init__.py # Exports SudokuGame
|
|
1177
|
+
│ │ ├── game.py # SudokuGame class
|
|
1178
|
+
│ │ ├── config.py # SudokuConfig
|
|
1179
|
+
│ │ └── commands.py # SudokuCommandHandler
|
|
1180
|
+
│ ├── minesweeper/ # Each game is self-contained
|
|
1181
|
+
│ │ ├── __init__.py
|
|
1182
|
+
│ │ ├── game.py
|
|
1183
|
+
│ │ └── config.py
|
|
1184
|
+
│ └── ... (24 games total)
|
|
1185
|
+
├── tests/
|
|
1186
|
+
│ ├── test_puzzle_game.py # Base class tests
|
|
1187
|
+
│ ├── test_deterministic_seeding.py # Seeding tests
|
|
1188
|
+
│ ├── test_sudoku_game.py # Sudoku tests
|
|
1189
|
+
│ ├── test_minesweeper.py # Minesweeper tests
|
|
1190
|
+
│ └── ... (tests for all 24 games)
|
|
1191
|
+
├── examples/
|
|
1192
|
+
│ ├── simple_client.py # Telnet client example
|
|
1193
|
+
│ ├── websocket_client.py # WebSocket client example
|
|
1194
|
+
│ └── README.md # Example usage guide
|
|
1195
|
+
├── .github/workflows/ # CI/CD workflows
|
|
1196
|
+
├── pyproject.toml # Modern Python project config
|
|
1197
|
+
├── config.yaml # Multi-transport server configuration
|
|
1198
|
+
├── Dockerfile # Docker build instructions
|
|
1199
|
+
├── fly.toml # Fly.io deployment config
|
|
1200
|
+
├── Makefile # Development commands (50+ targets)
|
|
1201
|
+
└── README.md # This file
|
|
1202
|
+
```
|
|
1203
|
+
|
|
1204
|
+
### Key Statistics
|
|
1205
|
+
|
|
1206
|
+
- **Test Coverage**: 94% overall (1067 tests, all passing)
|
|
1207
|
+
- **Code Quality Score**: 9.8/10 (near-perfect compliance)
|
|
1208
|
+
- **Games Implemented**: 24 complete puzzle types
|
|
1209
|
+
- 7 Classic Logic Puzzles
|
|
1210
|
+
- 7 Advanced CP-SAT Puzzles
|
|
1211
|
+
- 5 Specialized Constraint Puzzles
|
|
1212
|
+
- 2 Optimization Challenges
|
|
1213
|
+
- 3 Advanced Reasoning Puzzles
|
|
1214
|
+
- **Supported Transports**: 4 (Telnet, TCP, WebSocket, WS-Telnet)
|
|
1215
|
+
- **Agent-Friendly Mode**: Structured output for AI tools
|
|
1216
|
+
- **Gymnasium API**: RL-compatible environment for all games
|
|
1217
|
+
- **Deterministic Seeding**: Reproducible puzzles for testing
|
|
1218
|
+
|
|
1219
|
+
## Use Cases
|
|
1220
|
+
|
|
1221
|
+
### 1. LLM Reasoning Demonstration
|
|
1222
|
+
|
|
1223
|
+
Perfect for demonstrating LLM reasoning capabilities:
|
|
1224
|
+
|
|
1225
|
+
1. **LLM connects** via telnet: `telnet localhost 8023`
|
|
1226
|
+
2. **Selects a puzzle**: `sudoku hard`
|
|
1227
|
+
3. **Receives puzzle** in clean ASCII format
|
|
1228
|
+
4. **Analyzes constraints** and generates solution
|
|
1229
|
+
5. **Submits moves**: `place 1 5 7`
|
|
1230
|
+
6. **Server validates** each move
|
|
1231
|
+
7. **Puzzle solved!** Proof of reasoning capability
|
|
1232
|
+
|
|
1233
|
+
### 2. Constraint Solver Testing
|
|
1234
|
+
|
|
1235
|
+
Test the generality of constraint solvers (like MCP solvers):
|
|
1236
|
+
|
|
1237
|
+
- **Different puzzle types** → Same underlying solver
|
|
1238
|
+
- **Clean ASCII output** → Easy for solver parsing
|
|
1239
|
+
- **Simple interface** → Focus on solving, not UI
|
|
1240
|
+
- **Pure validation** → Server validates moves; the client does the solving
|
|
1241
|
+
|
|
1242
|
+
### 3. Educational Tool
|
|
1243
|
+
|
|
1244
|
+
Learn about constraint satisfaction problems:
|
|
1245
|
+
|
|
1246
|
+
- **24 different puzzle types** demonstrating various constraint types:
|
|
1247
|
+
- AllDifferent constraints (Sudoku, KenKen, Futoshiki)
|
|
1248
|
+
- Arithmetic constraints (KenKen, Kakuro, Killer Sudoku)
|
|
1249
|
+
- Boolean/SAT constraints (Lights Out, Binary Puzzle)
|
|
1250
|
+
- Loop/Edge constraints (Slitherlink)
|
|
1251
|
+
- Deduction constraints (Mastermind, Logic Grid, Einstein's Puzzle)
|
|
1252
|
+
- Optimization objectives (Knapsack, Task Scheduler)
|
|
1253
|
+
- Temporal reasoning (Task Scheduler)
|
|
1254
|
+
- Connectivity constraints (Nurikabe, Slitherlink)
|
|
1255
|
+
- Probabilistic reasoning (Minesweeper)
|
|
1256
|
+
- And more!
|
|
1257
|
+
- **Well-documented code** showing puzzle generation algorithms
|
|
1258
|
+
- **Comprehensive tests** (1067 tests, 94% coverage) demonstrating validation
|
|
1259
|
+
- **Deterministic seeding** - Reproduce any puzzle for debugging/testing
|
|
1260
|
+
- **Production-ready** - 9.8/10 code quality score
|
|
1261
|
+
- **Type-safe** - Full Pydantic v2 and MyPy compliance
|
|
1262
|
+
- **Modular architecture** - Each game is self-contained in its own folder
|
|
1263
|
+
|
|
1264
|
+
## Adding New Puzzle Games
|
|
1265
|
+
|
|
1266
|
+
1. Create a new game folder in `src/chuk_puzzles_gym/games/`:
|
|
1267
|
+
|
|
1268
|
+
```
|
|
1269
|
+
games/
|
|
1270
|
+
└── my_puzzle/
|
|
1271
|
+
├── __init__.py # Export the game class
|
|
1272
|
+
├── game.py # Game logic
|
|
1273
|
+
└── config.py # Game configuration
|
|
1274
|
+
```
|
|
1275
|
+
|
|
1276
|
+
2. Create the config in `config.py`:
|
|
1277
|
+
|
|
1278
|
+
```python
|
|
1279
|
+
from pydantic import Field
|
|
1280
|
+
from ...models import DifficultyLevel, GameConfig
|
|
1281
|
+
|
|
1282
|
+
class MyPuzzleConfig(GameConfig):
|
|
1283
|
+
grid_size: int = Field(default=5, description="Grid size")
|
|
1284
|
+
|
|
1285
|
+
@classmethod
|
|
1286
|
+
def from_difficulty(cls, difficulty: DifficultyLevel) -> "MyPuzzleConfig":
|
|
1287
|
+
sizes = {DifficultyLevel.EASY: 5, DifficultyLevel.MEDIUM: 7, DifficultyLevel.HARD: 9}
|
|
1288
|
+
return cls(difficulty=difficulty, grid_size=sizes[difficulty])
|
|
1289
|
+
```
|
|
1290
|
+
|
|
1291
|
+
3. Create the game in `game.py`:
|
|
1292
|
+
|
|
1293
|
+
```python
|
|
1294
|
+
from .._base import PuzzleGame
|
|
1295
|
+
from ...models import MoveResult
|
|
1296
|
+
from .config import MyPuzzleConfig
|
|
1297
|
+
|
|
1298
|
+
class MyPuzzleGame(PuzzleGame):
|
|
1299
|
+
def __init__(self, difficulty: str = "easy", seed: int | None = None):
|
|
1300
|
+
super().__init__(difficulty, seed)
|
|
1301
|
+
self.config = MyPuzzleConfig.from_difficulty(self.difficulty)
|
|
1302
|
+
# Use self._rng for all randomness (deterministic seeding)
|
|
1303
|
+
|
|
1304
|
+
@property
|
|
1305
|
+
def name(self) -> str:
|
|
1306
|
+
return "My Puzzle"
|
|
1307
|
+
|
|
1308
|
+
@property
|
|
1309
|
+
def constraint_types(self) -> list[str]:
|
|
1310
|
+
return ["all_different", "sum_constraint"]
|
|
1311
|
+
|
|
1312
|
+
@property
|
|
1313
|
+
def business_analogies(self) -> list[str]:
|
|
1314
|
+
return ["resource_allocation", "scheduling"]
|
|
1315
|
+
|
|
1316
|
+
async def generate_puzzle(self) -> None:
|
|
1317
|
+
# Use self._rng.randint(), self._rng.choice(), etc.
|
|
1318
|
+
self.game_started = True
|
|
1319
|
+
|
|
1320
|
+
async def validate_move(self, row: int, col: int, num: int) -> MoveResult:
|
|
1321
|
+
# Validate and apply move
|
|
1322
|
+
return MoveResult(success=True, message="Number placed!")
|
|
1323
|
+
|
|
1324
|
+
def is_complete(self) -> bool:
|
|
1325
|
+
return all(cell != 0 for row in self.grid for cell in row)
|
|
1326
|
+
|
|
1327
|
+
def render_grid(self) -> str:
|
|
1328
|
+
return " | 1 | 2 | 3 |\n" + ...
|
|
1329
|
+
|
|
1330
|
+
def get_stats(self) -> str:
|
|
1331
|
+
return f"Moves: {self.moves_made} | Seed: {self.seed}"
|
|
1332
|
+
```
|
|
1333
|
+
|
|
1334
|
+
4. Export in `__init__.py`:
|
|
1335
|
+
|
|
1336
|
+
```python
|
|
1337
|
+
from .game import MyPuzzleGame
|
|
1338
|
+
__all__ = ["MyPuzzleGame"]
|
|
1339
|
+
```
|
|
1340
|
+
|
|
1341
|
+
5. Register in `src/chuk_puzzles_gym/games/__init__.py`:
|
|
1342
|
+
|
|
1343
|
+
```python
|
|
1344
|
+
from .my_puzzle import MyPuzzleGame
|
|
1345
|
+
|
|
1346
|
+
AVAILABLE_GAMES = {
|
|
1347
|
+
# ... other games
|
|
1348
|
+
"mypuzzle": MyPuzzleGame,
|
|
1349
|
+
}
|
|
1350
|
+
```
|
|
1351
|
+
|
|
1352
|
+
6. Add tests in `tests/test_my_puzzle_game.py`:
|
|
1353
|
+
|
|
1354
|
+
```python
|
|
1355
|
+
from chuk_puzzles_gym.games.my_puzzle import MyPuzzleGame
|
|
1356
|
+
|
|
1357
|
+
class TestMyPuzzleGame:
|
|
1358
|
+
async def test_deterministic_seeding(self):
|
|
1359
|
+
game1 = MyPuzzleGame("easy", seed=12345)
|
|
1360
|
+
game2 = MyPuzzleGame("easy", seed=12345)
|
|
1361
|
+
await game1.generate_puzzle()
|
|
1362
|
+
await game2.generate_puzzle()
|
|
1363
|
+
assert game1.render_grid() == game2.render_grid()
|
|
1364
|
+
|
|
1365
|
+
def test_seed_in_stats(self):
|
|
1366
|
+
game = MyPuzzleGame("easy", seed=42)
|
|
1367
|
+
assert "Seed: 42" in game.get_stats()
|
|
1368
|
+
```
|
|
1369
|
+
|
|
1370
|
+
7. Run tests and verify:
|
|
1371
|
+
|
|
1372
|
+
```bash
|
|
1373
|
+
make test-cov
|
|
1374
|
+
make check
|
|
1375
|
+
```
|
|
1376
|
+
|
|
1377
|
+
## Contributing
|
|
1378
|
+
|
|
1379
|
+
Contributions are welcome! Please follow these guidelines:
|
|
1380
|
+
|
|
1381
|
+
1. Fork the repository
|
|
1382
|
+
2. Create a feature branch (`git checkout -b feature/amazing-puzzle`)
|
|
1383
|
+
3. Make your changes
|
|
1384
|
+
4. Run tests and checks (`make check`)
|
|
1385
|
+
5. Ensure coverage stays above 90% (`make test-cov`)
|
|
1386
|
+
6. Commit your changes (`git commit -m 'Add amazing puzzle'`)
|
|
1387
|
+
7. Push to the branch (`git push origin feature/amazing-puzzle`)
|
|
1388
|
+
8. Open a Pull Request
|
|
1389
|
+
|
|
1390
|
+
### Development Guidelines
|
|
1391
|
+
|
|
1392
|
+
- Follow PEP 8 style guide (enforced by ruff)
|
|
1393
|
+
- Add type hints to all functions
|
|
1394
|
+
- Write tests for new features (>90% coverage)
|
|
1395
|
+
- Update documentation as needed
|
|
1396
|
+
- Ensure all grid headers align properly with rows
|
|
1397
|
+
|
|
1398
|
+
## Troubleshooting
|
|
1399
|
+
|
|
1400
|
+
### Server won't start
|
|
1401
|
+
- Ensure chuk-protocol-server is installed: `uv pip install chuk-protocol-server`
|
|
1402
|
+
- Check ports aren't already in use: `lsof -i :8023,8024,8025,8026`
|
|
1403
|
+
- Verify Python version is 3.11+: `python --version`
|
|
1404
|
+
|
|
1405
|
+
### Tests failing
|
|
1406
|
+
- Install dev dependencies: `make dev-install`
|
|
1407
|
+
- Clear cache: `make clean`
|
|
1408
|
+
- Check Python version compatibility (3.11+ is required): `python --version`
|
|
1409
|
+
|
|
1410
|
+
### Coverage too low
|
|
1411
|
+
- Run coverage report: `make test-cov`
|
|
1412
|
+
- View HTML report: `make serve-coverage`
|
|
1413
|
+
- Add tests for uncovered code
|
|
1414
|
+
|
|
1415
|
+
### Grid alignment issues
|
|
1416
|
+
- All grid headers must align with row pipes
|
|
1417
|
+
- Use the format `"  |"` (two spaces, then the pipe) for headers so the pipe lines up with the row format `"N |"`
|
|
1418
|
+
- Test visually: `make example-telnet-kenken`
|
|
1419
|
+
|
|
1420
|
+
## Roadmap
|
|
1421
|
+
|
|
1422
|
+
See [ROADMAP.md](ROADMAP.md) for the full development roadmap.
|
|
1423
|
+
|
|
1424
|
+
### Highlights
|
|
1425
|
+
|
|
1426
|
+
**Benchmarking & Metrics**
|
|
1427
|
+
- Puzzle complexity metrics (constraint count, variable count, branching factor)
|
|
1428
|
+
- Episode model for tracking game sessions
|
|
1429
|
+
- Trace logging for offline analysis
|
|
1430
|
+
|
|
1431
|
+
**Agent Evaluation Tools**
|
|
1432
|
+
- Batch evaluation harness CLI
|
|
1433
|
+
- Solver vs Model comparison mode
|
|
1434
|
+
- JSON protocol for structured agent communication
|
|
1435
|
+
|
|
1436
|
+
**Learning & Curriculum**
|
|
1437
|
+
- Constraint concept progression graph
|
|
1438
|
+
- Tagged puzzle sets for educators
|
|
1439
|
+
- Difficulty scaling based on constraint complexity
|
|
1440
|
+
|
|
1441
|
+
**Ecosystem Integrations**
|
|
1442
|
+
- MCP native mode for agent frameworks
|
|
1443
|
+
- Python client library
|
|
1444
|
+
- REST/WebSocket API documentation
|
|
1445
|
+
|
|
1446
|
+
**UX & Community**
|
|
1447
|
+
- Interactive web viewer with replay mode
|
|
1448
|
+
- Public benchmark packs (versioned, citable)
|
|
1449
|
+
- Community leaderboards
|
|
1450
|
+
|
|
1451
|
+
## License
|
|
1452
|
+
|
|
1453
|
+
MIT License - see the main chuk-protocol-server project for details.
|
|
1454
|
+
|
|
1455
|
+
## Credits
|
|
1456
|
+
|
|
1457
|
+
- Built using the [chuk-protocol-server](https://github.com/chrishayuk/chuk-protocol-server) framework
|
|
1458
|
+
- Puzzle generation algorithms based on backtracking and constraint propagation
|
|
1459
|
+
- Uses modern Python tooling: UV, Ruff, MyPy, Pytest
|
|
1460
|
+
|
|
1461
|
+
## Links
|
|
1462
|
+
|
|
1463
|
+
- [chuk-protocol-server](https://github.com/chrishayuk/chuk-protocol-server) - Multi-transport server framework
|
|
1464
|
+
- [sudoku-telnet-server](https://github.com/chrishayuk/sudoku-telnet-server) - Original single-game implementation
|
|
1465
|
+
- [UV](https://github.com/astral-sh/uv) - Fast Python package manager
|
|
1466
|
+
- [Ruff](https://github.com/astral-sh/ruff) - Fast Python linter and formatter
|
|
1467
|
+
- [Fly.io](https://fly.io) - Cloud deployment platform
|
|
1468
|
+
|
|
1469
|
+
---
|
|
1470
|
+
|
|
1471
|
+
**Ready to test your solver?** Connect now and start solving! 🎮
|