chuk-puzzles-gym 0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. chuk_puzzles_gym/__init__.py +19 -0
  2. chuk_puzzles_gym/constants.py +9 -0
  3. chuk_puzzles_gym/eval.py +763 -0
  4. chuk_puzzles_gym/export/__init__.py +20 -0
  5. chuk_puzzles_gym/export/dataset.py +376 -0
  6. chuk_puzzles_gym/games/__init__.py +94 -0
  7. chuk_puzzles_gym/games/_base/__init__.py +6 -0
  8. chuk_puzzles_gym/games/_base/commands.py +91 -0
  9. chuk_puzzles_gym/games/_base/game.py +337 -0
  10. chuk_puzzles_gym/games/binary/__init__.py +6 -0
  11. chuk_puzzles_gym/games/binary/config.py +23 -0
  12. chuk_puzzles_gym/games/binary/game.py +434 -0
  13. chuk_puzzles_gym/games/bridges/__init__.py +6 -0
  14. chuk_puzzles_gym/games/bridges/config.py +24 -0
  15. chuk_puzzles_gym/games/bridges/game.py +489 -0
  16. chuk_puzzles_gym/games/einstein/__init__.py +6 -0
  17. chuk_puzzles_gym/games/einstein/config.py +23 -0
  18. chuk_puzzles_gym/games/einstein/constants.py +13 -0
  19. chuk_puzzles_gym/games/einstein/game.py +366 -0
  20. chuk_puzzles_gym/games/einstein/models.py +35 -0
  21. chuk_puzzles_gym/games/fillomino/__init__.py +6 -0
  22. chuk_puzzles_gym/games/fillomino/config.py +24 -0
  23. chuk_puzzles_gym/games/fillomino/game.py +516 -0
  24. chuk_puzzles_gym/games/futoshiki/__init__.py +6 -0
  25. chuk_puzzles_gym/games/futoshiki/config.py +23 -0
  26. chuk_puzzles_gym/games/futoshiki/game.py +391 -0
  27. chuk_puzzles_gym/games/hidato/__init__.py +6 -0
  28. chuk_puzzles_gym/games/hidato/config.py +24 -0
  29. chuk_puzzles_gym/games/hidato/game.py +403 -0
  30. chuk_puzzles_gym/games/hitori/__init__.py +6 -0
  31. chuk_puzzles_gym/games/hitori/config.py +23 -0
  32. chuk_puzzles_gym/games/hitori/game.py +451 -0
  33. chuk_puzzles_gym/games/kakuro/__init__.py +6 -0
  34. chuk_puzzles_gym/games/kakuro/config.py +24 -0
  35. chuk_puzzles_gym/games/kakuro/game.py +399 -0
  36. chuk_puzzles_gym/games/kenken/__init__.py +6 -0
  37. chuk_puzzles_gym/games/kenken/config.py +24 -0
  38. chuk_puzzles_gym/games/kenken/enums.py +13 -0
  39. chuk_puzzles_gym/games/kenken/game.py +486 -0
  40. chuk_puzzles_gym/games/kenken/models.py +15 -0
  41. chuk_puzzles_gym/games/killer_sudoku/__init__.py +6 -0
  42. chuk_puzzles_gym/games/killer_sudoku/config.py +23 -0
  43. chuk_puzzles_gym/games/killer_sudoku/game.py +502 -0
  44. chuk_puzzles_gym/games/killer_sudoku/models.py +15 -0
  45. chuk_puzzles_gym/games/knapsack/__init__.py +6 -0
  46. chuk_puzzles_gym/games/knapsack/config.py +24 -0
  47. chuk_puzzles_gym/games/knapsack/enums.py +10 -0
  48. chuk_puzzles_gym/games/knapsack/game.py +340 -0
  49. chuk_puzzles_gym/games/knapsack/models.py +13 -0
  50. chuk_puzzles_gym/games/lights_out/__init__.py +6 -0
  51. chuk_puzzles_gym/games/lights_out/config.py +24 -0
  52. chuk_puzzles_gym/games/lights_out/game.py +249 -0
  53. chuk_puzzles_gym/games/logic_grid/__init__.py +6 -0
  54. chuk_puzzles_gym/games/logic_grid/config.py +24 -0
  55. chuk_puzzles_gym/games/logic_grid/constants.py +12 -0
  56. chuk_puzzles_gym/games/logic_grid/game.py +333 -0
  57. chuk_puzzles_gym/games/logic_grid/models.py +24 -0
  58. chuk_puzzles_gym/games/mastermind/__init__.py +6 -0
  59. chuk_puzzles_gym/games/mastermind/config.py +25 -0
  60. chuk_puzzles_gym/games/mastermind/game.py +297 -0
  61. chuk_puzzles_gym/games/minesweeper/__init__.py +6 -0
  62. chuk_puzzles_gym/games/minesweeper/config.py +24 -0
  63. chuk_puzzles_gym/games/minesweeper/enums.py +12 -0
  64. chuk_puzzles_gym/games/minesweeper/game.py +432 -0
  65. chuk_puzzles_gym/games/nonogram/__init__.py +6 -0
  66. chuk_puzzles_gym/games/nonogram/config.py +23 -0
  67. chuk_puzzles_gym/games/nonogram/game.py +296 -0
  68. chuk_puzzles_gym/games/nurikabe/__init__.py +6 -0
  69. chuk_puzzles_gym/games/nurikabe/config.py +24 -0
  70. chuk_puzzles_gym/games/nurikabe/enums.py +14 -0
  71. chuk_puzzles_gym/games/nurikabe/game.py +586 -0
  72. chuk_puzzles_gym/games/scheduler/__init__.py +6 -0
  73. chuk_puzzles_gym/games/scheduler/config.py +25 -0
  74. chuk_puzzles_gym/games/scheduler/constants.py +15 -0
  75. chuk_puzzles_gym/games/scheduler/enums.py +10 -0
  76. chuk_puzzles_gym/games/scheduler/game.py +431 -0
  77. chuk_puzzles_gym/games/scheduler/models.py +14 -0
  78. chuk_puzzles_gym/games/shikaku/__init__.py +6 -0
  79. chuk_puzzles_gym/games/shikaku/config.py +24 -0
  80. chuk_puzzles_gym/games/shikaku/game.py +419 -0
  81. chuk_puzzles_gym/games/slitherlink/__init__.py +6 -0
  82. chuk_puzzles_gym/games/slitherlink/config.py +23 -0
  83. chuk_puzzles_gym/games/slitherlink/game.py +386 -0
  84. chuk_puzzles_gym/games/sokoban/__init__.py +6 -0
  85. chuk_puzzles_gym/games/sokoban/config.py +24 -0
  86. chuk_puzzles_gym/games/sokoban/game.py +671 -0
  87. chuk_puzzles_gym/games/star_battle/__init__.py +6 -0
  88. chuk_puzzles_gym/games/star_battle/config.py +24 -0
  89. chuk_puzzles_gym/games/star_battle/game.py +390 -0
  90. chuk_puzzles_gym/games/sudoku/__init__.py +7 -0
  91. chuk_puzzles_gym/games/sudoku/commands.py +96 -0
  92. chuk_puzzles_gym/games/sudoku/config.py +22 -0
  93. chuk_puzzles_gym/games/sudoku/game.py +328 -0
  94. chuk_puzzles_gym/games/tents/__init__.py +6 -0
  95. chuk_puzzles_gym/games/tents/config.py +24 -0
  96. chuk_puzzles_gym/games/tents/game.py +416 -0
  97. chuk_puzzles_gym/gym_env.py +465 -0
  98. chuk_puzzles_gym/models/__init__.py +47 -0
  99. chuk_puzzles_gym/models/base.py +30 -0
  100. chuk_puzzles_gym/models/config.py +11 -0
  101. chuk_puzzles_gym/models/enums.py +104 -0
  102. chuk_puzzles_gym/models/evaluation.py +487 -0
  103. chuk_puzzles_gym/models/games.py +12 -0
  104. chuk_puzzles_gym/server.py +1171 -0
  105. chuk_puzzles_gym/trace/__init__.py +10 -0
  106. chuk_puzzles_gym/trace/generator.py +726 -0
  107. chuk_puzzles_gym/utils/__init__.py +4 -0
  108. chuk_puzzles_gym-0.9.dist-info/METADATA +1471 -0
  109. chuk_puzzles_gym-0.9.dist-info/RECORD +112 -0
  110. chuk_puzzles_gym-0.9.dist-info/WHEEL +5 -0
  111. chuk_puzzles_gym-0.9.dist-info/entry_points.txt +4 -0
  112. chuk_puzzles_gym-0.9.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1471 @@
1
+ Metadata-Version: 2.4
2
+ Name: chuk-puzzles-gym
3
+ Version: 0.9
4
+ Summary: Multi-game puzzle gym for LLM training and benchmarking - 24 constraint puzzles with synthetic data generation
5
+ Author: Chris Hay
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/chrishayuk/chuk-puzzles-gym
8
+ Project-URL: Repository, https://github.com/chrishayuk/chuk-puzzles-gym
9
+ Project-URL: Documentation, https://github.com/chrishayuk/chuk-puzzles-gym#readme
10
+ Keywords: puzzle,gym,llm,training,benchmark,sudoku,kenken,constraint-satisfaction,reasoning,mcp
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Requires-Python: >=3.11
17
+ Description-Content-Type: text/markdown
18
+ Requires-Dist: chuk-gym-core>=0.1.1
19
+ Requires-Dist: chuk-protocol-server>=0.1.0
20
+ Requires-Dist: pydantic>=2.0.0
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
23
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
24
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
25
+ Requires-Dist: pytest-watch>=4.2.0; extra == "dev"
26
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
27
+ Requires-Dist: mypy>=1.4.0; extra == "dev"
28
+ Requires-Dist: bandit>=1.7.5; extra == "dev"
29
+ Requires-Dist: websockets>=11.0.0; extra == "dev"
30
+ Requires-Dist: ipython>=8.14.0; extra == "dev"
31
+
32
+ # chuk-puzzles-gym
33
+
34
+ [![PyPI](https://img.shields.io/pypi/v/chuk-puzzles-gym.svg)](https://pypi.org/project/chuk-puzzles-gym/)
35
+ [![Test](https://github.com/chrishayuk/chuk-puzzles-gym/workflows/Test/badge.svg)](https://github.com/chrishayuk/chuk-puzzles-gym/actions)
36
+ [![Coverage](https://img.shields.io/badge/coverage-94%25-brightgreen)](htmlcov/index.html)
37
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
38
+ [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
39
+ [![Pydantic v2](https://img.shields.io/badge/pydantic-v2-purple.svg)](https://docs.pydantic.dev/)
40
+ [![Type Checked](https://img.shields.io/badge/type%20checked-mypy-blue.svg)](https://mypy-lang.org/)
41
+
42
+ A **multi-game puzzle gym** for **LLM training and benchmarking**, hosting 24 different logic puzzle types with synthetic data generation. Built using [chuk-gym-core](https://github.com/chrishayuk/chuk-gym-core) and [chuk-protocol-server](https://github.com/chrishayuk/chuk-protocol-server).
43
+
44
+ **Perfect for:**
45
+ - 🤖 **LLM Agent Testing** - Benchmark reasoning capabilities across constraint types
46
+ - 🎯 **CP-SAT Education** - Learn constraint programming through progressive puzzles
47
+ - 💼 **Business Demos** - Map puzzle patterns to real scheduling, optimization, and allocation problems
48
+ - 🔧 **MCP Tool Integration** - Showcase CHUK + constraint solver workflows
49
+
50
+ Each puzzle demonstrates specific **constraint patterns** (AllDifferent, Optimization, Connectivity, Boolean SAT, etc.) and maps to **business use cases** (scheduling, resource allocation, routing, etc.).
51
+
52
+ ## Try It Now
53
+
54
+ ### Run Locally with uvx
55
+
56
+ No installation required - run directly with [uvx](https://docs.astral.sh/uv/guides/tools/):
57
+
58
+ ```bash
59
+ # Start the puzzle server
60
+ uvx chuk-puzzles-gym
61
+
62
+ # Generate training datasets
63
+ uvx --from chuk-puzzles-gym chuk-puzzles-export -g sudoku -n 100 -o data.jsonl
64
+
65
+ # Benchmark an agent
66
+ uvx --from chuk-puzzles-gym chuk-puzzles-eval sudoku -n 10
67
+ ```
68
+
69
+ ### Connect to Live Demo
70
+
71
+ A live demo server is running on Fly.io:
72
+
73
+ ```bash
74
+ # Connect via Telnet (IPv6)
75
+ telnet 2a09:8280:1::b8:79f4:0 8023
76
+
77
+ # WebSocket connections
78
+ ws://chuk-puzzles-gym.fly.dev:8025/ws
79
+ ```
80
+
81
+ Once connected, type `help` to see available games, or `sudoku easy` to start playing!
82
+
83
+ ## Features
84
+
85
+ - **24 Puzzle Games** with three difficulty levels each (easy, medium, hard)
86
+ - **7 Classic Logic Puzzles** - Sudoku, KenKen, Kakuro, Binary, Futoshiki, Nonogram, Logic Grid
87
+ - **7 Advanced CP-SAT Puzzles** - Killer Sudoku, Lights Out, Mastermind, Slitherlink, Bridges, Hitori, Shikaku
88
+ - **5 Specialized Constraint Puzzles** - Hidato, Tents and Trees, Fillomino, Star Battle, Sokoban
89
+ - **2 Optimization Challenges** - Knapsack, Task Scheduler
90
+ - **3 Advanced Reasoning Puzzles** - Nurikabe, Einstein's Puzzle, Minesweeper
91
+ - **Agent-Friendly Mode** - Structured output with clear markers for AI agents and tools
92
+ - Enable with `mode agent` command
93
+ - Machine-parseable grid format with clear start/end markers
94
+ - Compact output optimized for LLM tool integration
95
+ - **Evaluation Harness** (`chuk-puzzles-eval`) - Built-in benchmarking CLI
96
+ - Batch evaluation with configurable episodes
97
+ - Multiple output formats (JSON, CSV, Markdown)
98
+ - Metrics: moves, invalid moves, hints, solve time
99
+ - Reproducible with deterministic seeds
100
+ - **Dataset Export** (`chuk-puzzles-export`) - Synthetic data generation for LLM training
101
+ - JSONL output with complete problem definitions and solutions
102
+ - Step-by-step reasoning traces for teacher-forcing
103
+ - Constraint metadata and difficulty profiles
104
+ - Compatible with chuk-gym-core schema
105
+ - **Multiple transport protocols:**
106
+ - **Telnet** (port 8023) - Classic telnet protocol
107
+ - **TCP** (port 8024) - Raw TCP connections
108
+ - **WebSocket** (port 8025) - Modern WebSocket protocol
109
+ - **WebSocket-Telnet** (port 8026) - WebSocket with telnet negotiation
110
+ - **Interactive menu-driven interface** with game selection
111
+ - **Hint system** for when you're stuck
112
+ - **Solution checker** and auto-solver for all games
113
+ - **Clean ASCII art grids** - perfectly aligned for easy parsing
114
+ - **Deterministic seeding** - Replay any puzzle with the same seed
115
+ - **Gymnasium-compatible RL Environment** (`PuzzleEnv`) for training agents
116
+ - **Comprehensive test suite** (1067 tests, 94% coverage)
117
+ - **Modern Python best practices:**
118
+ - **Pydantic v2 native** - All models use ConfigDict for type safety
119
+ - **Async native** - Full async/await support throughout
120
+ - **Type-safe** - No `dict["key"]` patterns, only typed models
121
+ - **Enum-based** - No magic strings, proper enum constants
122
+ - **Modern Python packaging** with pyproject.toml
123
+ - **Docker and Fly.io deployment** ready
124
+
125
+ ## Available Games
126
+
127
+ ### Classic Logic Puzzles
128
+
129
+ | Game | Grid Size | Constraint Types | Status |
130
+ |------|-----------|------------------|--------|
131
+ | **Sudoku** | 9×9 | AllDifferent (rows, cols, boxes) | ✅ Complete |
132
+ | **KenKen** | 4×4 to 6×6 | Arithmetic cages + AllDifferent | ✅ Complete |
133
+ | **Kakuro** | 5×5 to 8×8 | Sum constraints + AllDifferent | ✅ Complete |
134
+ | **Binary Puzzle** | 6×6 to 10×10 | Adjacency limits + Equal counts | ✅ Complete |
135
+ | **Futoshiki** | 4×4 to 6×6 | Inequalities + AllDifferent | ✅ Complete |
136
+ | **Nonogram** | 5×5 to 10×10 | Line sum constraints + Blocks | ✅ Complete |
137
+ | **Logic Grid** | Variable | Category associations + Logic | ✅ Complete |
138
+
139
+ ### Advanced CP-SAT Puzzles
140
+
141
+ | Game | Grid Size | Constraint Types | Status |
142
+ |------|-----------|------------------|--------|
143
+ | **Killer Sudoku** | 9×9 | Linear constraints + AllDifferent + Cages | ✅ Complete |
144
+ | **Lights Out** | 5×5 to 7×7 | Boolean XOR constraints (SAT) | ✅ Complete |
145
+ | **Mastermind** | 4-6 pegs | Deduction + Feedback constraints | ✅ Complete |
146
+ | **Slitherlink** | 5×5 to 10×10 | Global loop + Edge constraints | ✅ Complete |
147
+ | **Bridges** | 7×7 to 11×11 | Connectivity + Degree constraints | ✅ Complete |
148
+ | **Hitori** | 5×5 to 9×9 | AllDifferent + Adjacency + Connectivity | ✅ Complete |
149
+ | **Shikaku** | 6×6 to 10×10 | Area partitioning + Rectangle covering | ✅ Complete |
150
+
151
+ ### Specialized Constraint Puzzles
152
+
153
+ | Game | Grid Size | Constraint Types | Status |
154
+ |------|-----------|------------------|--------|
155
+ | **Hidato** | 5×5 to 9×9 | Sequential adjacency + Hamiltonian path | ✅ Complete |
156
+ | **Tents and Trees** | 6×6 to 10×10 | Bipartite matching + Adjacency avoidance | ✅ Complete |
157
+ | **Fillomino** | 6×6 to 10×10 | Region growth + Self-referential constraints | ✅ Complete |
158
+ | **Star Battle** | 6×6 to 10×10 | Multi-region placement + Adjacency avoidance | ✅ Complete |
159
+ | **Sokoban** | 6×6 to 10×10 | Spatial planning + Irreversible actions (optimization) | ✅ Complete |
160
+
161
+ ### Optimization Challenges
162
+
163
+ | Game | Problem Size | Constraint Types | Status |
164
+ |------|-------------|------------------|--------|
165
+ | **Knapsack** | 5-12 items | Value maximization + Capacity constraint | ✅ Complete |
166
+ | **Task Scheduler** | 4-8 tasks | Makespan minimization + Dependencies + Resources | ✅ Complete |
167
+
168
+ ### Advanced Reasoning Puzzles
169
+
170
+ | Game | Grid Size | Constraint Types | Status |
171
+ |------|-----------|------------------|--------|
172
+ | **Nurikabe** | 6×6 to 10×10 | Connectivity + Island sizes + No 2×2 blocks | ✅ Complete |
173
+ | **Einstein's Puzzle** | 5 houses × 5 attributes | Multi-attribute deduction + Logic chains | ✅ Complete |
174
+ | **Minesweeper** | 6×6 to 10×10 | Probabilistic reasoning + Safe deduction | ✅ Complete |
175
+
176
+ ## Solver Profiles & Business Mapping
177
+
178
+ Each game includes metadata for **constraint types**, **business analogies**, and **complexity profiles**, making it easy to:
179
+
180
+ - **Select puzzles by constraint pattern** - Need to demonstrate Boolean SAT? → Lights Out
181
+ - **Map to business use cases** - Task Scheduler → Sprint Planning, Knapsack → Portfolio Selection
182
+ - **Benchmark LLM reasoning** - Compare model performance across different constraint densities
183
+
184
+ ### Example: Query Games by Profile
185
+
186
+ ```python
187
+ from chuk_puzzles_gym.games import AVAILABLE_GAMES
188
+
189
+ # Find all optimization problems
190
+ optimization_games = [
191
+ name for name, game_class in AVAILABLE_GAMES.items()
192
+ if "optimization" in game_class().constraint_types
193
+ ]
194
+ # → ['knapsack', 'scheduler']
195
+
196
+ # Find games that model resource allocation
197
+ resource_games = [
198
+ name for name, game_class in AVAILABLE_GAMES.items()
199
+ if "resource_allocation" in game_class().business_analogies
200
+ ]
201
+ # → ['knapsack', 'scheduler']
202
+ ```
203
+
204
+ ### Quick Reference: Constraint Types to Business Problems
205
+
206
+ | Constraint Pattern | Puzzle Examples | Business Use Cases |
207
+ |-------------------|-----------------|-------------------|
208
+ | **Optimization** | Knapsack, Scheduler | Portfolio selection, Sprint planning, Budget allocation |
209
+ | **Precedence** | Scheduler | Project dependencies, Workflow sequencing |
210
+ | **Sequential Adjacency** | Hidato | Path planning, Route sequencing, Tour optimization |
211
+ | **Hamiltonian Path** | Hidato | Traveling salesman, Circuit design |
212
+ | **Bipartite Matching** | Tents and Trees | Job assignment, Resource pairing |
213
+ | **Region Growth** | Fillomino | Territory expansion, Cluster formation |
214
+ | **Spatial Planning** | Sokoban | Warehouse logistics, Movement planning |
215
+ | **Connectivity** | Nurikabe, Slitherlink | Network design, Routing, Zone planning |
216
+ | **Global Loop** | Slitherlink | Circuit design, Path finding |
217
+ | **Boolean SAT** | Lights Out | Feature dependencies, Toggle systems |
218
+ | **Cage Sums** | Killer Sudoku, Kakuro | Team budgets, Grouped constraints |
219
+ | **AllDifferent** | Sudoku, KenKen | Resource uniqueness, Assignment problems |
220
+
221
+ ## Quick Start
222
+
223
+ ### Prerequisites
224
+
225
+ - Python 3.11 or higher
226
+ - [UV](https://github.com/astral-sh/uv) (recommended) or pip
227
+
228
+ ### Installation
229
+
230
+ #### Using uvx (No Installation Required)
231
+
232
+ Run directly without installing using [uvx](https://docs.astral.sh/uv/guides/tools/):
233
+
234
+ ```bash
235
+ # Run the puzzle server
236
+ uvx chuk-puzzles-gym
237
+
238
+ # Generate synthetic datasets
239
+ uvx --from chuk-puzzles-gym chuk-puzzles-export -o puzzles.jsonl
240
+
241
+ # Run evaluation harness
242
+ uvx --from chuk-puzzles-gym chuk-puzzles-eval sudoku -n 10
243
+ ```
244
+
245
+ #### From PyPI
246
+
247
+ ```bash
248
+ # Install with pip
249
+ pip install chuk-puzzles-gym
250
+
251
+ # Or with uv
252
+ uv pip install chuk-puzzles-gym
253
+
254
+ # Then run commands directly
255
+ chuk-puzzles-server # Start the server
256
+ chuk-puzzles-export # Generate datasets
257
+ chuk-puzzles-eval # Run evaluation
258
+ ```
259
+
260
+ #### From Source (Development)
261
+
262
+ ##### Using UV (Recommended)
263
+
264
+ ```bash
265
+ # Clone the repository
266
+ git clone https://github.com/chrishayuk/chuk-puzzles-gym.git
267
+ cd chuk-puzzles-gym
268
+
269
+ # Install UV if you haven't already
270
+ curl -LsSf https://astral.sh/uv/install.sh | sh
271
+
272
+ # Install development dependencies
273
+ make dev-install
274
+
275
+ # Run the server
276
+ make run
277
+ ```
278
+
279
+ ##### Using pip
280
+
281
+ ```bash
282
+ # Clone the repository
283
+ git clone https://github.com/chrishayuk/chuk-puzzles-gym.git
284
+ cd chuk-puzzles-gym
285
+
286
+ # Install in development mode with dev dependencies
287
+ pip install -e ".[dev]"
288
+
289
+ # Run the server
290
+ PYTHONPATH=. uv run --with chuk-protocol-server chuk-protocol-server server-launcher -c config.yaml
291
+ ```
292
+
293
+ ### Using Make (All Commands)
294
+
295
+ ```bash
296
+ # See all available commands
297
+ make help
298
+
299
+ # Development workflow
300
+ make dev-install # Install dev dependencies
301
+ make run # Run the server
302
+ make test # Run tests
303
+ make test-cov # Run tests with coverage report
304
+ make check # Run linting and type checking
305
+ make format # Format code with ruff
306
+ make security # Run security checks
307
+
308
+ # Docker workflow
309
+ make docker-build # Build Docker image
310
+ make docker-run # Run in Docker container
311
+
312
+ # Examples
313
+ make example-telnet # Browse games via telnet
314
+ make example-telnet-sudoku # Sudoku demo
315
+ make example-telnet-kenken # KenKen demo
316
+ make example-ws # WebSocket tour
317
+ make example-ws-interactive # Interactive WebSocket mode
318
+
319
+ # Deployment
320
+ make fly-deploy # Deploy to Fly.io
321
+ make fly-logs # View Fly.io logs
322
+ ```
323
+
324
+ ### Docker Setup
325
+
326
+ Build and run with Docker:
327
+
328
+ ```bash
329
+ # Using Make
330
+ make docker-run
331
+
332
+ # Or manually
333
+ docker build -t chuk-puzzles-gym .
334
+ docker run -p 8023:8023 -p 8024:8024 -p 8025:8025 -p 8026:8026 chuk-puzzles-gym
335
+ ```
336
+
337
+ ## Connecting to the Server
338
+
339
+ ### Local Development
340
+
341
+ **Via Telnet:**
342
+ ```bash
343
+ telnet localhost 8023
344
+ ```
345
+
346
+ **Via Netcat (TCP):**
347
+ ```bash
348
+ nc localhost 8024
349
+ ```
350
+
351
+ **Via WebSocket:**
352
+ ```
353
+ ws://localhost:8025/ws
354
+ ws://localhost:8026/ws
355
+ ```
356
+
357
+ ## Game Menu
358
+
359
+ When you connect, you'll see the main menu:
360
+
361
+ ```
362
+ ==================================================
363
+ WELCOME TO THE PUZZLE ARCADE!
364
+ ==================================================
365
+
366
+ CLASSIC LOGIC PUZZLES:
367
+ 1) Sudoku - Classic logic puzzle - fill 9x9 grid with digits 1-9
368
+ 2) KenKen - Arithmetic cage puzzle - combine math and logic
369
+ 3) Kakuro - Crossword math puzzle - fill runs with unique digits that sum to clues
370
+ 4) Binary Puzzle - Fill grid with 0s and 1s - no three in a row, equal counts
371
+ 5) Futoshiki - Inequality number puzzle - fill grid with constraints
372
+ 6) Nonogram - Picture logic puzzle - reveal image from number clues
373
+ 7) Logic Grid - Deductive reasoning puzzle - match attributes using logic
374
+
375
+ ADVANCED CP-SAT PUZZLES:
376
+ 8) Killer Sudoku - Sudoku + Kakuro - regions must sum to targets
377
+ 9) Lights Out - Toggle lights to turn all off - XOR constraint puzzle
378
+ 10) Mastermind - Code-breaking with logical deduction and feedback
379
+ 11) Slitherlink - Draw a single loop - numbers show edge counts
380
+ 12) Bridges - Connect islands with bridges - satisfy all numbers
381
+ 13) Hitori - Shade cells to eliminate duplicates - no adjacent shading
382
+ 14) Shikaku - Divide grid into rectangles matching areas
383
+
384
+ SPECIALIZED CONSTRAINT PUZZLES:
385
+ 15) Hidato - Sequential path puzzle - connect numbers adjacently
386
+ 16) Tents - Place tents next to trees - bipartite matching puzzle
387
+ 17) Fillomino - Fill regions with numbers matching region size
388
+ 18) Star Battle - Place stars avoiding adjacency - multi-region placement
389
+ 19) Sokoban - Push boxes to targets - spatial planning puzzle
390
+
391
+ OPTIMIZATION CHALLENGES:
392
+ 20) Knapsack - Maximize value within capacity constraints
393
+ 21) Task Scheduler - Minimize makespan with dependencies and resources
394
+
395
+ ADVANCED REASONING PUZZLES:
396
+ 22) Nurikabe - Island and sea puzzle - connectivity constraints
397
+ 23) Einstein's Puzzle - Who owns the fish? Multi-attribute deduction
398
+ 24) Minesweeper - Find all mines using logical deduction
399
+
400
+ Commands:
401
+ <number> - Select game by number
402
+ <name> - Select game by name (e.g., 'sudoku')
403
+ help - Show this menu again
404
+ quit - Exit the server
405
+ ==================================================
406
+ ```
407
+
408
+ ## Agent-Friendly Mode
409
+
410
+ The server includes a special **agent mode** designed for AI tools and LLM integration:
411
+
412
+ ### Enabling Agent Mode
413
+
414
+ ```
415
+ > mode agent
416
+ Output mode set to: agent
417
+ ```
418
+
419
+ ### Agent Mode Features
420
+
421
+ **Structured Output** - Grid data is wrapped with clear start/end markers:
422
+ ```
423
+ ---GAME-START---
424
+ GAME: Sudoku
425
+ DIFFICULTY: medium
426
+ MOVES: 3
427
+ ---GRID-START---
428
+ | 1 2 3 | 4 5 6 | 7 8 9 |
429
+ -------------------------
430
+ 1 | . . 3 | . 2 . | 6 . . |
431
+ ...
432
+ ---GRID-END---
433
+ ---GAME-END---
434
+ ```
435
+
436
+ **Benefits for AI Agents:**
437
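+ - Easy parsing with regex: `---GRID-START---(.*?)---GRID-END---` (enable dot-matches-newline mode, e.g. `re.DOTALL` in Python, because the grid spans multiple lines)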
438
+ - Consistent metadata format (GAME, DIFFICULTY, MOVES)
439
+ - No decorative text or banners to filter out
440
+ - Minimal token usage compared to normal mode
441
+
442
+ **Switching Modes:**
443
+ - `mode normal` - Human-friendly output (default)
444
+ - `mode agent` - Machine-parseable structured output
445
+ - `mode compact` - Reserved for future use
446
+
447
+ ## Gymnasium-Compatible RL Environment
448
+
449
+ The project includes a **Gymnasium-compatible environment** for training reinforcement learning agents:
450
+
451
+ ### Quick Start
452
+
453
+ ```python
454
+ from chuk_puzzles_gym.gym_env import PuzzleEnv
455
+
456
+ # Create environment for any of the 24 games
457
+ env = PuzzleEnv("sudoku", difficulty="easy", seed=42)
458
+
459
+ # Reset to start a new episode (reset/step are awaited, so run this inside an async function, e.g. via asyncio.run)
460
+ obs, info = await env.reset()
461
+
462
+ # Take actions (text commands or tuples)
463
+ obs, reward, terminated, truncated, info = await env.step("place 1 1 5")
464
+
465
+ # Or use tuple format
466
+ obs, reward, terminated, truncated, info = await env.step(("place", 1, 1, 5))
467
+
468
+ # Get available games
469
+ games = PuzzleEnv.available_games()
470
+ # → ['sudoku', 'kenken', 'minesweeper', ...]
471
+ ```
472
+
473
+ ### Features
474
+
475
+ - **All 24 games** accessible through unified API
476
+ - **Configurable rewards** for correct moves, invalid attempts, completion bonuses
477
+ - **Hint system** with optional budget limits
478
+ - **Solver-free mode** for pure reasoning benchmarks
479
+ - **Efficiency scoring** based on optimal step counts
480
+ - **Deterministic seeding** for reproducible experiments
481
+
482
+ ### Observation Space
483
+
484
+ ```python
485
+ obs = {
486
+ "game": "sudoku",
487
+ "difficulty": "easy",
488
+ "seed": 42,
489
+ "moves": 5,
490
+ "invalid_moves": 1,
491
+ "hints_used": 2,
492
+ "is_complete": False,
493
+ "grid": [[4, 0, 8, ...], ...] # Game-specific state
494
+ }
495
+ ```
496
+
497
+ ### Reward Configuration
498
+
499
+ ```python
500
+ env = PuzzleEnv("kenken", reward_config={
501
+ "correct_placement": 1.0, # Reward for valid moves
502
+ "invalid_attempt": -0.5, # Penalty for invalid moves
503
+ "completion_bonus": 10.0, # Bonus for solving
504
+ "hint_penalty": -0.1, # Penalty for using hints
505
+ "efficiency_multiplier": 2.0, # Scales completion bonus by efficiency
506
+ })
507
+ ```
508
+
509
+ ### Solver Configuration
510
+
511
+ ```python
512
+ from chuk_puzzles_gym.models import SolverConfig
513
+
514
+ # Solver-free mode (no hints allowed)
515
+ config = SolverConfig.solver_free()
516
+ env = PuzzleEnv("sudoku", solver_config=config)
517
+
518
+ # Limited hints
519
+ config = SolverConfig(hint_budget=5, hint_penalty=0.1)
520
+ env = PuzzleEnv("sudoku", solver_config=config)
521
+ ```
522
+
523
+ ## Evaluation Harness
524
+
525
+ The project includes a built-in **evaluation harness** for benchmarking puzzle-solving agents:
526
+
527
+ ### Quick Start
528
+
529
+ ```bash
530
+ # List all available games
531
+ chuk-puzzles-eval --list-games
532
+
533
+ # Evaluate a specific game (10 episodes, medium difficulty)
534
+ chuk-puzzles-eval sudoku -d medium -n 10 -v
535
+
536
+ # Evaluate all games (5 episodes each)
537
+ chuk-puzzles-eval --all -d easy -n 5
538
+
539
+ # Output as JSON for analysis
540
+ chuk-puzzles-eval sudoku -n 20 -o json > results.json
541
+ ```
542
+
543
+ ### Using Make Targets
544
+
545
+ ```bash
546
+ make eval # Quick evaluation (3 episodes per game)
547
+ make eval-sudoku # Evaluate Sudoku (10 episodes)
548
+ make eval-all # Evaluate all games (10 episodes each)
549
+ make eval-json # Output as JSON
550
+ make list-games # List available games
551
+ ```
552
+
553
+ ### Sample Output
554
+
555
+ ```
556
+ Sudoku Medium Evaluation (10 episodes)
557
+ ==================================================
558
+ Solved: 10/10 (100.0%)
559
+ Avg Moves: 45.3
560
+ Avg Invalid: 0.0
561
+ Avg Time: 12ms
562
+ ```
563
+
564
+ ### Output Formats
565
+
566
+ - **text** (default) - Human-readable summary
567
+ - **json** - Structured JSON for programmatic analysis
568
+ - **csv** - Spreadsheet-compatible format
569
+ - **markdown** - Documentation-ready tables
570
+
571
+ ### Metrics Collected
572
+
573
+ | Metric | Description |
574
+ |--------|-------------|
575
+ | `solved` | Whether the puzzle was solved |
576
+ | `moves_made` | Number of valid moves |
577
+ | `invalid_moves` | Number of rejected moves |
578
+ | `hints_used` | Number of hints requested |
579
+ | `wall_time_ms` | Time to solve in milliseconds |
580
+ | `seed` | Puzzle seed for reproducibility |
581
+
582
+ ## Dataset Export
583
+
584
+ Generate synthetic puzzle datasets for training and benchmarking LLMs and constraint solvers. The export system produces JSONL files with complete problem definitions, solutions, and step-by-step reasoning traces.
585
+
586
+ ### CLI Usage
587
+
588
+ ```bash
589
+ # Generate 100 puzzles per game/difficulty for all 24 games
590
+ chuk-puzzles-export -o puzzles.jsonl
591
+
592
+ # Specific games only
593
+ chuk-puzzles-export -g sudoku kenken einstein -n 100 -o selected.jsonl
594
+
595
+ # Single difficulty level
596
+ chuk-puzzles-export -d easy -n 50 -o easy_puzzles.jsonl
597
+
598
+ # Multiple difficulties
599
+ chuk-puzzles-export -d easy medium -n 100 -o train_data.jsonl
600
+
601
+ # Reproducible generation with seed
602
+ chuk-puzzles-export -g sudoku -s 0 -n 1000 -o sudoku_seed0.jsonl
603
+
604
+ # Without step-by-step traces (smaller files)
605
+ chuk-puzzles-export --no-trace -n 500 -o compact.jsonl
606
+
607
+ # List all available games
608
+ chuk-puzzles-export --list-games
609
+ ```
610
+
611
+ ### CLI Options
612
+
613
+ | Option | Description | Default |
614
+ |--------|-------------|---------|
615
+ | `-o, --output` | Output file path | `puzzles.jsonl` |
616
+ | `-g, --games` | Games to include (space-separated) | All games |
617
+ | `-n, --count` | Problems per game/difficulty combo | 100 |
618
+ | `-d, --difficulties` | Difficulty levels to include | easy, medium, hard |
619
+ | `-s, --seed` | Starting seed for reproducibility | 0 |
620
+ | `--no-trace` | Exclude step-by-step solution traces | False |
621
+ | `--list-games` | List available games and exit | - |
622
+
623
+ ### Python API
624
+
625
+ ```python
626
+ import asyncio
627
+ from chuk_puzzles_gym.export import DatasetExporter, generate_dataset
628
+ from chuk_gym_core import DifficultyLevel
629
+
630
+ # Quick generation with async function
631
+ async def generate():
632
+ total = await generate_dataset(
633
+ output_path="data.jsonl",
634
+ games=["sudoku", "kenken", "einstein"],
635
+ count_per_game=100,
636
+ difficulties=["easy", "medium", "hard"],
637
+ include_trace=True,
638
+ )
639
+ print(f"Generated {total} problems")
640
+
641
+ asyncio.run(generate())
642
+
643
+ # Fine-grained control with context manager
644
+ async def export_custom():
645
+ with DatasetExporter("puzzles.jsonl", include_trace=True) as exporter:
646
+ # Export specific game
647
+ await exporter.export_game(
648
+ game_name="sudoku",
649
+ count=500,
650
+ difficulty=DifficultyLevel.MEDIUM,
651
+ start_seed=0,
652
+ )
653
+
654
+ # Export all games
655
+ await exporter.export_all_games(
656
+ count_per_game=50,
657
+ difficulties=[DifficultyLevel.EASY, DifficultyLevel.HARD],
658
+ )
659
+
660
+ print(f"Total exported: {exporter.count}")
661
+
662
+ asyncio.run(export_custom())
663
+ ```
664
+
665
+ ### Output Format
666
+
667
+ Each line in the JSONL file contains a complete problem definition:
668
+
669
+ ```json
670
+ {
671
+ "id": "sudoku_medium_42",
672
+ "seed": 42,
673
+ "domain": "sudoku",
674
+ "difficulty": "medium",
675
+ "prompt": "Sudoku: Classic 9x9 logic puzzle...\n\nRULES:\n...\n\n[grid]",
676
+ "initial_state": [[0,0,3,...], ...],
677
+ "gold_answer": "[[4,8,3,...], ...]",
678
+ "constraint_types": ["all_different_rows", "all_different_columns", "all_different_boxes"],
679
+ "business_analogies": ["resource_allocation", "scheduling", "assignment_problems"],
680
+ "difficulty_profile": {
681
+ "logic_depth": 45,
682
+ "branching_factor": 3.2,
683
+ "state_observability": 0.88,
684
+ "constraint_density": 0.75
685
+ },
686
+ "operation_count": 47,
687
+ "tags": ["sudoku", "medium"]
688
+ }
689
+ ```
690
+
691
+ ### Solution Traces
692
+
693
+ When `include_trace=True` (default), each JSONL record wraps the problem definition under a `problem` key and adds a `trace` object containing the step-by-step solution, suitable for teacher-forcing training:
694
+
695
+ ```json
696
+ {
697
+ "problem": { ... },
698
+ "trace": {
699
+ "problem_id": "sudoku_medium_42",
700
+ "steps": [
701
+ {
702
+ "index": 0,
703
+ "operation": "PLACE",
704
+ "before_state": "cell(r1,c1)=empty",
705
+ "after_state": "cell(r1,c1)=4",
706
+ "output_value": 4,
707
+ "position": [1, 1],
708
+ "rule_applied": "naked_single_row",
709
+ "explanation": "Place 4 at row 1, column 1. This is the only valid digit considering row 1, column 1, and box 1 constraints."
710
+ },
711
+ {
712
+ "index": 1,
713
+ "operation": "PLACE",
714
+ "before_state": "cell(r1,c3)=empty",
715
+ "after_state": "cell(r1,c3)=7",
716
+ "output_value": 7,
717
+ "position": [1, 3],
718
+ "rule_applied": "naked_single_box",
719
+ "explanation": "Place 7 at row 1, column 3..."
720
+ }
721
+ ],
722
+ "checkpoints": [0, 12, 24, 47]
723
+ }
724
+ }
725
+ ```
726
+
727
+ ### Trace Operations
728
+
729
+ | Operation | Description | Used By |
730
+ |-----------|-------------|---------|
731
+ | `PLACE` | Place a value in a cell | Sudoku, KenKen, Nonogram, etc. |
732
+ | `ELIMINATE` | Mark a cell as excluded/shaded | Hitori, Minesweeper |
733
+ | `DEDUCE` | Logical deduction step | Einstein, Logic Grid, Mastermind |
734
+
735
+ ### Rule Types by Game
736
+
737
+ | Game | Rules Applied |
738
+ |------|--------------|
739
+ | Sudoku | `naked_single_row`, `naked_single_column`, `naked_single_box`, `elimination` |
740
+ | Binary | `balance_constraint` |
741
+ | KenKen/Kakuro | `arithmetic_constraint` |
742
+ | Nonogram | `line_constraint` |
743
+ | Einstein | `logical_deduction` |
744
+ | Hitori | `duplicate_elimination` |
745
+ | Bridges | `connectivity_constraint` |
746
+ | Slitherlink | `loop_constraint` |
747
+ | Others | `constraint_propagation` |
748
+
749
+ ### Example: Generate Training Data
750
+
751
+ ```bash
752
+ # Generate large training dataset
753
+ chuk-puzzles-export \
754
+ -g sudoku kenken kakuro binary futoshiki \
755
+ -n 1000 \
756
+ -d easy medium hard \
757
+ -s 0 \
758
+ -o training_data.jsonl
759
+
760
+ # Generate evaluation set (different seed range)
761
+ chuk-puzzles-export \
762
+ -g sudoku kenken kakuro binary futoshiki \
763
+ -n 100 \
764
+ -d easy medium hard \
765
+ -s 100000 \
766
+ -o eval_data.jsonl
767
+ ```
768
+
769
+ ### Dataset Statistics
770
+
771
+ With default settings (`-n 100` per game/difficulty):
772
+
773
+ | Configuration | Problems Generated |
774
+ |--------------|-------------------|
775
+ | All games, all difficulties | 24 games × 3 difficulties × 100 = 7,200 |
776
+ | Single game, all difficulties | 1 × 3 × 100 = 300 |
777
+ | All games, single difficulty | 24 × 1 × 100 = 2,400 |
778
+
779
+ ### Integration with chuk-gym-core
780
+
781
+ The export system uses [chuk-gym-core](https://pypi.org/project/chuk-gym-core/) to produce a consistent output format that is compatible with:
782
+
783
+ - **chuk-math-gym** - Mathematical reasoning datasets
784
+ - **Teacher-forcing training** - Step-by-step trace supervision
785
+ - **Evaluation pipelines** - Standardized problem/solution schema
786
+
787
+ ## Universal Game Commands
788
+
789
+ All games support these commands:
790
+
791
+ ### Starting and Managing Games
792
+ - `<number> [difficulty]` - Select game by number (e.g., `1 medium`)
793
+ - `<name> [difficulty]` - Select game by name (e.g., `sudoku hard`)
794
+ - `show` - Display the current grid
795
+ - `mode <normal|agent|compact>` - Set output mode
796
+ - `help` - Show game-specific commands and rules
797
+ - `menu` - Return to main menu
798
+ - `quit` - Exit the server
799
+
800
+ ### Playing Games
801
+ - `place <row> <col> <value>` - Place a number/value on the grid
802
+ - Example: `place 1 5 7` (places 7 at row 1, column 5)
803
+ - `clear <row> <col>` - Clear a cell you've filled
804
+ - `hint` - Get a hint for the next move
805
+ - `check` - Check your progress
806
+ - `solve` - Show the solution (ends current game)
807
+
808
+ ### Special Commands (Game-Specific)
809
+ - **Logic Grid**: `connect` and `exclude` commands for associations
810
+ - See in-game `help` for game-specific commands
811
+
812
+ ## Example Gameplay Sessions
813
+
814
+ ### Sudoku
815
+
816
+ ```
817
+ > sudoku medium
818
+
819
+ ==================================================
820
+ SUDOKU - MEDIUM MODE
821
+ ==================================================
822
+ Fill the grid so that every row, column, and 3x3 box
823
+ contains the digits 1-9 without repetition.
824
+
825
+ Type 'help' for commands or 'hint' for a clue.
826
+ ==================================================
827
+
828
+ | 1 2 3 | 4 5 6 | 7 8 9 |
829
+ -------------------------
830
+ 1 | . . 3 | . 2 . | 6 . . |
831
+ 2 | 9 . . | 3 . 5 | . . 1 |
832
+ 3 | . . 1 | 8 . 6 | 4 . . |
833
+ -------------------------
834
+ 4 | . . 8 | 1 . 2 | 9 . . |
835
+ 5 | 7 . . | . . . | . . 8 |
836
+ 6 | . . 6 | 7 . 8 | 2 . . |
837
+ -------------------------
838
+ 7 | . . 2 | 6 . 9 | 5 . . |
839
+ 8 | 8 . . | 2 . 3 | . . 9 |
840
+ 9 | . . 5 | . 1 . | 3 . . |
841
+ -------------------------
842
+ Moves made: 0
843
+ ==================================================
844
+
845
+ > hint
846
+ Hint: Try placing 4 at row 1, column 1
847
+
848
+ > place 1 1 4
849
+ Number placed successfully!
850
+
851
+ > check
852
+ Puzzle not yet complete. Keep going!
853
+ Moves made: 1
854
+ ```
855
+
856
+ ### KenKen
857
+
858
+ ```
859
+ > kenken easy
860
+
861
+ ==================================================
862
+ KENKEN - EASY MODE
863
+ ==================================================
864
+ KENKEN RULES:
865
+ - Fill 4x4 grid with 1-4
866
+ - No repeats in rows or columns
867
+ - Satisfy cage arithmetic constraints
868
+ - Operations: + - * /
869
+ ==================================================
870
+
871
+ | 1 | 2 | 3 | 4 |
872
+ +----+----+----+----+
873
+ 1 | .8+| . | .3 | .2 |
874
+ +----+----+----+----+
875
+ 2 | . | .6+| . | .3-|
876
+ +----+----+----+----+
877
+ 3 | .2 | .6+| .8+| . |
878
+ +----+----+----+----+
879
+ 4 | . | . | . | . |
880
+ +----+----+----+----+
881
+
882
+ Cages:
883
+ 8+: (1,1), (1,2), (2,1)
884
+ 3: (1,3)
885
+ 2: (1,4)
886
+ ...
887
+
888
+ > place 1 3 3
889
+ Number placed successfully!
890
+ ```
891
+
892
+ ## Architecture
893
+
894
+ This server is built on the [chuk-protocol-server](https://github.com/chrishayuk/chuk-protocol-server) framework, which provides:
895
+
896
+ - Multiple transport protocol support (Telnet, TCP, WebSocket, WS-Telnet)
897
+ - Telnet protocol negotiation (IAC, WILL, WONT, DO, DONT)
898
+ - WebSocket handling with ping/pong keepalive
899
+ - Connection management and monitoring
900
+ - Asynchronous I/O with Python asyncio
901
+
902
+ ### Game Architecture
903
+
904
+ Each game is a **self-contained module** with all logic co-located:
905
+
906
+ ```
907
+ games/
908
+ ├── _base/ # Base classes
909
+ │ ├── game.py # PuzzleGame ABC
910
+ │ └── commands.py # GameCommandHandler ABC
911
+ ├── sudoku/
912
+ │ ├── __init__.py # Exports SudokuGame
913
+ │ ├── game.py # Game logic
914
+ │ ├── config.py # SudokuConfig
915
+ │ └── commands.py # Command handler
916
+ ├── minesweeper/
917
+ │ ├── __init__.py
918
+ │ ├── game.py
919
+ │ └── config.py
920
+ └── ... (24 games total)
921
+ ```
922
+
923
+ All games extend the `PuzzleGame` abstract base class, which provides **deterministic seeding** through a per-game seeded RNG (simplified excerpt of the base class):
924
+
925
+ ```python
926
+ from chuk_puzzles_gym.games._base import PuzzleGame
927
+
928
+ class PuzzleGame(ABC):
929
+ def __init__(self, difficulty: str = "easy", seed: int | None = None):
930
+ self.seed = seed if seed is not None else random.randint(0, 2**32 - 1)
931
+ self._rng = random.Random(self.seed) # Deterministic RNG
932
+ # ...
933
+
934
+ @property
935
+ @abstractmethod
936
+ def name(self) -> str: ...
937
+
938
+ @property
939
+ @abstractmethod
940
+ def constraint_types(self) -> list[str]: ...
941
+
942
+ @property
943
+ @abstractmethod
944
+ def business_analogies(self) -> list[str]: ...
945
+
946
+ @abstractmethod
947
+ async def generate_puzzle(self) -> None: ...
948
+
949
+ @abstractmethod
950
+ async def validate_move(self, *args) -> MoveResult: ...
951
+
952
+ @abstractmethod
953
+ def is_complete(self) -> bool: ...
954
+
955
+ @abstractmethod
956
+ def render_grid(self) -> str: ...
957
+ ```
958
+
959
+ ### Handler Architecture
960
+
961
+ The `ArcadeHandler` class manages:
962
+ - Menu-driven game selection
963
+ - Command parsing and routing (delegating to game-specific handlers)
964
+ - Grid display with proper formatting
965
+ - Game state management per connection
966
+ - Multi-game support
967
+
968
+ ## Development
969
+
970
+ ### Setup Development Environment
971
+
972
+ ```bash
973
+ # Clone the repository
974
+ git clone https://github.com/chrishayuk/chuk-puzzles-gym.git
975
+ cd chuk-puzzles-gym
976
+
977
+ # Install development dependencies (with UV)
978
+ make dev-install
979
+
980
+ # Or with pip
981
+ pip install -e ".[dev]"
982
+ ```
983
+
984
+ ### Testing
985
+
986
+ The project has comprehensive test coverage (94%, 1067 tests):
987
+
988
+ ```bash
989
+ # Run all tests
990
+ make test
991
+
992
+ # Run tests with coverage report
993
+ make test-cov
994
+
995
+ # Run tests in watch mode
996
+ make test-watch
997
+
998
+ # View coverage report in browser
999
+ make serve-coverage
1000
+ ```
1001
+
1002
+ ### Coverage by Module
1003
+
1004
+ ```
1005
+ src/chuk_puzzles_gym/games/_base/ 86% # Base classes (abstract defaults)
1006
+ src/chuk_puzzles_gym/games/sudoku/ 92% # Sudoku module
1007
+ src/chuk_puzzles_gym/games/kenken/ 90% # KenKen module
1008
+ src/chuk_puzzles_gym/games/minesweeper/ 96% # Minesweeper module
1009
+ src/chuk_puzzles_gym/games/sokoban/ 83% # Sokoban (complex pathfinding)
1010
+ src/chuk_puzzles_gym/games/.../ 90%+ # All other games
1011
+ src/chuk_puzzles_gym/gym_env.py 90% # Gymnasium environment
1012
+ src/chuk_puzzles_gym/models/ 90%+ # Pydantic models
1013
+ ------------------------------------------------------
1014
+ TOTAL 94% 🎯
1015
+ ```
1016
+
1017
+ **Most modules meet the 90%+ coverage threshold.** The remaining gaps are in abstract base class defaults and complex pathfinding algorithms.
1018
+
1019
+ ### Code Quality
1020
+
1021
+ The project follows modern Python best practices with a **9.8/10 compliance score**:
1022
+
1023
+ #### Tooling
1024
+ - **Ruff**: Fast linter and formatter (replaces black + flake8)
1025
+ - **MyPy**: Static type checking
1026
+ - **Pytest**: Testing framework with async support
1027
+ - **Bandit**: Security vulnerability scanning
1028
+
1029
+ #### Code Standards
1030
+ - ✅ **Pydantic v2 Native** (10/10) - All models use `ConfigDict`, zero deprecation warnings
1031
+ - ✅ **Async Native** (9.5/10) - All I/O operations use async/await properly
1032
+ - ✅ **Type-Safe** (10/10) - No `dict["key"]` patterns, only typed Pydantic models
1033
+ - ✅ **No Magic Strings** (10/10) - All constants use enums or typed constants
1034
+ - ✅ **Test Coverage** (9.5/10) - 94% overall, most files ≥90%
1035
+
1036
+ #### Quality Metrics
1037
+ - **1067 tests** - All passing ✅
1038
+ - **94% coverage** - Exceeds 90% threshold ✅
1039
+ - **Zero linting errors** - Clean codebase ✅
1040
+ - **Full type safety** - MyPy passes ✅
1041
+ - **Deterministic seeding** - Reproducible puzzles ✅
1042
+
1043
+ ```bash
1044
+ # Run all checks (lint + typecheck + test + security)
1045
+ make check
1046
+
1047
+ # Run linter
1048
+ make lint
1049
+
1050
+ # Format code
1051
+ make format
1052
+
1053
+ # Type checking
1054
+ make typecheck
1055
+
1056
+ # Security scanning
1057
+ make security
1058
+ ```
1059
+
1060
+ ### Running Example Clients
1061
+
1062
+ ```bash
1063
+ # Telnet client examples
1064
+ make example-telnet # Browse all games
1065
+ make example-telnet-sudoku # Sudoku demo
1066
+ make example-telnet-kenken # KenKen demo
1067
+ make example-telnet-interactive # Interactive mode
1068
+
1069
+ # WebSocket client examples
1070
+ make example-ws # Tour all games
1071
+ make example-ws-sudoku # Sudoku demo
1072
+ make example-ws-binary # Binary puzzle demo
1073
+ make example-ws-solve # Solve with hints
1074
+ make example-ws-interactive # Interactive mode
1075
+ ```
1076
+
1077
+ ### CI/CD
1078
+
1079
+ The project includes GitHub Actions workflows:
1080
+
1081
+ - **test.yml**: Runs tests on Ubuntu, Windows, macOS with Python 3.11, 3.12, 3.13
1082
+ - **publish.yml**: Publishes to PyPI on release
1083
+ - **release.yml**: Creates GitHub releases
1084
+ - **fly-deploy.yml**: Auto-deploys to Fly.io on main branch push
1085
+
1086
+ Coverage threshold is set to 90% - builds fail if coverage drops below this.
1087
+
1088
+ ## Deployment to Fly.io
1089
+
1090
+ ### Using Make (Recommended)
1091
+
1092
+ ```bash
1093
+ # Deploy to Fly.io
1094
+ make fly-deploy
1095
+
1096
+ # Check status
1097
+ make fly-status
1098
+
1099
+ # View logs
1100
+ make fly-logs
1101
+ ```
1102
+
1103
+ ### Manual Deployment
1104
+
1105
+ 1. Install the Fly CLI: https://fly.io/docs/hands-on/install-flyctl/
1106
+
1107
+ 2. Login to Fly:
1108
+ ```bash
1109
+ fly auth login
1110
+ ```
1111
+
1112
+ 3. Create and deploy the app:
1113
+ ```bash
1114
+ # First deployment (creates the app)
1115
+ fly launch --config fly.toml --now
1116
+
1117
+ # Subsequent deployments
1118
+ fly deploy
1119
+ ```
1120
+
1121
+ 4. **Important:** Allocate a public IPv6 address for TCP services:
1122
+ ```bash
1123
+ # Allocate IPv6 (free)
1124
+ fly ips allocate-v6
1125
+
1126
+ # Verify IP is allocated
1127
+ fly ips list
1128
+ ```
1129
+
1130
+ 5. Check the status:
1131
+ ```bash
1132
+ fly status
1133
+ ```
1134
+
1135
+ 6. View logs:
1136
+ ```bash
1137
+ fly logs
1138
+ ```
1139
+
1140
+ 7. Connect to your Puzzle Arcade server:
1141
+ ```bash
1142
+ # Get your app's IPv6 address
1143
+ fly ips list
1144
+
1145
+ # Connect via telnet using IPv6 (free tier)
1146
+ telnet <your-ipv6> 8023
1147
+
1148
+ # WebSocket connections work with hostname
1149
+ # ws://<your-app>.fly.dev:8025/ws
1150
+ ```
1151
+
1152
+ **Note:** TCP services (Telnet, raw TCP) require a public IP address on Fly.io. We use IPv6 which is free. IPv4 costs $2/month and is not needed for most users.
1153
+
1154
+ ## Project Structure
1155
+
1156
+ ```
1157
+ chuk-puzzles-gym/
1158
+ ├── src/
1159
+ │ └── chuk_puzzles_gym/
1160
+ │ ├── __init__.py # Package initialization
1161
+ │ ├── server.py # Main arcade handler
1162
+ │ ├── constants.py # Game constants
1163
+ │ ├── models/ # Pydantic models
1164
+ │ │ ├── __init__.py
1165
+ │ │ ├── base.py # GridPosition, MoveResult
1166
+ │ │ ├── config.py # Base GameConfig
1167
+ │ │ ├── enums.py # DifficultyLevel, GameCommand, etc.
1168
+ │ │ └── games.py # Game-specific models (Cage, Task, etc.)
1169
+ │ └── games/ # Self-contained game modules
1170
+ │ ├── __init__.py # AVAILABLE_GAMES registry
1171
+ │ ├── _base/ # Base classes
1172
+ │ │ ├── __init__.py
1173
+ │ │ ├── game.py # PuzzleGame ABC
1174
+ │ │ └── commands.py # GameCommandHandler ABC
1175
+ │ ├── sudoku/ # Example game module
1176
+ │ │ ├── __init__.py # Exports SudokuGame
1177
+ │ │ ├── game.py # SudokuGame class
1178
+ │ │ ├── config.py # SudokuConfig
1179
+ │ │ └── commands.py # SudokuCommandHandler
1180
+ │ ├── minesweeper/ # Each game is self-contained
1181
+ │ │ ├── __init__.py
1182
+ │ │ ├── game.py
1183
+ │ │ └── config.py
1184
+ │ └── ... (24 games total)
1185
+ ├── tests/
1186
+ │ ├── test_puzzle_game.py # Base class tests
1187
+ │ ├── test_deterministic_seeding.py # Seeding tests
1188
+ │ ├── test_sudoku_game.py # Sudoku tests
1189
+ │ ├── test_minesweeper.py # Minesweeper tests
1190
+ │ └── ... (tests for all 24 games)
1191
+ ├── examples/
1192
+ │ ├── simple_client.py # Telnet client example
1193
+ │ ├── websocket_client.py # WebSocket client example
1194
+ │ └── README.md # Example usage guide
1195
+ ├── .github/workflows/ # CI/CD workflows
1196
+ ├── pyproject.toml # Modern Python project config
1197
+ ├── config.yaml # Multi-transport server configuration
1198
+ ├── Dockerfile # Docker build instructions
1199
+ ├── fly.toml # Fly.io deployment config
1200
+ ├── Makefile # Development commands (50+ targets)
1201
+ └── README.md # This file
1202
+ ```
1203
+
1204
+ ### Key Statistics
1205
+
1206
+ - **Test Coverage**: 94% overall (1067 tests, all passing)
1207
+ - **Code Quality Score**: 9.8/10 (near perfect compliance)
1208
+ - **Games Implemented**: 24 complete puzzle types
1209
+ - 7 Classic Logic Puzzles
1210
+ - 7 Advanced CP-SAT Puzzles
1211
+ - 5 Specialized Constraint Puzzles
1212
+ - 2 Optimization Challenges
1213
+ - 3 Advanced Reasoning Puzzles
1214
+ - **Supported Transports**: 4 (Telnet, TCP, WebSocket, WS-Telnet)
1215
+ - **Agent-Friendly Mode**: Structured output for AI tools
1216
+ - **Gymnasium API**: RL-compatible environment for all games
1217
+ - **Deterministic Seeding**: Reproducible puzzles for testing
1218
+
1219
+ ## Use Cases
1220
+
1221
+ ### 1. LLM Reasoning Demonstration
1222
+
1223
+ Perfect for demonstrating LLM reasoning capabilities:
1224
+
1225
+ 1. **LLM connects** via telnet: `telnet localhost 8023`
1226
+ 2. **Selects a puzzle**: `sudoku hard`
1227
+ 3. **Receives puzzle** in clean ASCII format
1228
+ 4. **Analyzes constraints** and generates solution
1229
+ 5. **Submits moves**: `place 1 5 7`
1230
+ 6. **Server validates** each move
1231
+ 7. **Puzzle solved!** Proof of reasoning capability
1232
+
1233
+ ### 2. Constraint Solver Testing
1234
+
1235
+ Test the generality of constraint solvers (like MCP solvers):
1236
+
1237
+ - **Different puzzle types** → Same underlying solver
1238
+ - **Clean ASCII output** → Easy for solver parsing
1239
+ - **Simple interface** → Focus on solving, not UI
1240
+ - **Pure validation** → Server validates, doesn't solve
1241
+
1242
+ ### 3. Educational Tool
1243
+
1244
+ Learn about constraint satisfaction problems:
1245
+
1246
+ - **24 different puzzle types** demonstrating various constraint types:
1247
+ - AllDifferent constraints (Sudoku, KenKen, Futoshiki)
1248
+ - Arithmetic constraints (KenKen, Kakuro, Killer Sudoku)
1249
+ - Boolean/SAT constraints (Lights Out, Binary Puzzle)
1250
+ - Loop/Edge constraints (Slitherlink)
1251
+ - Deduction constraints (Mastermind, Logic Grid, Einstein's Puzzle)
1252
+ - Optimization objectives (Knapsack, Task Scheduler)
1253
+ - Temporal reasoning (Task Scheduler)
1254
+ - Connectivity constraints (Nurikabe, Slitherlink)
1255
+ - Probabilistic reasoning (Minesweeper)
1256
+ - And more!
1257
+ - **Well-documented code** showing puzzle generation algorithms
1258
+ - **Comprehensive tests** (1067 tests, 94% coverage) demonstrating validation
1259
+ - **Deterministic seeding** - Reproduce any puzzle for debugging/testing
1260
+ - **Production-ready** - 9.8/10 code quality score
1261
+ - **Type-safe** - Full Pydantic v2 and MyPy compliance
1262
+ - **Modular architecture** - Each game is self-contained in its own folder
1263
+
1264
+ ## Adding New Puzzle Games
1265
+
1266
+ 1. Create a new game folder in `src/chuk_puzzles_gym/games/`:
1267
+
1268
+ ```
1269
+ games/
1270
+ └── my_puzzle/
1271
+ ├── __init__.py # Export the game class
1272
+ ├── game.py # Game logic
1273
+ └── config.py # Game configuration
1274
+ ```
1275
+
1276
+ 2. Create the config in `config.py`:
1277
+
1278
+ ```python
1279
+ from pydantic import Field
1280
+ from ...models import DifficultyLevel, GameConfig
1281
+
1282
+ class MyPuzzleConfig(GameConfig):
1283
+ grid_size: int = Field(default=5, description="Grid size")
1284
+
1285
+ @classmethod
1286
+ def from_difficulty(cls, difficulty: DifficultyLevel) -> "MyPuzzleConfig":
1287
+ sizes = {DifficultyLevel.EASY: 5, DifficultyLevel.MEDIUM: 7, DifficultyLevel.HARD: 9}
1288
+ return cls(difficulty=difficulty, grid_size=sizes[difficulty])
1289
+ ```
1290
+
1291
+ 3. Create the game in `game.py`:
1292
+
1293
+ ```python
1294
+ from .._base import PuzzleGame
1295
+ from ...models import MoveResult
1296
+ from .config import MyPuzzleConfig
1297
+
1298
+ class MyPuzzleGame(PuzzleGame):
1299
+ def __init__(self, difficulty: str = "easy", seed: int | None = None):
1300
+ super().__init__(difficulty, seed)
1301
+ self.config = MyPuzzleConfig.from_difficulty(self.difficulty)
1302
+ # Use self._rng for all randomness (deterministic seeding)
1303
+
1304
+ @property
1305
+ def name(self) -> str:
1306
+ return "My Puzzle"
1307
+
1308
+ @property
1309
+ def constraint_types(self) -> list[str]:
1310
+ return ["all_different", "sum_constraint"]
1311
+
1312
+ @property
1313
+ def business_analogies(self) -> list[str]:
1314
+ return ["resource_allocation", "scheduling"]
1315
+
1316
+ async def generate_puzzle(self) -> None:
1317
+ # Use self._rng.randint(), self._rng.choice(), etc.
1318
+ self.game_started = True
1319
+
1320
+ async def validate_move(self, row: int, col: int, num: int) -> MoveResult:
1321
+ # Validate and apply move
1322
+ return MoveResult(success=True, message="Number placed!")
1323
+
1324
+ def is_complete(self) -> bool:
1325
+ return all(cell != 0 for row in self.grid for cell in row)
1326
+
1327
+ def render_grid(self) -> str:
1328
+ return " | 1 | 2 | 3 |\n" + ...
1329
+
1330
+ def get_stats(self) -> str:
1331
+ return f"Moves: {self.moves_made} | Seed: {self.seed}"
1332
+ ```
1333
+
1334
+ 4. Export in `__init__.py`:
1335
+
1336
+ ```python
1337
+ from .game import MyPuzzleGame
1338
+ __all__ = ["MyPuzzleGame"]
1339
+ ```
1340
+
1341
+ 5. Register in `src/chuk_puzzles_gym/games/__init__.py`:
1342
+
1343
+ ```python
1344
+ from .my_puzzle import MyPuzzleGame
1345
+
1346
+ AVAILABLE_GAMES = {
1347
+ # ... other games
1348
+ "mypuzzle": MyPuzzleGame,
1349
+ }
1350
+ ```
1351
+
1352
+ 6. Add tests in `tests/test_my_puzzle_game.py`:
1353
+
1354
+ ```python
1355
+ from chuk_puzzles_gym.games.my_puzzle import MyPuzzleGame
1356
+
1357
+ class TestMyPuzzleGame:
1358
+ async def test_deterministic_seeding(self):
1359
+ game1 = MyPuzzleGame("easy", seed=12345)
1360
+ game2 = MyPuzzleGame("easy", seed=12345)
1361
+ await game1.generate_puzzle()
1362
+ await game2.generate_puzzle()
1363
+ assert game1.render_grid() == game2.render_grid()
1364
+
1365
+ def test_seed_in_stats(self):
1366
+ game = MyPuzzleGame("easy", seed=42)
1367
+ assert "Seed: 42" in game.get_stats()
1368
+ ```
1369
+
1370
+ 7. Run tests and verify:
1371
+
1372
+ ```bash
1373
+ make test-cov
1374
+ make check
1375
+ ```
1376
+
1377
+ ## Contributing
1378
+
1379
+ Contributions are welcome! Please follow these guidelines:
1380
+
1381
+ 1. Fork the repository
1382
+ 2. Create a feature branch (`git checkout -b feature/amazing-puzzle`)
1383
+ 3. Make your changes
1384
+ 4. Run tests and checks (`make check`)
1385
+ 5. Ensure coverage stays above 90% (`make test-cov`)
1386
+ 6. Commit your changes (`git commit -m 'Add amazing puzzle'`)
1387
+ 7. Push to the branch (`git push origin feature/amazing-puzzle`)
1388
+ 8. Open a Pull Request
1389
+
1390
+ ### Development Guidelines
1391
+
1392
+ - Follow PEP 8 style guide (enforced by ruff)
1393
+ - Add type hints to all functions
1394
+ - Write tests for new features (>90% coverage)
1395
+ - Update documentation as needed
1396
+ - Ensure all grid headers align properly with rows
1397
+
1398
+ ## Troubleshooting
1399
+
1400
+ ### Server won't start
1401
+ - Ensure chuk-protocol-server is installed: `uv pip install chuk-protocol-server`
1402
+ - Check ports aren't already in use: `lsof -i :8023,8024,8025,8026`
1403
+ - Verify Python version is 3.11+: `python --version`
1404
+
1405
+ ### Tests failing
1406
+ - Install dev dependencies: `make dev-install`
1407
+ - Clear cache: `make clean`
1408
+ - Check Python version compatibility
1409
+
1410
+ ### Coverage too low
1411
+ - Run coverage report: `make test-cov`
1412
+ - View HTML report: `make serve-coverage`
1413
+ - Add tests for uncovered code
1414
+
1415
+ ### Grid alignment issues
1416
+ - All grid headers must align with row pipes
1417
+ - Use the format `" |"` for headers to match row format `"N |"`
1418
+ - Test visually: `make example-telnet-kenken`
1419
+
1420
+ ## Roadmap
1421
+
1422
+ See [ROADMAP.md](ROADMAP.md) for the full development roadmap.
1423
+
1424
+ ### Highlights
1425
+
1426
+ **Benchmarking & Metrics**
1427
+ - Puzzle complexity metrics (constraint count, variable count, branching factor)
1428
+ - Episode model for tracking game sessions
1429
+ - Trace logging for offline analysis
1430
+
1431
+ **Agent Evaluation Tools**
1432
+ - Batch evaluation harness CLI
1433
+ - Solver vs Model comparison mode
1434
+ - JSON protocol for structured agent communication
1435
+
1436
+ **Learning & Curriculum**
1437
+ - Constraint concept progression graph
1438
+ - Tagged puzzle sets for educators
1439
+ - Difficulty scaling based on constraint complexity
1440
+
1441
+ **Ecosystem Integrations**
1442
+ - MCP native mode for agent frameworks
1443
+ - Python client library
1444
+ - REST/WebSocket API documentation
1445
+
1446
+ **UX & Community**
1447
+ - Interactive web viewer with replay mode
1448
+ - Public benchmark packs (versioned, citable)
1449
+ - Community leaderboards
1450
+
1451
+ ## License
1452
+
1453
+ MIT License - see the main chuk-protocol-server project for details.
1454
+
1455
+ ## Credits
1456
+
1457
+ - Built using the [chuk-protocol-server](https://github.com/chrishayuk/chuk-protocol-server) framework
1458
+ - Puzzle generation algorithms based on backtracking and constraint propagation
1459
+ - Uses modern Python tooling: UV, Ruff, MyPy, Pytest
1460
+
1461
+ ## Links
1462
+
1463
+ - [chuk-protocol-server](https://github.com/chrishayuk/chuk-protocol-server) - Multi-transport server framework
1464
+ - [sudoku-telnet-server](https://github.com/chrishayuk/sudoku-telnet-server) - Original single-game implementation
1465
+ - [UV](https://github.com/astral-sh/uv) - Fast Python package manager
1466
+ - [Ruff](https://github.com/astral-sh/ruff) - Fast Python linter and formatter
1467
+ - [Fly.io](https://fly.io) - Cloud deployment platform
1468
+
1469
+ ---
1470
+
1471
+ **Ready to test your solver?** Connect now and start solving! 🎮