chuk-puzzles-gym 0.10.1__tar.gz → 0.10.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179)
  1. {chuk_puzzles_gym-0.10.1/src/chuk_puzzles_gym.egg-info → chuk_puzzles_gym-0.10.2}/PKG-INFO +124 -7
  2. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/README.md +123 -6
  3. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/pyproject.toml +1 -1
  4. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/eval.py +168 -46
  5. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/export/dataset.py +7 -1
  6. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/_base/game.py +123 -0
  7. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/gym_env.py +21 -5
  8. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/models/__init__.py +2 -0
  9. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/models/evaluation.py +165 -1
  10. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/server.py +34 -3
  11. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2/src/chuk_puzzles_gym.egg-info}/PKG-INFO +124 -7
  12. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/MANIFEST.in +0 -0
  13. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/config.yaml +0 -0
  14. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/setup.cfg +0 -0
  15. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/__init__.py +0 -0
  16. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/constants.py +0 -0
  17. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/export/__init__.py +0 -0
  18. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/__init__.py +0 -0
  19. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/_base/__init__.py +0 -0
  20. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/_base/commands.py +0 -0
  21. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/binary/__init__.py +0 -0
  22. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/binary/config.py +0 -0
  23. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/binary/game.py +0 -0
  24. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/bridges/__init__.py +0 -0
  25. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/bridges/config.py +0 -0
  26. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/bridges/game.py +0 -0
  27. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/cryptarithmetic/__init__.py +0 -0
  28. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/cryptarithmetic/commands.py +0 -0
  29. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/cryptarithmetic/config.py +0 -0
  30. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/cryptarithmetic/game.py +0 -0
  31. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/einstein/__init__.py +0 -0
  32. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/einstein/config.py +0 -0
  33. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/einstein/constants.py +0 -0
  34. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/einstein/game.py +0 -0
  35. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/einstein/models.py +0 -0
  36. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/fillomino/__init__.py +0 -0
  37. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/fillomino/config.py +0 -0
  38. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/fillomino/game.py +0 -0
  39. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/futoshiki/__init__.py +0 -0
  40. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/futoshiki/config.py +0 -0
  41. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/futoshiki/game.py +0 -0
  42. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/graph_coloring/__init__.py +0 -0
  43. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/graph_coloring/commands.py +0 -0
  44. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/graph_coloring/config.py +0 -0
  45. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/graph_coloring/game.py +0 -0
  46. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/hidato/__init__.py +0 -0
  47. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/hidato/config.py +0 -0
  48. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/hidato/game.py +0 -0
  49. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/hitori/__init__.py +0 -0
  50. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/hitori/config.py +0 -0
  51. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/hitori/game.py +0 -0
  52. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/kakuro/__init__.py +0 -0
  53. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/kakuro/config.py +0 -0
  54. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/kakuro/game.py +0 -0
  55. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/kenken/__init__.py +0 -0
  56. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/kenken/config.py +0 -0
  57. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/kenken/enums.py +0 -0
  58. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/kenken/game.py +0 -0
  59. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/kenken/models.py +0 -0
  60. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/killer_sudoku/__init__.py +0 -0
  61. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/killer_sudoku/config.py +0 -0
  62. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/killer_sudoku/game.py +0 -0
  63. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/killer_sudoku/models.py +0 -0
  64. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/knapsack/__init__.py +0 -0
  65. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/knapsack/config.py +0 -0
  66. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/knapsack/enums.py +0 -0
  67. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/knapsack/game.py +0 -0
  68. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/knapsack/models.py +0 -0
  69. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/lights_out/__init__.py +0 -0
  70. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/lights_out/config.py +0 -0
  71. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/lights_out/game.py +0 -0
  72. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/logic_grid/__init__.py +0 -0
  73. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/logic_grid/config.py +0 -0
  74. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/logic_grid/constants.py +0 -0
  75. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/logic_grid/game.py +0 -0
  76. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/logic_grid/models.py +0 -0
  77. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/mastermind/__init__.py +0 -0
  78. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/mastermind/config.py +0 -0
  79. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/mastermind/game.py +0 -0
  80. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/minesweeper/__init__.py +0 -0
  81. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/minesweeper/config.py +0 -0
  82. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/minesweeper/enums.py +0 -0
  83. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/minesweeper/game.py +0 -0
  84. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/nonogram/__init__.py +0 -0
  85. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/nonogram/config.py +0 -0
  86. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/nonogram/game.py +0 -0
  87. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/nqueens/__init__.py +0 -0
  88. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/nqueens/config.py +0 -0
  89. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/nqueens/game.py +0 -0
  90. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/numberlink/__init__.py +0 -0
  91. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/numberlink/config.py +0 -0
  92. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/numberlink/game.py +0 -0
  93. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/nurikabe/__init__.py +0 -0
  94. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/nurikabe/config.py +0 -0
  95. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/nurikabe/enums.py +0 -0
  96. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/nurikabe/game.py +0 -0
  97. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/rush_hour/__init__.py +0 -0
  98. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/rush_hour/commands.py +0 -0
  99. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/rush_hour/config.py +0 -0
  100. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/rush_hour/game.py +0 -0
  101. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/rush_hour/models.py +0 -0
  102. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/scheduler/__init__.py +0 -0
  103. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/scheduler/config.py +0 -0
  104. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/scheduler/constants.py +0 -0
  105. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/scheduler/enums.py +0 -0
  106. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/scheduler/game.py +0 -0
  107. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/scheduler/models.py +0 -0
  108. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/shikaku/__init__.py +0 -0
  109. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/shikaku/config.py +0 -0
  110. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/shikaku/game.py +0 -0
  111. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/skyscrapers/__init__.py +0 -0
  112. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/skyscrapers/config.py +0 -0
  113. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/skyscrapers/game.py +0 -0
  114. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/slitherlink/__init__.py +0 -0
  115. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/slitherlink/config.py +0 -0
  116. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/slitherlink/game.py +0 -0
  117. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/sokoban/__init__.py +0 -0
  118. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/sokoban/config.py +0 -0
  119. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/sokoban/game.py +0 -0
  120. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/star_battle/__init__.py +0 -0
  121. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/star_battle/config.py +0 -0
  122. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/star_battle/game.py +0 -0
  123. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/sudoku/__init__.py +0 -0
  124. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/sudoku/commands.py +0 -0
  125. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/sudoku/config.py +0 -0
  126. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/sudoku/game.py +0 -0
  127. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/tents/__init__.py +0 -0
  128. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/tents/config.py +0 -0
  129. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/games/tents/game.py +0 -0
  130. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/models/base.py +0 -0
  131. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/models/config.py +0 -0
  132. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/models/enums.py +0 -0
  133. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/models/games.py +0 -0
  134. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/trace/__init__.py +0 -0
  135. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/trace/generator.py +0 -0
  136. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym/utils/__init__.py +0 -0
  137. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym.egg-info/SOURCES.txt +0 -0
  138. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym.egg-info/dependency_links.txt +0 -0
  139. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym.egg-info/entry_points.txt +0 -0
  140. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym.egg-info/requires.txt +0 -0
  141. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/src/chuk_puzzles_gym.egg-info/top_level.txt +0 -0
  142. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_base_models.py +0 -0
  143. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_binary_game.py +0 -0
  144. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_bridges.py +0 -0
  145. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_command_handlers.py +0 -0
  146. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_cryptarithmetic_game.py +0 -0
  147. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_deterministic_seeding.py +0 -0
  148. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_einstein.py +0 -0
  149. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_eval.py +0 -0
  150. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_fillomino.py +0 -0
  151. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_futoshiki_game.py +0 -0
  152. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_game_configs.py +0 -0
  153. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_graph_coloring_game.py +0 -0
  154. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_gym_env.py +0 -0
  155. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_hidato.py +0 -0
  156. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_hitori.py +0 -0
  157. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_kakuro_game.py +0 -0
  158. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_kenken_game.py +0 -0
  159. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_killer_sudoku.py +0 -0
  160. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_knapsack.py +0 -0
  161. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_lights_out.py +0 -0
  162. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_logic_grid_game.py +0 -0
  163. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_mastermind.py +0 -0
  164. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_minesweeper.py +0 -0
  165. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_nonogram_game.py +0 -0
  166. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_nqueens_game.py +0 -0
  167. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_numberlink_game.py +0 -0
  168. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_nurikabe.py +0 -0
  169. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_puzzle_game.py +0 -0
  170. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_rush_hour_game.py +0 -0
  171. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_scheduler.py +0 -0
  172. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_shikaku.py +0 -0
  173. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_skyscrapers_game.py +0 -0
  174. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_slitherlink.py +0 -0
  175. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_sokoban.py +0 -0
  176. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_star_battle.py +0 -0
  177. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_sudoku_game.py +0 -0
  178. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_tents.py +0 -0
  179. {chuk_puzzles_gym-0.10.1 → chuk_puzzles_gym-0.10.2}/tests/test_trace_generator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chuk-puzzles-gym
3
- Version: 0.10.1
3
+ Version: 0.10.2
4
4
  Summary: Multi-game puzzle gym for LLM training and benchmarking - 30 constraint puzzles with synthetic data generation
5
5
  Author: Chris Hay
6
6
  License: MIT
@@ -93,10 +93,17 @@ Once connected, type `help` to see available games, or `sudoku easy` to start pl
93
93
  - Enable with `mode agent` command
94
94
  - Machine-parseable grid format with clear start/end markers
95
95
  - Compact output optimized for LLM tool integration
96
+ - **Reasoning Depth Metrics** - Measure *how* agents reason, not just whether they succeed
97
+ - Backtrack detection (did the agent revise previous placements?)
98
+ - Progress steadiness (monotonic advance toward solution?)
99
+ - Error streak analysis (isolated mistakes vs. clustered confusion?)
100
+ - Reasoning overhead (wasted work relative to optimal path)
101
+ - Solver distance traces (remaining work after each valid move)
102
+ - Available in all paths: Gym env, eval harness, and server (telnet/WebSocket)
96
103
  - **Evaluation Harness** (`chuk-puzzles-eval`) - Built-in benchmarking CLI
97
104
  - Batch evaluation with configurable episodes
98
105
  - Multiple output formats (JSON, CSV, Markdown)
99
- - Metrics: moves, invalid moves, hints, solve time
106
+ - Metrics: moves, invalid moves, hints, solve time, reasoning depth
100
107
  - Reproducible with deterministic seeds
101
108
  - **Dataset Export** (`chuk-puzzles-export`) - Synthetic data generation for LLM training
102
109
  - JSONL output with complete problem definitions and solutions
@@ -500,6 +507,7 @@ games = PuzzleEnv.available_games()
500
507
 
501
508
  - **All 30 games** accessible through unified API
502
509
  - **Configurable rewards** for correct moves, invalid attempts, completion bonuses
510
+ - **Reasoning depth metrics** tracking backtracks, progress steadiness, error patterns
503
511
  - **Hint system** with optional budget limits
504
512
  - **Solver-free mode** for pure reasoning benchmarks
505
513
  - **Efficiency scoring** based on optimal step counts
@@ -515,8 +523,25 @@ obs = {
515
523
  "moves": 5,
516
524
  "invalid_moves": 1,
517
525
  "hints_used": 2,
526
+ "hints_remaining": 98,
518
527
  "is_complete": False,
519
- "grid": [[4, 0, 8, ...], ...] # Game-specific state
528
+ "grid": [[4, 0, 8, ...], ...], # Game-specific state
529
+ "render": " | 1 2 3 | ...", # ASCII grid
530
+ }
531
+
532
+ # Info dict includes reasoning metrics and difficulty profile
533
+ info = {
534
+ "optimal_steps": 45,
535
+ "difficulty_profile": {"logic_depth": 2, "branching_factor": 2.0, ...},
536
+ "reasoning_metrics": {
537
+ "backtrack_count": 0,
538
+ "backtrack_rate": 0.0,
539
+ "progress_velocity": 1.0,
540
+ "progress_steadiness": 1.0,
541
+ "reasoning_overhead": 1.0,
542
+ "error_streak_max": 0,
543
+ "solver_distance_trace": [44, 43, 42, ...],
544
+ },
520
545
  }
521
546
  ```
522
547
 
@@ -546,6 +571,89 @@ config = SolverConfig(hint_budget=5, hint_penalty=0.1)
546
571
  env = PuzzleEnv("sudoku", solver_config=config)
547
572
  ```
548
573
 
574
+ ## Reasoning Depth Metrics
575
+
576
+ Beyond binary success/failure, the system measures **how** an agent reasons through puzzles. These metrics are available in all interaction paths: the Gym environment, the evaluation harness, and the telnet/WebSocket server.
577
+
578
+ ### Metrics
579
+
580
+ | Metric | Description | Perfect Score |
581
+ |--------|-------------|---------------|
582
+ | `backtrack_count` | Times the agent revised a previous placement | 0 |
583
+ | `backtrack_rate` | Fraction of valid moves that were backtracks | 0% |
584
+ | `progress_velocity` | Average cells solved per step | 1.0 |
585
+ | `progress_steadiness` | How monotonically remaining work decreases (1.0 = never stalls) | 100% |
586
+ | `reasoning_overhead` | Total actions / optimal path length (1.0 = no waste) | 1.0x |
587
+ | `error_streak_max` | Longest run of consecutive invalid moves | 0 |
588
+ | `avg_error_streak` | Average length of error bursts | 0.0 |
589
+ | `solver_distance_trace` | Remaining positions after each valid move | Monotonically decreasing |
590
+
591
+ ### Usage in Gym Environment
592
+
593
+ ```python
594
+ from chuk_puzzles_gym.gym_env import PuzzleEnv
595
+
596
+ env = PuzzleEnv("sudoku", difficulty="easy", seed=42)
597
+ obs, info = await env.reset()
598
+
599
+ # Reasoning metrics available in info after reset
600
+ print(info["reasoning_metrics"])
601
+
602
+ # ... agent plays ...
603
+ obs, reward, terminated, truncated, info = await env.step("place 1 1 5")
604
+
605
+ # On episode end, info includes full reasoning metrics
606
+ if terminated:
607
+     metrics = info["reasoning_metrics"]
608
+     print(f"Backtrack rate: {metrics['backtrack_rate']:.0%}")
609
+     print(f"Overhead: {metrics['reasoning_overhead']:.1f}x")
610
+     print(f"Steadiness: {metrics['progress_steadiness']:.0%}")
611
+ ```
612
+
613
+ ### Usage in Server (Telnet/WebSocket)
614
+
615
+ Reasoning metrics are included automatically in server output:
616
+
617
+ - **JSON mode**: `reasoning_metrics` dict in every state response and completion message
618
+ - **STRICT mode**: `BT=`, `OH=`, `ST=` fields appended to STATS and COMPLETE messages
619
+ - **Normal mode**: "Reasoning Depth" section shown on completion and in `stats` command
620
+
621
+ ```
622
+ > mode json
623
+ > place 1 1 5
624
+ {"type":"result","success":true,...,"state":{...,"reasoning_metrics":{"backtrack_count":0,...}}}
625
+
626
+ > stats
627
+ {"type":"stats",...,"reasoning_metrics":{"backtrack_count":0,"backtrack_rate":0.0,...}}
628
+ ```
629
+
630
+ ### Usage in Evaluation Harness
631
+
632
+ ```bash
633
+ # Reasoning metrics included in all output formats
634
+ chuk-puzzles-eval sudoku -d easy -n 10 -o json
635
+ ```
636
+
637
+ ```python
638
+ from chuk_puzzles_gym.eval import evaluate_game
639
+
640
+ report = await evaluate_game("sudoku", difficulty="easy", episodes=10)
641
+ report.print_summary() # Includes "Reasoning Depth" section
642
+
643
+ # Aggregate metrics
644
+ print(f"Avg backtrack rate: {report.avg_backtrack_rate:.0%}")
645
+ print(f"Avg overhead: {report.avg_reasoning_overhead:.1f}x")
646
+ print(f"Avg steadiness: {report.avg_progress_steadiness:.0%}")
647
+ ```
648
+
649
+ ### What the Metrics Reveal
650
+
651
+ A **perfect solver** shows: 0 backtracks, 1.0x overhead, 100% steadiness, 1.0 velocity.
652
+
653
+ A **struggling agent** shows: high backtrack rate (revising decisions), error streaks (clustered confusion), low steadiness (stalling progress), and high overhead (wasted work).
654
+
655
+ These patterns are visible even when two agents both eventually solve a puzzle — the metrics expose the **quality of the reasoning path**, not just the outcome.
656
+
549
657
  ## Evaluation Harness
550
658
 
551
659
  The project includes a built-in **evaluation harness** for benchmarking puzzle-solving agents:
@@ -604,6 +712,12 @@ Avg Time: 12ms
604
712
  | `hints_used` | Number of hints requested |
605
713
  | `wall_time_ms` | Time to solve in milliseconds |
606
714
  | `seed` | Puzzle seed for reproducibility |
715
+ | `backtrack_count` | Times agent revised a previous placement |
716
+ | `backtrack_rate` | Fraction of valid moves that were backtracks |
717
+ | `progress_steadiness` | How monotonically progress advances (1.0 = perfect) |
718
+ | `reasoning_overhead` | Total actions / optimal path (1.0 = no waste) |
719
+ | `error_streak_max` | Longest run of consecutive invalid moves |
720
+ | `progress_velocity` | Average cells solved per step |
607
721
 
608
722
  ## Dataset Export
609
723
 
@@ -1194,12 +1308,13 @@ chuk-puzzles-gym/
1194
1308
  │ │ ├── base.py # GridPosition, MoveResult
1195
1309
  │ │ ├── config.py # Base GameConfig
1196
1310
  │ │ ├── enums.py # DifficultyLevel, GameCommand, etc.
1311
+ │ │ ├── evaluation.py # ReasoningMetrics, EpisodeResult, EvaluationSummary
1197
1312
  │ │ └── games.py # Game-specific models (Cage, Task, etc.)
1198
1313
  │ └── games/ # Self-contained game modules
1199
1314
  │ ├── __init__.py # AVAILABLE_GAMES registry
1200
1315
  │ ├── _base/ # Base classes
1201
1316
  │ │ ├── __init__.py
1202
- │ │ ├── game.py # PuzzleGame ABC
1317
+ │ │ ├── game.py # PuzzleGame ABC + ReasoningTracker
1203
1318
  │ │ └── commands.py # GameCommandHandler ABC
1204
1319
  │ ├── sudoku/ # Example game module
1205
1320
  │ │ ├── __init__.py # Exports SudokuGame
@@ -1226,6 +1341,7 @@ chuk-puzzles-gym/
1226
1341
  │ ├── example_graph_coloring.py # Graph Coloring game logic demo
1227
1342
  │ ├── example_cryptarithmetic.py# Cryptarithmetic game logic demo
1228
1343
  │ ├── example_rush_hour.py # Rush Hour game logic demo
1344
+ │ ├── example_reasoning_metrics.py # Reasoning depth metrics demo
1229
1345
  │ └── README.md # Example usage guide
1230
1346
  ├── .github/workflows/ # CI/CD workflows
1231
1347
  ├── pyproject.toml # Modern Python project config
@@ -1465,9 +1581,10 @@ See [ROADMAP.md](ROADMAP.md) for the full development roadmap.
1465
1581
  ### Highlights
1466
1582
 
1467
1583
  **Benchmarking & Metrics**
1468
- - Puzzle complexity metrics (constraint count, variable count, branching factor)
1469
- - Episode model for tracking game sessions
1470
- - Trace logging for offline analysis
1584
+ - ~~Puzzle complexity metrics~~ (implemented: constraint count, variable count, branching factor)
1585
+ - ~~Episode model for tracking game sessions~~ (implemented: EpisodeResult with ReasoningMetrics)
1586
+ - ~~Reasoning depth metrics~~ (implemented: backtrack detection, progress steadiness, error patterns)
1587
+ - ~~Trace logging for offline analysis~~ (implemented: solver distance traces in all output paths)
1471
1588
 
1472
1589
  **Agent Evaluation Tools**
1473
1590
  - Batch evaluation harness CLI
@@ -62,10 +62,17 @@ Once connected, type `help` to see available games, or `sudoku easy` to start pl
62
62
  - Enable with `mode agent` command
63
63
  - Machine-parseable grid format with clear start/end markers
64
64
  - Compact output optimized for LLM tool integration
65
+ - **Reasoning Depth Metrics** - Measure *how* agents reason, not just whether they succeed
66
+ - Backtrack detection (did the agent revise previous placements?)
67
+ - Progress steadiness (monotonic advance toward solution?)
68
+ - Error streak analysis (isolated mistakes vs. clustered confusion?)
69
+ - Reasoning overhead (wasted work relative to optimal path)
70
+ - Solver distance traces (remaining work after each valid move)
71
+ - Available in all paths: Gym env, eval harness, and server (telnet/WebSocket)
65
72
  - **Evaluation Harness** (`chuk-puzzles-eval`) - Built-in benchmarking CLI
66
73
  - Batch evaluation with configurable episodes
67
74
  - Multiple output formats (JSON, CSV, Markdown)
68
- - Metrics: moves, invalid moves, hints, solve time
75
+ - Metrics: moves, invalid moves, hints, solve time, reasoning depth
69
76
  - Reproducible with deterministic seeds
70
77
  - **Dataset Export** (`chuk-puzzles-export`) - Synthetic data generation for LLM training
71
78
  - JSONL output with complete problem definitions and solutions
@@ -469,6 +476,7 @@ games = PuzzleEnv.available_games()
469
476
 
470
477
  - **All 30 games** accessible through unified API
471
478
  - **Configurable rewards** for correct moves, invalid attempts, completion bonuses
479
+ - **Reasoning depth metrics** tracking backtracks, progress steadiness, error patterns
472
480
  - **Hint system** with optional budget limits
473
481
  - **Solver-free mode** for pure reasoning benchmarks
474
482
  - **Efficiency scoring** based on optimal step counts
@@ -484,8 +492,25 @@ obs = {
484
492
  "moves": 5,
485
493
  "invalid_moves": 1,
486
494
  "hints_used": 2,
495
+ "hints_remaining": 98,
487
496
  "is_complete": False,
488
- "grid": [[4, 0, 8, ...], ...] # Game-specific state
497
+ "grid": [[4, 0, 8, ...], ...], # Game-specific state
498
+ "render": " | 1 2 3 | ...", # ASCII grid
499
+ }
500
+
501
+ # Info dict includes reasoning metrics and difficulty profile
502
+ info = {
503
+ "optimal_steps": 45,
504
+ "difficulty_profile": {"logic_depth": 2, "branching_factor": 2.0, ...},
505
+ "reasoning_metrics": {
506
+ "backtrack_count": 0,
507
+ "backtrack_rate": 0.0,
508
+ "progress_velocity": 1.0,
509
+ "progress_steadiness": 1.0,
510
+ "reasoning_overhead": 1.0,
511
+ "error_streak_max": 0,
512
+ "solver_distance_trace": [44, 43, 42, ...],
513
+ },
489
514
  }
490
515
  ```
491
516
 
@@ -515,6 +540,89 @@ config = SolverConfig(hint_budget=5, hint_penalty=0.1)
515
540
  env = PuzzleEnv("sudoku", solver_config=config)
516
541
  ```
517
542
 
543
+ ## Reasoning Depth Metrics
544
+
545
+ Beyond binary success/failure, the system measures **how** an agent reasons through puzzles. These metrics are available in all interaction paths: the Gym environment, the evaluation harness, and the telnet/WebSocket server.
546
+
547
+ ### Metrics
548
+
549
+ | Metric | Description | Perfect Score |
550
+ |--------|-------------|---------------|
551
+ | `backtrack_count` | Times the agent revised a previous placement | 0 |
552
+ | `backtrack_rate` | Fraction of valid moves that were backtracks | 0% |
553
+ | `progress_velocity` | Average cells solved per step | 1.0 |
554
+ | `progress_steadiness` | How monotonically remaining work decreases (1.0 = never stalls) | 100% |
555
+ | `reasoning_overhead` | Total actions / optimal path length (1.0 = no waste) | 1.0x |
556
+ | `error_streak_max` | Longest run of consecutive invalid moves | 0 |
557
+ | `avg_error_streak` | Average length of error bursts | 0.0 |
558
+ | `solver_distance_trace` | Remaining positions after each valid move | Monotonically decreasing |
559
+
560
+ ### Usage in Gym Environment
561
+
562
+ ```python
563
+ from chuk_puzzles_gym.gym_env import PuzzleEnv
564
+
565
+ env = PuzzleEnv("sudoku", difficulty="easy", seed=42)
566
+ obs, info = await env.reset()
567
+
568
+ # Reasoning metrics available in info after reset
569
+ print(info["reasoning_metrics"])
570
+
571
+ # ... agent plays ...
572
+ obs, reward, terminated, truncated, info = await env.step("place 1 1 5")
573
+
574
+ # On episode end, info includes full reasoning metrics
575
+ if terminated:
576
+ metrics = info["reasoning_metrics"]
577
+ print(f"Backtrack rate: {metrics['backtrack_rate']:.0%}")
578
+ print(f"Overhead: {metrics['reasoning_overhead']:.1f}x")
579
+ print(f"Steadiness: {metrics['progress_steadiness']:.0%}")
580
+ ```
581
+
582
+ ### Usage in Server (Telnet/WebSocket)
583
+
584
+ Reasoning metrics are included automatically in server output:
585
+
586
+ - **JSON mode**: `reasoning_metrics` dict in every state response and completion message
587
+ - **STRICT mode**: `BT=`, `OH=`, `ST=` fields appended to STATS and COMPLETE messages
588
+ - **Normal mode**: "Reasoning Depth" section shown on completion and in `stats` command
589
+
590
+ ```
591
+ > mode json
592
+ > place 1 1 5
593
+ {"type":"result","success":true,...,"state":{...,"reasoning_metrics":{"backtrack_count":0,...}}}
594
+
595
+ > stats
596
+ {"type":"stats",...,"reasoning_metrics":{"backtrack_count":0,"backtrack_rate":0.0,...}}
597
+ ```
598
+
599
+ ### Usage in Evaluation Harness
600
+
601
+ ```bash
602
+ # Reasoning metrics included in all output formats
603
+ chuk-puzzles-eval sudoku -d easy -n 10 -o json
604
+ ```
605
+
606
+ ```python
607
+ from chuk_puzzles_gym.eval import evaluate_game
608
+
609
+ report = await evaluate_game("sudoku", difficulty="easy", episodes=10)
610
+ report.print_summary() # Includes "Reasoning Depth" section
611
+
612
+ # Aggregate metrics
613
+ print(f"Avg backtrack rate: {report.avg_backtrack_rate:.0%}")
614
+ print(f"Avg overhead: {report.avg_reasoning_overhead:.1f}x")
615
+ print(f"Avg steadiness: {report.avg_progress_steadiness:.0%}")
616
+ ```
617
+
618
+ ### What the Metrics Reveal
619
+
620
+ A **perfect solver** shows: 0 backtracks, 1.0x overhead, 100% steadiness, 1.0 velocity.
621
+
622
+ A **struggling agent** shows: high backtrack rate (revising decisions), error streaks (clustered confusion), low steadiness (stalling progress), and high overhead (wasted work).
623
+
624
+ These patterns are visible even when two agents both eventually solve a puzzle — the metrics expose the **quality of the reasoning path**, not just the outcome.
625
+
518
626
  ## Evaluation Harness
519
627
 
520
628
  The project includes a built-in **evaluation harness** for benchmarking puzzle-solving agents:
@@ -573,6 +681,12 @@ Avg Time: 12ms
573
681
  | `hints_used` | Number of hints requested |
574
682
  | `wall_time_ms` | Time to solve in milliseconds |
575
683
  | `seed` | Puzzle seed for reproducibility |
684
+ | `backtrack_count` | Times the agent revised a previous placement |
685
+ | `backtrack_rate` | Fraction of valid moves that were backtracks |
686
+ | `progress_steadiness` | How monotonically remaining work decreases (1.0 = never stalls) |
687
+ | `reasoning_overhead` | Total actions / optimal path length (1.0 = no waste) |
688
+ | `error_streak_max` | Longest run of consecutive invalid moves |
689
+ | `progress_velocity` | Average cells solved per step |
576
690
 
577
691
  ## Dataset Export
578
692
 
@@ -1163,12 +1277,13 @@ chuk-puzzles-gym/
1163
1277
  │ │ ├── base.py # GridPosition, MoveResult
1164
1278
  │ │ ├── config.py # Base GameConfig
1165
1279
  │ │ ├── enums.py # DifficultyLevel, GameCommand, etc.
1280
+ │ │ ├── evaluation.py # ReasoningMetrics, EpisodeResult, EvaluationSummary
1166
1281
  │ │ └── games.py # Game-specific models (Cage, Task, etc.)
1167
1282
  │ └── games/ # Self-contained game modules
1168
1283
  │ ├── __init__.py # AVAILABLE_GAMES registry
1169
1284
  │ ├── _base/ # Base classes
1170
1285
  │ │ ├── __init__.py
1171
- │ │ ├── game.py # PuzzleGame ABC
1286
+ │ │ ├── game.py # PuzzleGame ABC + ReasoningTracker
1172
1287
  │ │ └── commands.py # GameCommandHandler ABC
1173
1288
  │ ├── sudoku/ # Example game module
1174
1289
  │ │ ├── __init__.py # Exports SudokuGame
@@ -1195,6 +1310,7 @@ chuk-puzzles-gym/
1195
1310
  │ ├── example_graph_coloring.py # Graph Coloring game logic demo
1196
1311
  │ ├── example_cryptarithmetic.py# Cryptarithmetic game logic demo
1197
1312
  │ ├── example_rush_hour.py # Rush Hour game logic demo
1313
+ │ ├── example_reasoning_metrics.py # Reasoning depth metrics demo
1198
1314
  │ └── README.md # Example usage guide
1199
1315
  ├── .github/workflows/ # CI/CD workflows
1200
1316
  ├── pyproject.toml # Modern Python project config
@@ -1434,9 +1550,10 @@ See [ROADMAP.md](ROADMAP.md) for the full development roadmap.
1434
1550
  ### Highlights
1435
1551
 
1436
1552
  **Benchmarking & Metrics**
1437
- - Puzzle complexity metrics (constraint count, variable count, branching factor)
1438
- - Episode model for tracking game sessions
1439
- - Trace logging for offline analysis
1553
+ - ~~Puzzle complexity metrics~~ (implemented: constraint count, variable count, branching factor)
1554
+ - ~~Episode model for tracking game sessions~~ (implemented: EpisodeResult with ReasoningMetrics)
1555
+ - ~~Reasoning depth metrics~~ (implemented: backtrack detection, progress steadiness, error patterns)
1556
+ - ~~Trace logging for offline analysis~~ (implemented: solver distance traces in all output paths)
1440
1557
 
1441
1558
  **Agent Evaluation Tools**
1442
1559
  - Batch evaluation harness CLI
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chuk-puzzles-gym"
7
- version = "0.10.1"
7
+ version = "0.10.2"
8
8
  description = "Multi-game puzzle gym for LLM training and benchmarking - 30 constraint puzzles with synthetic data generation"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"