PyPI - chuk-puzzles-gym - Versions diffs - 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl - Mend

chuk-puzzles-gym: 0.10.1 (py3-none-any.whl) → 0.10.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) [hide | show]

chuk_puzzles_gym/eval.py CHANGED Viewed

@@ -25,7 +25,7 @@ import sys
 import time
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 if TYPE_CHECKING:
     pass
@@ -100,8 +100,36 @@ class EvaluationReport:
             return 0.0
         return sum(e.hints_used for e in self.episodes) / self.total_episodes
+    @property
+    def avg_backtrack_rate(self) -> float:
+        """Average backtrack rate across episodes with reasoning metrics."""
+        with_metrics = [e for e in self.episodes if e.reasoning_metrics is not None]
+        if not with_metrics:
+            return 0.0
+        return sum(e.reasoning_metrics.backtrack_rate for e in with_metrics) / len(with_metrics)  # type: ignore[union-attr]
+    @property
+    def avg_reasoning_overhead(self) -> float:
+        """Average reasoning overhead across episodes with reasoning metrics."""
+        with_metrics = [
+            e for e in self.episodes if e.reasoning_metrics is not None and e.reasoning_metrics.reasoning_overhead > 0
+        ]
+        if not with_metrics:
+            return 0.0
+        return sum(e.reasoning_metrics.reasoning_overhead for e in with_metrics) / len(with_metrics)  # type: ignore[union-attr]
+    @property
+    def avg_progress_steadiness(self) -> float:
+        """Average progress steadiness across episodes with reasoning metrics."""
+        with_metrics = [e for e in self.episodes if e.reasoning_metrics is not None]
+        if not with_metrics:
+            return 0.0
+        return sum(e.reasoning_metrics.progress_steadiness for e in with_metrics) / len(with_metrics)  # type: ignore[union-attr]
     def to_markdown(self) -> str:
         """Generate markdown report."""
+        has_reasoning = any(e.reasoning_metrics is not None for e in self.episodes)
         lines = [
             f"# {self.game.title()} {self.difficulty.title()} Evaluation",
             "",
@@ -112,24 +140,78 @@ class EvaluationReport:
             f"**Avg Hints:** {self.avg_hints:.1f}",
             f"**Avg Efficiency:** {self.avg_efficiency:.1%}",
             f"**Avg Time:** {self.avg_time_ms:.0f}ms",
-            "",
-            f"**Solver Config:** {'solver-free' if not self.solver_config.solver_allowed else f'budget={self.solver_config.hint_budget}, penalty={self.solver_config.hint_penalty}'}",
-            "",
-            "## Episode Details",
-            "",
-            "| Seed | Status | Steps | Invalid | Hints | Efficiency | Time (ms) |",
-            "|------|--------|-------|---------|-------|------------|-----------|",
         ]
-        for e in self.episodes:
-            status = "solved" if e.success else e.status.value
-            eff = f"{e.efficiency_score:.0%}" if e.success else "-"
+        if has_reasoning:
+            lines.extend(
+                [
+                    "",
+                    "### Reasoning Depth",
+                    f"**Avg Backtrack Rate:** {self.avg_backtrack_rate:.1%}",
+                    f"**Avg Reasoning Overhead:** {self.avg_reasoning_overhead:.2f}x",
+                    f"**Avg Progress Steadiness:** {self.avg_progress_steadiness:.1%}",
+                ]
+            )
+        lines.extend(
+            [
+                "",
+                f"**Solver Config:** {'solver-free' if not self.solver_config.solver_allowed else f'budget={self.solver_config.hint_budget}, penalty={self.solver_config.hint_penalty}'}",
+                "",
+                "## Episode Details",
+                "",
+            ]
+        )
+        if has_reasoning:
+            lines.append(
+                "| Seed | Status | Steps | Invalid | Hints | Efficiency | Backtracks | Steadiness | Time (ms) |"
+            )
             lines.append(
-                f"| {e.seed} | {status} | {e.steps_taken} | {e.invalid_actions} | {e.hints_used} | {eff} | {e.wall_time_ms} |"
+                "|------|--------|-------|---------|-------|------------|------------|------------|-----------|"
             )
+            for e in self.episodes:
+                status = "solved" if e.success else e.status.value
+                eff = f"{e.efficiency_score:.0%}" if e.success else "-"
+                bt = str(e.reasoning_metrics.backtrack_count) if e.reasoning_metrics else "-"
+                st = f"{e.reasoning_metrics.progress_steadiness:.0%}" if e.reasoning_metrics else "-"
+                lines.append(
+                    f"| {e.seed} | {status} | {e.steps_taken} | {e.invalid_actions} | {e.hints_used} | {eff} | {bt} | {st} | {e.wall_time_ms} |"
+                )
+        else:
+            lines.append("| Seed | Status | Steps | Invalid | Hints | Efficiency | Time (ms) |")
+            lines.append("|------|--------|-------|---------|-------|------------|-----------|")
+            for e in self.episodes:
+                status = "solved" if e.success else e.status.value
+                eff = f"{e.efficiency_score:.0%}" if e.success else "-"
+                lines.append(
+                    f"| {e.seed} | {status} | {e.steps_taken} | {e.invalid_actions} | {e.hints_used} | {eff} | {e.wall_time_ms} |"
+                )
         return "\n".join(lines)
     def to_json(self) -> str:
         """Generate JSON report."""
+        summary: dict[str, Any] = {
+            "total_episodes": self.total_episodes,
+            "solved_count": self.solved_count,
+            "solve_rate": self.solve_rate,
+            "avg_steps": self.avg_moves,
+            "avg_invalid": self.avg_invalid_moves,
+            "avg_hints": self.avg_hints,
+            "avg_efficiency": self.avg_efficiency,
+            "avg_time_ms": self.avg_time_ms,
+        }
+        # Add aggregate reasoning metrics if available
+        has_reasoning = any(e.reasoning_metrics is not None for e in self.episodes)
+        if has_reasoning:
+            summary["reasoning"] = {
+                "avg_backtrack_rate": round(self.avg_backtrack_rate, 3),
+                "avg_reasoning_overhead": round(self.avg_reasoning_overhead, 3),
+                "avg_progress_steadiness": round(self.avg_progress_steadiness, 3),
+            }
         return json.dumps(
             {
                 "game": self.game,
@@ -139,16 +221,7 @@ class EvaluationReport:
                     "hint_budget": self.solver_config.hint_budget,
                     "hint_penalty": self.solver_config.hint_penalty,
                 },
-                "summary": {
-                    "total_episodes": self.total_episodes,
-                    "solved_count": self.solved_count,
-                    "solve_rate": self.solve_rate,
-                    "avg_steps": self.avg_moves,
-                    "avg_invalid": self.avg_invalid_moves,
-                    "avg_hints": self.avg_hints,
-                    "avg_efficiency": self.avg_efficiency,
-                    "avg_time_ms": self.avg_time_ms,
-                },
+                "summary": summary,
                 "episodes": [e.to_summary_dict() for e in self.episodes],
             },
             indent=2,
@@ -158,35 +231,61 @@ class EvaluationReport:
         """Generate CSV report."""
         import io
+        has_reasoning = any(e.reasoning_metrics is not None for e in self.episodes)
         output = io.StringIO()
         writer = csv.writer(output)
-        writer.writerow(
-            [
-                "game",
-                "difficulty",
-                "seed",
-                "status",
-                "steps_taken",
-                "invalid_actions",
-                "hints_used",
-                "efficiency",
-                "wall_time_ms",
-            ]
-        )
-        for e in self.episodes:
-            writer.writerow(
+        header = [
+            "game",
+            "difficulty",
+            "seed",
+            "status",
+            "steps_taken",
+            "invalid_actions",
+            "hints_used",
+            "efficiency",
+            "wall_time_ms",
+        ]
+        if has_reasoning:
+            header.extend(
                 [
-                    e.game,
-                    e.difficulty.value,
-                    e.seed,
-                    e.status.value,
-                    e.steps_taken,
-                    e.invalid_actions,
-                    e.hints_used,
-                    f"{e.efficiency_score:.3f}",
-                    e.wall_time_ms,
+                    "backtrack_count",
+                    "backtrack_rate",
+                    "reasoning_overhead",
+                    "progress_steadiness",
+                    "error_streak_max",
                 ]
             )
+        writer.writerow(header)
+        for e in self.episodes:
+            row = [
+                e.game,
+                e.difficulty.value,
+                e.seed,
+                e.status.value,
+                e.steps_taken,
+                e.invalid_actions,
+                e.hints_used,
+                f"{e.efficiency_score:.3f}",
+                e.wall_time_ms,
+            ]
+            if has_reasoning:
+                rm = e.reasoning_metrics
+                if rm is not None:
+                    row.extend(
+                        [
+                            rm.backtrack_count,
+                            f"{rm.backtrack_rate:.3f}",
+                            f"{rm.reasoning_overhead:.3f}",
+                            f"{rm.progress_steadiness:.3f}",
+                            rm.error_streak_max,
+                        ]
+                    )
+                else:
+                    row.extend(["", "", "", "", ""])
+            writer.writerow(row)
         return output.getvalue()
     def print_summary(self) -> None:
@@ -206,6 +305,15 @@ class EvaluationReport:
         print(f"Avg Efficiency: {self.avg_efficiency:.1%}")
         print(f"Avg Time:   {self.avg_time_ms:.0f}ms")
+        # Reasoning depth metrics
+        has_reasoning = any(e.reasoning_metrics is not None for e in self.episodes)
+        if has_reasoning:
+            print("-" * 40)
+            print("Reasoning Depth:")
+            print(f"  Backtrack Rate:      {self.avg_backtrack_rate:.1%}")
+            print(f"  Reasoning Overhead:  {self.avg_reasoning_overhead:.2f}x")
+            print(f"  Progress Steadiness: {self.avg_progress_steadiness:.1%}")
 async def _apply_hint(game: PuzzleGame, hint_data: tuple) -> MoveResult:
     """Apply a hint to the game based on game type.
@@ -433,15 +541,22 @@ async def run_episode(
             # Apply the hint based on game type
             try:
                 result = await _apply_hint(game, hint_data)
+                # Normalize hint_data to a tuple for position tracking
+                position = hint_data if isinstance(hint_data, tuple) else (hint_data,)
                 if result.success:
                     steps_taken += 1
+                    # Use game's dynamic optimal_steps (reflects current state)
+                    remaining = game.optimal_steps or 0
+                    game.reasoning_tracker.record_valid_move(position, remaining)
                 else:
                     invalid_actions += 1
+                    game.reasoning_tracker.record_invalid_move()
                     # If we get too many consecutive invalid moves, break
                     if invalid_actions > 50:
                         break
             except (TypeError, ValueError, AttributeError, IndexError):
                 invalid_actions += 1
+                game.reasoning_tracker.record_invalid_move()
                 if invalid_actions > 50:
                     break
         elif not use_hints:
@@ -461,6 +576,12 @@ async def run_episode(
     # Get retries from game if tracked
     retries = getattr(game, "retries", 0)
+    # Collect reasoning depth metrics (use pre-solve optimal_steps since
+    # the game's optimal_steps may be 0 after solving)
+    reasoning_metrics = game.reasoning_tracker.to_metrics(
+        optimal_path_length=optimal_steps if optimal_steps and optimal_steps >= 1 else None,
+    )
     return EpisodeResult(
         game=game.name,
         difficulty=DifficultyLevel(difficulty),
@@ -475,6 +596,7 @@ async def run_episode(
         retries=retries,
         optimal_steps=optimal_steps,
         solver_config=solver_config,
+        reasoning_metrics=reasoning_metrics,
     )

chuk_puzzles_gym/export/dataset.py CHANGED Viewed

@@ -190,6 +190,12 @@ class DatasetExporter:
             if canonical:
                 gold_answer = str(canonical)
+        # Build reasoning tags from complexity profile
+        complexity_profile = game.complexity_profile
+        reasoning_type = complexity_profile.get("reasoning_type", "deductive")
+        search_space = complexity_profile.get("search_space", "medium")
+        tags = [domain, difficulty.value, f"reasoning:{reasoning_type}", f"search:{search_space}"]
         # Create Problem using core schema
         return Problem(
             # Identity
@@ -214,7 +220,7 @@ class DatasetExporter:
             ),
             # Metadata
             operation_count=game.optimal_steps,
-            tags=[domain, difficulty.value],
+            tags=tags,
         )
     @property

chuk_puzzles_gym/games/_base/game.py CHANGED Viewed

@@ -1,10 +1,99 @@
 """Abstract base class for all puzzle games."""
+from __future__ import annotations
 import random
 from abc import ABC, abstractmethod
 from typing import Any
 from ...models import DifficultyLevel, DifficultyProfile, MoveResult, SolverConfig
+from ...models.evaluation import ReasoningMetrics
+class ReasoningTracker:
+    """Tracks reasoning depth metrics during puzzle gameplay.
+    Accumulates data about backtrack behavior, solver distance progression,
+    and error patterns. Produces a ReasoningMetrics snapshot on demand.
+    This is a lightweight, non-Pydantic class meant to be mutated during play.
+    """
+    __slots__ = (
+        "_placed_positions",
+        "_solver_distance_trace",
+        "_backtrack_count",
+        "_consecutive_errors",
+        "_error_streaks",
+        "_max_error_streak",
+        "_total_actions",
+    )
+    def __init__(self) -> None:
+        self._placed_positions: set[tuple[Any, ...]] = set()
+        self._solver_distance_trace: list[int] = []
+        self._backtrack_count: int = 0
+        self._consecutive_errors: int = 0
+        self._error_streaks: list[int] = []
+        self._max_error_streak: int = 0
+        self._total_actions: int = 0
+    def record_valid_move(self, position: tuple[Any, ...], remaining_count: int) -> None:
+        """Record a valid (successful) move.
+        Args:
+            position: The position/target of the move (for backtrack detection)
+            remaining_count: How many positions remain to be filled after this move
+        """
+        self._total_actions += 1
+        # Detect backtrack: placing at a position already placed before
+        if position in self._placed_positions:
+            self._backtrack_count += 1
+        self._placed_positions.add(position)
+        self._solver_distance_trace.append(remaining_count)
+        # Finalize any pending error streak
+        if self._consecutive_errors > 0:
+            self._error_streaks.append(self._consecutive_errors)
+            self._consecutive_errors = 0
+    def record_invalid_move(self) -> None:
+        """Record an invalid (failed) move."""
+        self._total_actions += 1
+        self._consecutive_errors += 1
+        self._max_error_streak = max(self._max_error_streak, self._consecutive_errors)
+    def to_metrics(self, optimal_path_length: int | None = None) -> ReasoningMetrics:
+        """Produce a frozen ReasoningMetrics snapshot.
+        Args:
+            optimal_path_length: Minimum steps to solve (from solver), if known.
+        """
+        # Finalize any pending error streak
+        error_streaks = list(self._error_streaks)
+        if self._consecutive_errors > 0:
+            error_streaks.append(self._consecutive_errors)
+        return ReasoningMetrics(
+            backtrack_count=self._backtrack_count,
+            solver_distance_trace=list(self._solver_distance_trace),
+            error_streak_max=self._max_error_streak,
+            error_streaks=error_streaks,
+            total_actions=self._total_actions,
+            optimal_path_length=optimal_path_length,
+        )
+    def reset(self) -> None:
+        """Reset all tracked state."""
+        self._placed_positions.clear()
+        self._solver_distance_trace.clear()
+        self._backtrack_count = 0
+        self._consecutive_errors = 0
+        self._error_streaks.clear()
+        self._max_error_streak = 0
+        self._total_actions = 0
 class PuzzleGame(ABC):
@@ -64,6 +153,9 @@ class PuzzleGame(ABC):
         self.game_started = False
         self._last_move_position: tuple[Any, ...] | None = None  # For retry detection
+        # Reasoning depth tracker
+        self._reasoning_tracker = ReasoningTracker()
     @abstractmethod
     async def generate_puzzle(self) -> None:
         """Generate a new puzzle with a unique solution.
@@ -162,8 +254,11 @@ class PuzzleGame(ABC):
         """
         if success:
             self.moves_made += 1
+            remaining = self._compute_remaining()
+            self._reasoning_tracker.record_valid_move(position, remaining)
         else:
             self.invalid_moves += 1
+            self._reasoning_tracker.record_invalid_move()
         # Detect retries (same position attempted again)
         if self._last_move_position == position:
@@ -183,6 +278,34 @@ class PuzzleGame(ABC):
         self.hints_used += 1
         return True
+    def _compute_remaining(self) -> int:
+        """Compute how many positions remain to be filled.
+        Uses optimal_steps directly since it is typically dynamic
+        (reflects current game state, e.g. counting empty cells).
+        Override in subclasses for more accurate tracking.
+        """
+        return self.optimal_steps or 0
+    def get_reasoning_metrics(self) -> ReasoningMetrics:
+        """Get a snapshot of reasoning depth metrics for the current episode.
+        Returns:
+            Frozen ReasoningMetrics with all tracked data.
+        """
+        optimal = self.optimal_steps
+        # optimal_path_length requires ge=1; treat 0 or negative as unknown
+        if optimal is not None and optimal < 1:
+            optimal = None
+        return self._reasoning_tracker.to_metrics(
+            optimal_path_length=optimal,
+        )
+    @property
+    def reasoning_tracker(self) -> ReasoningTracker:
+        """Access the reasoning tracker directly."""
+        return self._reasoning_tracker
     def can_use_hint(self) -> bool:
         """Check if hints are available without consuming one.

chuk_puzzles_gym/gym_env.py CHANGED Viewed

@@ -197,6 +197,7 @@ class PuzzleEnv:
             result = await self._execute_action(cmd, args)
         except Exception as e:
             self._game.invalid_moves += 1
+            self._game.reasoning_tracker.record_invalid_move()
             return (
                 self._get_observation(),
                 self.reward_config["invalid_attempt"],
@@ -207,17 +208,25 @@ class PuzzleEnv:
         self._step_count += 1
+        # Build position tuple from parsed args for reasoning tracker
+        position = tuple(args)
         # Calculate reward
         if result.success:
             reward = self.reward_config["correct_placement"]
+            # Feed reasoning tracker
+            # optimal_steps is dynamic (reflects current state), so use it directly
+            remaining = self._game.optimal_steps or 0
+            self._game.reasoning_tracker.record_valid_move(position, remaining)
             # Check for completion
             terminated = self._game.is_complete()
             if terminated:
                 # Add completion bonus with efficiency multiplier
-                optimal = self._game.optimal_steps
-                if optimal and self._game.moves_made > 0:
-                    efficiency = min(1.0, optimal / self._game.moves_made)
+                opt = self._game.optimal_steps
+                if opt and self._game.moves_made > 0:
+                    efficiency = min(1.0, opt / self._game.moves_made)
                 else:
                     efficiency = 1.0
                 reward += (
@@ -226,11 +235,12 @@ class PuzzleEnv:
         else:
             reward = self.reward_config["invalid_attempt"]
             self._game.invalid_moves += 1
+            self._game.reasoning_tracker.record_invalid_move()
             terminated = False
         truncated = self._step_count >= self.max_steps
-        info = {
+        info: dict[str, Any] = {
             "action": action_str,
             "success": result.success,
             "message": result.message,
@@ -239,6 +249,10 @@ class PuzzleEnv:
             "hints_used": self._game.hints_used,
         }
+        # Include reasoning metrics on episode end
+        if terminated or truncated:
+            info["reasoning_metrics"] = self._game.get_reasoning_metrics().to_dict()
         return self._get_observation(), reward, terminated, truncated, info
     async def _execute_action(self, cmd: str, args: list[str]) -> Any:
@@ -371,7 +385,7 @@ class PuzzleEnv:
         if self._game is None:
             return {"error": "no_game"}
-        obs = {
+        obs: dict[str, Any] = {
             "game": self._game.name,
             "difficulty": self._game.difficulty.value,
             "seed": self._game.seed,
@@ -397,6 +411,7 @@ class PuzzleEnv:
             return {}
         profile = self._game.difficulty_profile
+        reasoning = self._game.get_reasoning_metrics()
         return {
             "optimal_steps": self._game.optimal_steps,
             "difficulty_profile": {
@@ -411,6 +426,7 @@ class PuzzleEnv:
                 "hint_budget": self.solver_config.hint_budget,
                 "hint_penalty": self.solver_config.hint_penalty,
             },
+            "reasoning_metrics": reasoning.to_dict(),
         }
     def render(self, mode: str = "ansi") -> str | None:

chuk_puzzles_gym/models/__init__.py CHANGED Viewed

@@ -20,6 +20,7 @@ from .evaluation import (
     EpisodeTracer,
     EvaluationSummary,
     MoveRecord,
+    ReasoningMetrics,
     SolverConfig,
     TraceEvent,
 )
@@ -42,6 +43,7 @@ __all__ = [
     "EpisodeTracer",
     "EvaluationSummary",
     "MoveRecord",
+    "ReasoningMetrics",
     "SolverConfig",
     "TraceEvent",
 ]

chuk_puzzles_gym/models/evaluation.py CHANGED Viewed

@@ -38,6 +38,132 @@ class MoveRecord(BaseModel):
     timestamp_ms: int = Field(default=0, description="Milliseconds since episode start")
+class ReasoningMetrics(BaseModel):
+    """Reasoning depth metrics for evaluating quality of agent reasoning.
+    Goes beyond binary success/failure to measure *how* an agent reasons:
+    - Backtrack detection: did the agent revise previous placements?
+    - Progress tracking: how steadily did the agent make progress?
+    - Error patterns: were errors isolated or clustered in streaks?
+    - Reasoning overhead: how much wasted work relative to optimal?
+    """
+    model_config = ConfigDict(frozen=True)
+    # Raw tracking data
+    backtrack_count: int = Field(
+        default=0,
+        ge=0,
+        description="Times agent placed a value at a previously filled position",
+    )
+    solver_distance_trace: list[int] = Field(
+        default_factory=list,
+        description="Remaining positions to fill after each valid move",
+    )
+    error_streak_max: int = Field(
+        default=0,
+        ge=0,
+        description="Longest consecutive run of invalid moves",
+    )
+    error_streaks: list[int] = Field(
+        default_factory=list,
+        description="Lengths of each consecutive error streak",
+    )
+    total_actions: int = Field(
+        default=0,
+        ge=0,
+        description="Total actions taken (valid + invalid)",
+    )
+    optimal_path_length: int | None = Field(
+        default=None,
+        ge=1,
+        description="Minimum steps to solve (from solver)",
+    )
+    @computed_field
+    @property
+    def reasoning_overhead(self) -> float:
+        """Ratio of total actions to optimal path length.
+        1.0 = perfect (no wasted actions). Higher = more wasted reasoning.
+        Returns 0.0 if optimal path length is unknown.
+        """
+        if self.optimal_path_length is None or self.optimal_path_length == 0:
+            return 0.0
+        if self.total_actions == 0:
+            return 0.0
+        return self.total_actions / self.optimal_path_length
+    @computed_field
+    @property
+    def backtrack_rate(self) -> float:
+        """Fraction of valid moves that were backtracks (revisions).
+        0.0 = no backtracks, 1.0 = every move was a revision.
+        """
+        valid_moves = len(self.solver_distance_trace)
+        if valid_moves == 0:
+            return 0.0
+        return self.backtrack_count / valid_moves
+    @computed_field
+    @property
+    def progress_velocity(self) -> float:
+        """Average progress per valid move (cells solved per step).
+        Measures how much closer to the solution each move gets.
+        1.0 = every move reduces remaining by exactly 1. Lower = backtracks/plateaus.
+        Returns 0.0 if insufficient data.
+        """
+        trace = self.solver_distance_trace
+        if len(trace) < 2:
+            return 0.0
+        total_progress = trace[0] - trace[-1]
+        steps = len(trace) - 1
+        if steps == 0:
+            return 0.0
+        return total_progress / steps
+    @computed_field
+    @property
+    def progress_steadiness(self) -> float:
+        """Measure of how monotonically progress decreased (0.0 to 1.0).
+        1.0 = perfectly monotonic progress (every move reduced remaining count).
+        0.0 = no monotonic progress at all.
+        """
+        trace = self.solver_distance_trace
+        if len(trace) < 2:
+            return 1.0
+        monotonic_steps = sum(1 for i in range(1, len(trace)) if trace[i] < trace[i - 1])
+        return monotonic_steps / (len(trace) - 1)
+    @computed_field
+    @property
+    def avg_error_streak(self) -> float:
+        """Average length of consecutive error streaks.
+        Returns 0.0 if no error streaks occurred.
+        """
+        if not self.error_streaks:
+            return 0.0
+        return sum(self.error_streaks) / len(self.error_streaks)
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to flat dictionary for reporting."""
+        return {
+            "backtrack_count": self.backtrack_count,
+            "backtrack_rate": round(self.backtrack_rate, 3),
+            "reasoning_overhead": round(self.reasoning_overhead, 3),
+            "progress_velocity": round(self.progress_velocity, 3),
+            "progress_steadiness": round(self.progress_steadiness, 3),
+            "error_streak_max": self.error_streak_max,
+            "avg_error_streak": round(self.avg_error_streak, 3),
+            "total_actions": self.total_actions,
+            "optimal_path_length": self.optimal_path_length,
+        }
 class EpisodeResult(BaseModel):
     """Complete result of a single puzzle episode with normalized metrics.
@@ -91,6 +217,12 @@ class EpisodeResult(BaseModel):
         description="Complete move history for detailed analysis",
     )
+    # Reasoning depth metrics
+    reasoning_metrics: ReasoningMetrics | None = Field(
+        default=None,
+        description="Detailed reasoning depth metrics (backtracks, progress, error patterns)",
+    )
     # Computed normalized metrics
     @computed_field
     @property
@@ -154,7 +286,7 @@ class EpisodeResult(BaseModel):
     def to_summary_dict(self) -> dict[str, Any]:
         """One-line episode summary for logging/streaming."""
-        return {
+        d: dict[str, Any] = {
             "game": self.game,
             "seed": self.seed,
             "difficulty": self.difficulty.value,
@@ -165,6 +297,9 @@ class EpisodeResult(BaseModel):
             "efficiency": round(self.efficiency_score, 3),
             "time_ms": self.wall_time_ms,
         }
+        if self.reasoning_metrics is not None:
+            d["reasoning"] = self.reasoning_metrics.to_dict()
+        return d
     def to_jsonl(self) -> str:
         """Single-line JSON for streaming output."""
@@ -217,6 +352,35 @@ class EvaluationSummary(BaseModel):
             return 0.0
         return sum(e.wall_time_ms for e in self.episodes) / len(self.episodes)
+    @computed_field
+    @property
+    def avg_backtrack_rate(self) -> float:
+        """Average backtrack rate across episodes with reasoning metrics."""
+        with_metrics = [e for e in self.episodes if e.reasoning_metrics is not None]
+        if not with_metrics:
+            return 0.0
+        return sum(e.reasoning_metrics.backtrack_rate for e in with_metrics) / len(with_metrics)  # type: ignore[union-attr]
+    @computed_field
+    @property
+    def avg_reasoning_overhead(self) -> float:
+        """Average reasoning overhead across episodes with reasoning metrics."""
+        with_metrics = [
+            e for e in self.episodes if e.reasoning_metrics is not None and e.reasoning_metrics.reasoning_overhead > 0
+        ]
+        if not with_metrics:
+            return 0.0
+        return sum(e.reasoning_metrics.reasoning_overhead for e in with_metrics) / len(with_metrics)  # type: ignore[union-attr]
+    @computed_field
+    @property
+    def avg_progress_steadiness(self) -> float:
+        """Average progress steadiness across episodes with reasoning metrics."""
+        with_metrics = [e for e in self.episodes if e.reasoning_metrics is not None]
+        if not with_metrics:
+            return 0.0
+        return sum(e.reasoning_metrics.progress_steadiness for e in with_metrics) / len(with_metrics)  # type: ignore[union-attr]
 class TraceEvent(BaseModel):
     """A single event in an episode trace for JSONL logging."""

chuk_puzzles_gym/server.py CHANGED Viewed

@@ -63,6 +63,9 @@ class ArcadeHandler(TelnetHandler):
         if not self.current_game:
             return
+        # Get final reasoning metrics
+        reasoning = self.current_game.get_reasoning_metrics().to_dict()
         if self.output_mode == OutputMode.JSON:
             await self.send_json_response(
                 type="complete",
@@ -72,17 +75,27 @@ class ArcadeHandler(TelnetHandler):
                 invalid_moves=self.current_game.invalid_moves,
                 hints_used=self.current_game.hints_used,
                 optimal_steps=self.current_game.optimal_steps,
+                reasoning_metrics=reasoning,
             )
         elif self.output_mode == OutputMode.STRICT:
             await self.send_line(
                 f"COMPLETE:{self.current_game.moves_made}:{self.current_game.invalid_moves}:"
-                f"{self.current_game.hints_used}"
+                f"{self.current_game.hints_used}:"
+                f"BT={reasoning['backtrack_count']}:"
+                f"OH={reasoning['reasoning_overhead']:.2f}:"
+                f"ST={reasoning['progress_steadiness']:.2f}"
             )
         else:
             await self.send_line("\n" + "=" * 50)
             await self.send_line("CONGRATULATIONS! YOU SOLVED IT!")
             await self.send_line("=" * 50)
             await self.send_line(self.current_game.get_stats())
+            await self.send_line("")
+            await self.send_line("Reasoning Depth:")
+            await self.send_line(f"  Backtrack rate:      {reasoning['backtrack_rate']:.0%}")
+            await self.send_line(f"  Progress steadiness: {reasoning['progress_steadiness']:.0%}")
+            await self.send_line(f"  Reasoning overhead:  {reasoning['reasoning_overhead']:.1f}x optimal")
+            await self.send_line(f"  Error streak max:    {reasoning['error_streak_max']}")
             await self.send_line("\nType 'menu' to play another game.")
             await self.send_line("=" * 50 + "\n")
@@ -109,6 +122,9 @@ class ArcadeHandler(TelnetHandler):
             "constraint_density": profile.constraint_density,
         }
+        # Reasoning depth metrics
+        reasoning = self.current_game.get_reasoning_metrics().to_dict()
         return {
             "game": self.current_game.name,
             "difficulty": self.current_game.difficulty.value,
@@ -120,6 +136,7 @@ class ArcadeHandler(TelnetHandler):
             "optimal_steps": self.current_game.optimal_steps,
             "is_complete": self.current_game.is_complete(),
             "difficulty_profile": profile_dict,
+            "reasoning_metrics": reasoning,
             "grid": grid,
         }
@@ -435,9 +452,10 @@ class ArcadeHandler(TelnetHandler):
             return
         if cmd_enum == GameCommand.STATS:
-            # Show detailed stats including difficulty profile
+            # Show detailed stats including difficulty profile and reasoning metrics
             profile = self.current_game.difficulty_profile
             optimal = self.current_game.optimal_steps
+            reasoning = self.current_game.get_reasoning_metrics().to_dict()
             if self.output_mode == OutputMode.JSON:
                 await self.send_json_response(
@@ -455,11 +473,15 @@ class ArcadeHandler(TelnetHandler):
                         "state_observability": profile.state_observability,
                         "constraint_density": profile.constraint_density,
                     },
+                    reasoning_metrics=reasoning,
                 )
             elif self.output_mode == OutputMode.STRICT:
                 await self.send_line(
                     f"STATS:{self.current_game.moves_made}:{self.current_game.invalid_moves}:"
-                    f"{self.current_game.hints_used}:{optimal or 0}"
+                    f"{self.current_game.hints_used}:{optimal or 0}:"
+                    f"BT={reasoning['backtrack_count']}:"
+                    f"OH={reasoning['reasoning_overhead']:.2f}:"
+                    f"ST={reasoning['progress_steadiness']:.2f}"
                 )
             else:
                 await self.send_line("")
@@ -482,6 +504,15 @@ class ArcadeHandler(TelnetHandler):
                     await self.send_line(f"  Optimal steps: {optimal}")
                     await self.send_line(f"  Current efficiency: {efficiency:.1%}")
                 await self.send_line("")
+                await self.send_line("Reasoning Depth:")
+                await self.send_line(f"  Backtrack count:     {reasoning['backtrack_count']}")
+                await self.send_line(f"  Backtrack rate:      {reasoning['backtrack_rate']:.0%}")
+                await self.send_line(f"  Progress velocity:   {reasoning['progress_velocity']:.2f} cells/step")
+                await self.send_line(f"  Progress steadiness: {reasoning['progress_steadiness']:.0%}")
+                await self.send_line(f"  Reasoning overhead:  {reasoning['reasoning_overhead']:.1f}x optimal")
+                await self.send_line(f"  Error streak max:    {reasoning['error_streak_max']}")
+                await self.send_line(f"  Total actions:       {reasoning['total_actions']}")
+                await self.send_line("")
                 await self.send_line("Difficulty Profile:")
                 await self.send_line(f"  Logic depth: {profile.logic_depth}")
                 await self.send_line(f"  Branching factor: {profile.branching_factor:.1f}")

{chuk_puzzles_gym-0.10.1.dist-info → chuk_puzzles_gym-0.10.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chuk-puzzles-gym
-Version: 0.10.1
+Version: 0.10.2
 Summary: Multi-game puzzle gym for LLM training and benchmarking - 30 constraint puzzles with synthetic data generation
 Author: Chris Hay
 License: MIT
@@ -93,10 +93,17 @@ Once connected, type `help` to see available games, or `sudoku easy` to start pl
   - Enable with `mode agent` command
   - Machine-parseable grid format with clear start/end markers
   - Compact output optimized for LLM tool integration
+- **Reasoning Depth Metrics** - Measure *how* agents reason, not just whether they succeed
+  - Backtrack detection (did the agent revise previous placements?)
+  - Progress steadiness (monotonic advance toward solution?)
+  - Error streak analysis (isolated mistakes vs. clustered confusion?)
+  - Reasoning overhead (wasted work relative to optimal path)
+  - Solver distance traces (remaining work after each valid move)
+  - Available in all paths: Gym env, eval harness, and server (telnet/WebSocket)
 - **Evaluation Harness** (`chuk-puzzles-eval`) - Built-in benchmarking CLI
   - Batch evaluation with configurable episodes
   - Multiple output formats (JSON, CSV, Markdown)
-  - Metrics: moves, invalid moves, hints, solve time
+  - Metrics: moves, invalid moves, hints, solve time, reasoning depth
   - Reproducible with deterministic seeds
 - **Dataset Export** (`chuk-puzzles-export`) - Synthetic data generation for LLM training
   - JSONL output with complete problem definitions and solutions
@@ -500,6 +507,7 @@ games = PuzzleEnv.available_games()
 - **All 30 games** accessible through unified API
 - **Configurable rewards** for correct moves, invalid attempts, completion bonuses
+- **Reasoning depth metrics** tracking backtracks, progress steadiness, error patterns
 - **Hint system** with optional budget limits
 - **Solver-free mode** for pure reasoning benchmarks
 - **Efficiency scoring** based on optimal step counts
@@ -515,8 +523,25 @@ obs = {
     "moves": 5,
     "invalid_moves": 1,
     "hints_used": 2,
+    "hints_remaining": 98,
     "is_complete": False,
-    "grid": [[4, 0, 8, ...], ...]  # Game-specific state
+    "grid": [[4, 0, 8, ...], ...],  # Game-specific state
+    "render": "  | 1 2 3 | ...",     # ASCII grid
+}
+# Info dict includes reasoning metrics and difficulty profile
+info = {
+    "optimal_steps": 45,
+    "difficulty_profile": {"logic_depth": 2, "branching_factor": 2.0, ...},
+    "reasoning_metrics": {
+        "backtrack_count": 0,
+        "backtrack_rate": 0.0,
+        "progress_velocity": 1.0,
+        "progress_steadiness": 1.0,
+        "reasoning_overhead": 1.0,
+        "error_streak_max": 0,
+        "solver_distance_trace": [44, 43, 42, ...],
+    },
 }
 ```
@@ -546,6 +571,89 @@ config = SolverConfig(hint_budget=5, hint_penalty=0.1)
 env = PuzzleEnv("sudoku", solver_config=config)
 ```
+## Reasoning Depth Metrics
+Beyond binary success/failure, the system measures **how** an agent reasons through puzzles. These metrics are available in all interaction paths: the Gym environment, the evaluation harness, and the telnet/WebSocket server.
+### Metrics
+| Metric | Description | Perfect Score |
+|--------|-------------|---------------|
+| `backtrack_count` | Times the agent revised a previous placement | 0 |
+| `backtrack_rate` | Fraction of valid moves that were backtracks | 0% |
+| `progress_velocity` | Average cells solved per step | 1.0 |
+| `progress_steadiness` | How monotonically remaining work decreases (1.0 = never stalls) | 100% |
+| `reasoning_overhead` | Total actions / optimal path length (1.0 = no waste) | 1.0x |
+| `error_streak_max` | Longest run of consecutive invalid moves | 0 |
+| `avg_error_streak` | Average length of error bursts | 0.0 |
+| `solver_distance_trace` | Remaining positions after each valid move | Monotonically decreasing |
+### Usage in Gym Environment
+```python
+from chuk_puzzles_gym.gym_env import PuzzleEnv
+env = PuzzleEnv("sudoku", difficulty="easy", seed=42)
+obs, info = await env.reset()
+# Reasoning metrics available in info after reset
+print(info["reasoning_metrics"])
+# ... agent plays ...
+obs, reward, terminated, truncated, info = await env.step("place 1 1 5")
+# On episode end, info includes full reasoning metrics
+if terminated:
+    metrics = info["reasoning_metrics"]
+    print(f"Backtrack rate: {metrics['backtrack_rate']:.0%}")
+    print(f"Overhead: {metrics['reasoning_overhead']:.1f}x")
+    print(f"Steadiness: {metrics['progress_steadiness']:.0%}")
+```
+### Usage in Server (Telnet/WebSocket)
+Reasoning metrics are included automatically in server output:
+- **JSON mode**: `reasoning_metrics` dict in every state response and completion message
+- **STRICT mode**: `BT=` (backtrack count), `OH=` (reasoning overhead), and `ST=` (progress steadiness) fields appended to STATS and COMPLETE messages
+- **Normal mode**: "Reasoning Depth" section shown on completion and in `stats` command
+```
+> mode json
+> place 1 1 5
+{"type":"result","success":true,...,"state":{...,"reasoning_metrics":{"backtrack_count":0,...}}}
+> stats
+{"type":"stats",...,"reasoning_metrics":{"backtrack_count":0,"backtrack_rate":0.0,...}}
+```
+### Usage in Evaluation Harness
+```bash
+# Reasoning metrics included in all output formats
+chuk-puzzles-eval sudoku -d easy -n 10 -o json
+```
+```python
+from chuk_puzzles_gym.eval import evaluate_game
+report = await evaluate_game("sudoku", difficulty="easy", episodes=10)
+report.print_summary()  # Includes "Reasoning Depth" section
+# Aggregate metrics
+print(f"Avg backtrack rate: {report.avg_backtrack_rate:.0%}")
+print(f"Avg overhead: {report.avg_reasoning_overhead:.1f}x")
+print(f"Avg steadiness: {report.avg_progress_steadiness:.0%}")
+```
+### What the Metrics Reveal
+A **perfect solver** shows: 0 backtracks, 1.0x overhead, 100% steadiness, 1.0 velocity.
+A **struggling agent** shows: high backtrack rate (revising decisions), error streaks (clustered confusion), low steadiness (stalling progress), and high overhead (wasted work).
+These patterns are visible even when two agents both eventually solve a puzzle — the metrics expose the **quality of the reasoning path**, not just the outcome.
 ## Evaluation Harness
 The project includes a built-in **evaluation harness** for benchmarking puzzle-solving agents:
@@ -604,6 +712,12 @@ Avg Time:   12ms
 | `hints_used` | Number of hints requested |
 | `wall_time_ms` | Time to solve in milliseconds |
 | `seed` | Puzzle seed for reproducibility |
+| `backtrack_count` | Times agent revised a previous placement |
+| `backtrack_rate` | Fraction of valid moves that were backtracks |
+| `progress_steadiness` | How monotonically remaining work decreases (1.0 = never stalls) |
+| `reasoning_overhead` | Total actions / optimal path (1.0 = no waste) |
+| `error_streak_max` | Longest run of consecutive invalid moves |
+| `progress_velocity` | Average cells solved per step |
 ## Dataset Export
@@ -1194,12 +1308,13 @@ chuk-puzzles-gym/
 │       │   ├── base.py           # GridPosition, MoveResult
 │       │   ├── config.py         # Base GameConfig
 │       │   ├── enums.py          # DifficultyLevel, GameCommand, etc.
+│       │   ├── evaluation.py     # ReasoningMetrics, EpisodeResult, EvaluationSummary
 │       │   └── games.py          # Game-specific models (Cage, Task, etc.)
 │       └── games/                # Self-contained game modules
 │           ├── __init__.py       # AVAILABLE_GAMES registry
 │           ├── _base/            # Base classes
 │           │   ├── __init__.py
-│           │   ├── game.py       # PuzzleGame ABC
+│           │   ├── game.py       # PuzzleGame ABC + ReasoningTracker
 │           │   └── commands.py   # GameCommandHandler ABC
 │           ├── sudoku/           # Example game module
 │           │   ├── __init__.py   # Exports SudokuGame
@@ -1226,6 +1341,7 @@ chuk-puzzles-gym/
 │   ├── example_graph_coloring.py # Graph Coloring game logic demo
 │   ├── example_cryptarithmetic.py# Cryptarithmetic game logic demo
 │   ├── example_rush_hour.py      # Rush Hour game logic demo
+│   ├── example_reasoning_metrics.py # Reasoning depth metrics demo
 │   └── README.md                 # Example usage guide
 ├── .github/workflows/            # CI/CD workflows
 ├── pyproject.toml                # Modern Python project config
@@ -1465,9 +1581,10 @@ See [ROADMAP.md](ROADMAP.md) for the full development roadmap.
 ### Highlights
 **Benchmarking & Metrics**
-- Puzzle complexity metrics (constraint count, variable count, branching factor)
-- Episode model for tracking game sessions
-- Trace logging for offline analysis
+- ~~Puzzle complexity metrics~~ (implemented: constraint count, variable count, branching factor)
+- ~~Episode model for tracking game sessions~~ (implemented: EpisodeResult with ReasoningMetrics)
+- ~~Reasoning depth metrics~~ (implemented: backtrack detection, progress steadiness, error patterns)
+- ~~Trace logging for offline analysis~~ (implemented: solver distance traces in all output paths)
 **Agent Evaluation Tools**
 - Batch evaluation harness CLI

{chuk_puzzles_gym-0.10.1.dist-info → chuk_puzzles_gym-0.10.2.dist-info}/RECORD RENAMED Viewed

@@ -1,14 +1,14 @@
 chuk_puzzles_gym/__init__.py,sha256=zh2sc6QFKrtAmMLee7vlHgXuOBoB5CjSldlKFjZTVVE,521
 chuk_puzzles_gym/constants.py,sha256=58pKdvwoaB4PF1AK4b7mLNf_Y_YFyFassd1hYH1IUNE,280
-chuk_puzzles_gym/eval.py,sha256=jWjfQ4OaBNY2vDwcRxw1-MC27VorLNUMfRW-lQpK3Rs,26415
-chuk_puzzles_gym/gym_env.py,sha256=qoQZFz2Dnbl3QjTsDNHAxAx1qomU8paXVlH-SDcwlZI,17288
-chuk_puzzles_gym/server.py,sha256=QnG48mXd8AKDVFUULIwLWidqDvcsCkayxbvo7h_EKBg,45947
+chuk_puzzles_gym/eval.py,sha256=-ku_pshSMG5RIu-p4MdS9ju4kduyKjvTn8Q99y_UO_E,31830
+chuk_puzzles_gym/gym_env.py,sha256=V2Eg1CFXKceR6vWTvAzvfanXvZL24STbw3YP8-cjkk0,18074
+chuk_puzzles_gym/server.py,sha256=SWfuBO4wtm_4Ri8l5hbQmvMF7ZN4Q42Wt66neFp5-nQ,48055
 chuk_puzzles_gym/export/__init__.py,sha256=TTXBRR5CBBCL04r1iXMzxib9oOIDTC4npxy2_L1xc2A,366
-chuk_puzzles_gym/export/dataset.py,sha256=dZMz9m4JwpZZSigvaJjIpGKIoxUWB01gXoyNCZ4o17o,10998
+chuk_puzzles_gym/export/dataset.py,sha256=bza7iCfp4POz0gCcoSRF_hTRZmuAD-59DyrrHiqo4ac,11335
 chuk_puzzles_gym/games/__init__.py,sha256=zByuxje5uVWQ4wBoGHUooHkAg5cgCljrCCXkyOLxLzo,3403
 chuk_puzzles_gym/games/_base/__init__.py,sha256=oNjoMvOVDb010ooyGxAfXBrOqmw1BAGavmaxf44tmz0,188
 chuk_puzzles_gym/games/_base/commands.py,sha256=tY0kxk08D8nPr_C_awo8qDUhkL6EHA59KnWiLlYnloY,2381
-chuk_puzzles_gym/games/_base/game.py,sha256=-YPJOgWsb4YVz8tS3cXJYd-y-1Tyx7eh8vs3tZEXcEA,11240
+chuk_puzzles_gym/games/_base/game.py,sha256=Jwfjj4qazgaWLQLNTghfMuydy-D3KrOuUmpCM9kpjlU,15711
 chuk_puzzles_gym/games/binary/__init__.py,sha256=Pphgj0kcvHUgkM0Mq89GsWPt-Bg6DobDLi7cqliOywk,156
 chuk_puzzles_gym/games/binary/config.py,sha256=Iw8Wax1856aqaz1KvDC69Qou6z8gxIWr5rSAI0MGnWg,812
 chuk_puzzles_gym/games/binary/game.py,sha256=lRBweQIdzyRZm_jMPItZ1VAzAcsEEbxvGqjGwAlTTy0,16359
@@ -118,17 +118,17 @@ chuk_puzzles_gym/games/sudoku/game.py,sha256=35vB5x-KIs5z2b-CDV-dq5kifmVkoEkbLOx
 chuk_puzzles_gym/games/tents/__init__.py,sha256=iVxsZg7Juz3iHXTK8mfJZniFcMNnmAd2h2RjxR2TH40,133
 chuk_puzzles_gym/games/tents/config.py,sha256=gSi5epG5va8-a4ZQv5ekcFDkWQSYOSheX2j4FIs_I8Q,914
 chuk_puzzles_gym/games/tents/game.py,sha256=JGPLYvIosCwjJYhi0FCtA3YUFsgQsD9L_BEArHSOPFM,15802
-chuk_puzzles_gym/models/__init__.py,sha256=dZzLWsyKE993o8HFfFkxTR7XjDwYK56rB-5clwW4zPg,930
+chuk_puzzles_gym/models/__init__.py,sha256=6SQn3zEcalTl-9VqKbSwvmWaYkRMuGKUkfiC25c9-h8,976
 chuk_puzzles_gym/models/base.py,sha256=L7Zug9jUXJCOhD3wKJp0ppJZNTgroDQwdYMjvAaVVqc,1156
 chuk_puzzles_gym/models/config.py,sha256=12UkPlEEFzN1k9ZfJClpVqkp7E11MWriZVAH2RkfEM4,301
 chuk_puzzles_gym/models/enums.py,sha256=xmHv0OK2zKcxpfhJP3huuXhDnnX0BDLCwWfpR9ZuraQ,2342
-chuk_puzzles_gym/models/evaluation.py,sha256=EwFeecWtQ-wyezPE1dhpKDUH-BTdF7cDJ_W99JLoMUM,16070
+chuk_puzzles_gym/models/evaluation.py,sha256=b2ldWPih-lo2jy59pWincjv9qZuF6PsZd42LPZsZzLc,22162
 chuk_puzzles_gym/models/games.py,sha256=rnEW_Sl9xuZtvlBXBZfab34HrIhtUEiBdUSs_nvh10o,442
 chuk_puzzles_gym/trace/__init__.py,sha256=8JHaHxbTDhT9kv4e2e5Px4dCWuXY49OXmvzkMS4nKfw,273
 chuk_puzzles_gym/trace/generator.py,sha256=4pks0d_asoDE15QjM2VuzgFWTV1fZke_gHH2lVF8KVQ,34058
 chuk_puzzles_gym/utils/__init__.py,sha256=1AKPfRjT9YlBxxcA7qdKcvKBXdHJzfGtUWansrb_2VE,149
-chuk_puzzles_gym-0.10.1.dist-info/METADATA,sha256=HD-oYiDi5OTNMOjtvxQkB9aBuOBUARAy1RcXcjf4T2I,49935
-chuk_puzzles_gym-0.10.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-chuk_puzzles_gym-0.10.1.dist-info/entry_points.txt,sha256=tJGHiH8wjkBev2SPNuXOLFkaXE76sW9ZFIMQw4pUj5E,181
-chuk_puzzles_gym-0.10.1.dist-info/top_level.txt,sha256=H3z9wKGl7CV1BPlO6t5lEtok6WW9rwGr5C1Dr3Kqx28,17
-chuk_puzzles_gym-0.10.1.dist-info/RECORD,,
+chuk_puzzles_gym-0.10.2.dist-info/METADATA,sha256=adaIAGmTJQj7wES0bqZEETQ5pbQQJ9OrswxhNZayits,55140
+chuk_puzzles_gym-0.10.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+chuk_puzzles_gym-0.10.2.dist-info/entry_points.txt,sha256=tJGHiH8wjkBev2SPNuXOLFkaXE76sW9ZFIMQw4pUj5E,181
+chuk_puzzles_gym-0.10.2.dist-info/top_level.txt,sha256=H3z9wKGl7CV1BPlO6t5lEtok6WW9rwGr5C1Dr3Kqx28,17
+chuk_puzzles_gym-0.10.2.dist-info/RECORD,,

{chuk_puzzles_gym-0.10.1.dist-info → chuk_puzzles_gym-0.10.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{chuk_puzzles_gym-0.10.1.dist-info → chuk_puzzles_gym-0.10.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{chuk_puzzles_gym-0.10.1.dist-info → chuk_puzzles_gym-0.10.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

chuk-puzzles-gym 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl

chuk-puzzles-gym 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl