blindpath 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- blindpath-0.1.0/LICENSE +21 -0
- blindpath-0.1.0/PKG-INFO +255 -0
- blindpath-0.1.0/README.md +225 -0
- blindpath-0.1.0/blindpath/Constants/__init__.py +37 -0
- blindpath-0.1.0/blindpath/Constants/env.py +24 -0
- blindpath-0.1.0/blindpath/Constants/tiles.py +19 -0
- blindpath-0.1.0/blindpath/DataClasses/__init__.py +7 -0
- blindpath-0.1.0/blindpath/DataClasses/env_config.py +40 -0
- blindpath-0.1.0/blindpath/DataClasses/generated_map.py +15 -0
- blindpath-0.1.0/blindpath/DataClasses/step_result.py +18 -0
- blindpath-0.1.0/blindpath/DataClasses/vector2.py +26 -0
- blindpath-0.1.0/blindpath/Util/__init__.py +9 -0
- blindpath-0.1.0/blindpath/Util/action_parsing.py +36 -0
- blindpath-0.1.0/blindpath/Util/astar.py +102 -0
- blindpath-0.1.0/blindpath/Util/grid_formatting.py +138 -0
- blindpath-0.1.0/blindpath/__init__.py +53 -0
- blindpath-0.1.0/blindpath/agents/__init__.py +11 -0
- blindpath-0.1.0/blindpath/agents/base.py +39 -0
- blindpath-0.1.0/blindpath/agents/llm_agent.py +69 -0
- blindpath-0.1.0/blindpath/agents/pomcp_agent.py +300 -0
- blindpath-0.1.0/blindpath/agents/random_agent.py +38 -0
- blindpath-0.1.0/blindpath/core/__init__.py +18 -0
- blindpath-0.1.0/blindpath/core/env.py +284 -0
- blindpath-0.1.0/blindpath/core/grid.py +151 -0
- blindpath-0.1.0/blindpath/core/map_generation.py +221 -0
- blindpath-0.1.0/blindpath/eval/__init__.py +4 -0
- blindpath-0.1.0/blindpath/eval/benchmark_results.py +109 -0
- blindpath-0.1.0/blindpath/eval/episode_metrics.py +128 -0
- blindpath-0.1.0/blindpath/eval/metrics.py +11 -0
- blindpath-0.1.0/blindpath/session.py +249 -0
- blindpath-0.1.0/blindpath.egg-info/PKG-INFO +255 -0
- blindpath-0.1.0/blindpath.egg-info/SOURCES.txt +35 -0
- blindpath-0.1.0/blindpath.egg-info/dependency_links.txt +1 -0
- blindpath-0.1.0/blindpath.egg-info/requires.txt +10 -0
- blindpath-0.1.0/blindpath.egg-info/top_level.txt +1 -0
- blindpath-0.1.0/pyproject.toml +40 -0
- blindpath-0.1.0/setup.cfg +4 -0
blindpath-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 prooheckcp
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
blindpath-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: blindpath
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: BlindPath: An LLM-Navigation Benchmark for Partial-Observation Grid Worlds
|
|
5
|
+
Author-email: prooheckcp <vasco.soares.2001@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/prooheckcp/BlindPath
|
|
8
|
+
Project-URL: Repository, https://github.com/prooheckcp/BlindPath
|
|
9
|
+
Project-URL: Issues, https://github.com/prooheckcp/BlindPath/issues
|
|
10
|
+
Keywords: llm,benchmark,navigation,grid-world,partial-observability,game-ai
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: numpy>=1.26.0
|
|
22
|
+
Provides-Extra: llm
|
|
23
|
+
Requires-Dist: anthropic>=0.25.0; extra == "llm"
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
27
|
+
Requires-Dist: build; extra == "dev"
|
|
28
|
+
Requires-Dist: twine; extra == "dev"
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
# BlindPath 🗺️
|
|
32
|
+
|
|
33
|
+
**An LLM-Navigation Benchmark for Partial-Observation Grid Worlds**
|
|
34
|
+
|
|
35
|
+
BlindPath tests whether LLM agents can navigate a 2D grid world under
|
|
36
|
+
**partial observability** — a key challenge in real game AI that most
|
|
37
|
+
existing benchmarks ignore.
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Key Distinctions
|
|
42
|
+
|
|
43
|
+
| Property | BlindPath | Typical Benchmarks |
|
|
44
|
+
|---|---|---|
|
|
45
|
+
| Targets | Game AI agents | General reasoning |
|
|
46
|
+
| Observability | Partial (vision window) | Full global state |
|
|
47
|
+
| Input format | Non-rigid (hybrid prompting) | Fixed natural language |
|
|
48
|
+
| Task complexity | Long-horizon planning | Short single-step |
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Benchmark Metrics
|
|
53
|
+
|
|
54
|
+
| Metric | Formula | Measures |
|
|
55
|
+
|---|---|---|
|
|
56
|
+
| **Success Rate** | `Successes / Episodes × 100%` | Task completion |
|
|
57
|
+
| **Optimal Rate** | `OptimalPathLen / LegalActions × 100%` | Navigation efficiency |
|
|
58
|
+
| **Feasible Rate** | `LegalActions / TotalActions × 100%` | Spatial rule adherence |
|
|
59
|
+
| **Compliance Ratio** | `TotalActions / Iterations × 100%` | Output format compliance |
|
|
60
|
+
| **Exploration Ratio** | `SeenTiles / AccessibleVisionTiles × 100%` | Coverage |
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## Installation
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
pip install -e .
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Or install dependencies directly:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install anthropic numpy
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Quick Start
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from blindpath import BlindPathEnv, EnvConfig, RandomAgent, Session
|
|
82
|
+
|
|
83
|
+
config = EnvConfig(
|
|
84
|
+
seed=42,
|
|
85
|
+
env_size=(20, 20),
|
|
86
|
+
num_goals=1,
|
|
87
|
+
obstacle_count=20,
|
|
88
|
+
vision_size=7, # agent sees a 7×7 window around itself
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
agent = RandomAgent(seed=42)
|
|
92
|
+
session = Session(agent, config, verbose=True)
|
|
93
|
+
|
|
94
|
+
# Single episode
|
|
95
|
+
metrics = session.run()
|
|
96
|
+
print(metrics.summary())
|
|
97
|
+
|
|
98
|
+
# Multi-episode benchmark
|
|
99
|
+
results = session.run_benchmark(num_episodes=20, base_seed=42)
|
|
100
|
+
print(results.summary())
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## Hybrid Prompting Architecture
|
|
106
|
+
|
|
107
|
+
Developers supply a **custom_prompt** that is combined with BlindPath's
|
|
108
|
+
pre-defined static rules before being fed to the LLM. This gives full
|
|
109
|
+
flexibility over how state information is presented:
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from blindpath import LLMAgent, Session, BlindPathEnv, StepResult
|
|
113
|
+
|
|
114
|
+
class MyAgent(LLMAgent):
|
|
115
|
+
def _prompt(self, prompt: str) -> str:
|
|
116
|
+
# Example: call your LLM's API here
|
|
117
|
+
# return my_llm_client.generate(prompt)
|
|
118
|
+
return '{"action": "Up"}'
|
|
119
|
+
|
|
120
|
+
def my_prompt(env: BlindPathEnv, result: StepResult) -> str:
|
|
121
|
+
pos = result.agent_position
|
|
122
|
+
return (
|
|
123
|
+
f"You are at column {pos.x}, row {pos.y}. "
|
|
124
|
+
f"Goals remaining: {result.goals_remaining}. "
|
|
125
|
+
"Move towards any visible 'G' tile."
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
agent = MyAgent(prompt_builder=my_prompt)
|
|
129
|
+
session = Session(agent, config)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## Action Space
|
|
135
|
+
|
|
136
|
+
```json
|
|
137
|
+
{"action": "Up"}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
| Action | Effect |
|
|
141
|
+
|---|---|
|
|
142
|
+
| `"Up"` | Move agent by `(0, -1)` |
|
|
143
|
+
| `"Down"` | Move agent by `(0, +1)` |
|
|
144
|
+
| `"Left"` | Move agent by `(-1, 0)` |
|
|
145
|
+
| `"Right"` | Move agent by `(+1, 0)` |
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## Grid Labels
|
|
150
|
+
|
|
151
|
+
| Symbol | Meaning |
|
|
152
|
+
|---|---|
|
|
153
|
+
| `A` | Agent |
|
|
154
|
+
| `E` | Empty |
|
|
155
|
+
| `O` | Obstacle (impassable) |
|
|
156
|
+
| `G` | Goal |
|
|
157
|
+
| `B` | Boundary (edge of map) |
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## Hyperparameters
|
|
162
|
+
|
|
163
|
+
| Parameter | Type | Default | Notes |
|
|
164
|
+
|---|---|---|---|
|
|
165
|
+
| `seed` | `int \| None` | `None` | Falls back to system time; ensures reproducibility when set |
|
|
166
|
+
| `env_size` | `(int, int)` | `(40, 40)` | Minimum 5×5 |
|
|
167
|
+
| `num_goals` | `int` | `1` | ≥1 |
|
|
168
|
+
| `obstacle_count` | `int` | `0` | Number of obstacles |
|
|
169
|
+
| `vision_size` | `int` | `11` | Must be odd; ≥3 |
|
|
170
|
+
|
|
171
|
+
Each episode uses a **unique, procedurally generated map**. When running a
|
|
172
|
+
benchmark with `run_benchmark(num_episodes=N, base_seed=S)`, each episode
|
|
173
|
+
receives `seed = base_seed + i`, producing a different grid layout every time
|
|
174
|
+
while keeping the full run reproducible.
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Running the Baselines
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
# Random agent (10 episodes)
|
|
182
|
+
python experiments/run_random.py --episodes 10 --seed 42
|
|
183
|
+
|
|
184
|
+
# POMCP agent (5 episodes)
|
|
185
|
+
python experiments/run_pomcp.py --episodes 5 --sims 300
|
|
186
|
+
|
|
187
|
+
# LLM agent (1 episode)
|
|
188
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
189
|
+
python experiments/run_llm.py --model claude-haiku-4-5 --verbose
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
## Project Structure
|
|
195
|
+
|
|
196
|
+
```
|
|
197
|
+
blindpath/
|
|
198
|
+
├── Constants/
|
|
199
|
+
│ ├── env.py # Default/minimum config values
|
|
200
|
+
│ └── tiles.py # Tile label constants (A, E, O, G, B)
|
|
201
|
+
├── DataClasses/
|
|
202
|
+
│ ├── env_config.py # EnvConfig dataclass
|
|
203
|
+
│ ├── generated_map.py # GeneratedMap dataclass
|
|
204
|
+
│ ├── step_result.py # StepResult dataclass
|
|
205
|
+
│ └── vector2.py # Vector2 dataclass
|
|
206
|
+
├── Util/
|
|
207
|
+
│ ├── astar.py # A* pathfinding
|
|
208
|
+
│ └── grid_formatting.py # grid_to_ascii()
|
|
209
|
+
├── core/
|
|
210
|
+
│ ├── grid.py # Grid class
|
|
211
|
+
│ ├── map_generation.py # Procedural map generation
|
|
212
|
+
│ └── env.py # BlindPathEnv
|
|
213
|
+
├── agents/
|
|
214
|
+
│ ├── base.py # BaseAgent abstract class
|
|
215
|
+
│ ├── random_agent.py
|
|
216
|
+
│ ├── pomcp_agent.py
|
|
217
|
+
│ └── llm_agent.py
|
|
218
|
+
├── eval/
|
|
219
|
+
│ ├── episode_metrics.py # EpisodeMetrics dataclass
|
|
220
|
+
│ └── benchmark_results.py # BenchmarkResults dataclass
|
|
221
|
+
└── session.py # Session lifecycle + run_benchmark()
|
|
222
|
+
experiments/
|
|
223
|
+
├── arguments.py # Shared CLI argument parsing
|
|
224
|
+
├── run_random.py
|
|
225
|
+
├── run_pomcp.py
|
|
226
|
+
└── run_llm.py
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## Baselines
|
|
232
|
+
|
|
233
|
+
- **Random Agent** — Uniform random action selection. Lower bound.
|
|
234
|
+
- **POMCP Agent** — Partially Observable Monte-Carlo Planning with particle
|
|
235
|
+
filters. Traditional method baseline for partial-info navigation.
|
|
236
|
+
- **LLM Agent** — Claude via the Anthropic API. The primary evaluation target.
|
|
237
|
+
|
|
238
|
+
---
|
|
239
|
+
|
|
240
|
+
## Iteration Limit
|
|
241
|
+
|
|
242
|
+
```
|
|
243
|
+
max_iterations = OptimalPathLength × (full_area / vision_area) × 3
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
Scales to allow for back-tracking proportional to how blind the agent is.
|
|
247
|
+
|
|
248
|
+
---
|
|
249
|
+
|
|
250
|
+
## Related Work
|
|
251
|
+
|
|
252
|
+
- [PPLN](https://arxiv.org/pdf/2310.03249) — Similar task, no partial observability
|
|
253
|
+
- [GWSOT](https://arxiv.org/pdf/2502.16690) — Spatial awareness probing, full state
|
|
254
|
+
- [GameTraversalBenchmark](https://arxiv.org/pdf/2410.07765) — 2D navigation, global state
|
|
255
|
+
- [GridRoute](https://arxiv.org/pdf/2505.24306) — Prompt engineering for pathfinding
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
# BlindPath 🗺️
|
|
2
|
+
|
|
3
|
+
**An LLM-Navigation Benchmark for Partial-Observation Grid Worlds**
|
|
4
|
+
|
|
5
|
+
BlindPath tests whether LLM agents can navigate a 2D grid world under
|
|
6
|
+
**partial observability** — a key challenge in real game AI that most
|
|
7
|
+
existing benchmarks ignore.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Key Distinctions
|
|
12
|
+
|
|
13
|
+
| Property | BlindPath | Typical Benchmarks |
|
|
14
|
+
|---|---|---|
|
|
15
|
+
| Targets | Game AI agents | General reasoning |
|
|
16
|
+
| Observability | Partial (vision window) | Full global state |
|
|
17
|
+
| Input format | Non-rigid (hybrid prompting) | Fixed natural language |
|
|
18
|
+
| Task complexity | Long-horizon planning | Short single-step |
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Benchmark Metrics
|
|
23
|
+
|
|
24
|
+
| Metric | Formula | Measures |
|
|
25
|
+
|---|---|---|
|
|
26
|
+
| **Success Rate** | `Successes / Episodes × 100%` | Task completion |
|
|
27
|
+
| **Optimal Rate** | `OptimalPathLen / LegalActions × 100%` | Navigation efficiency |
|
|
28
|
+
| **Feasible Rate** | `LegalActions / TotalActions × 100%` | Spatial rule adherence |
|
|
29
|
+
| **Compliance Ratio** | `TotalActions / Iterations × 100%` | Output format compliance |
|
|
30
|
+
| **Exploration Ratio** | `SeenTiles / AccessibleVisionTiles × 100%` | Coverage |
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install -e .
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Or install dependencies directly:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install anthropic numpy
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from blindpath import BlindPathEnv, EnvConfig, RandomAgent, Session
|
|
52
|
+
|
|
53
|
+
config = EnvConfig(
|
|
54
|
+
seed=42,
|
|
55
|
+
env_size=(20, 20),
|
|
56
|
+
num_goals=1,
|
|
57
|
+
obstacle_count=20,
|
|
58
|
+
vision_size=7, # agent sees a 7×7 window around itself
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
agent = RandomAgent(seed=42)
|
|
62
|
+
session = Session(agent, config, verbose=True)
|
|
63
|
+
|
|
64
|
+
# Single episode
|
|
65
|
+
metrics = session.run()
|
|
66
|
+
print(metrics.summary())
|
|
67
|
+
|
|
68
|
+
# Multi-episode benchmark
|
|
69
|
+
results = session.run_benchmark(num_episodes=20, base_seed=42)
|
|
70
|
+
print(results.summary())
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Hybrid Prompting Architecture
|
|
76
|
+
|
|
77
|
+
Developers supply a **custom_prompt** that is combined with BlindPath's
|
|
78
|
+
pre-defined static rules before being fed to the LLM. This gives full
|
|
79
|
+
flexibility over how state information is presented:
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from blindpath import LLMAgent, Session, BlindPathEnv, StepResult
|
|
83
|
+
|
|
84
|
+
class MyAgent(LLMAgent):
|
|
85
|
+
def _prompt(self, prompt: str) -> str:
|
|
86
|
+
# Example: call your LLM's API here
|
|
87
|
+
# return my_llm_client.generate(prompt)
|
|
88
|
+
return '{"action": "Up"}'
|
|
89
|
+
|
|
90
|
+
def my_prompt(env: BlindPathEnv, result: StepResult) -> str:
|
|
91
|
+
pos = result.agent_position
|
|
92
|
+
return (
|
|
93
|
+
f"You are at column {pos.x}, row {pos.y}. "
|
|
94
|
+
f"Goals remaining: {result.goals_remaining}. "
|
|
95
|
+
"Move towards any visible 'G' tile."
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
agent = MyAgent(prompt_builder=my_prompt)
|
|
99
|
+
session = Session(agent, config)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Action Space
|
|
105
|
+
|
|
106
|
+
```json
|
|
107
|
+
{"action": "Up"}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
| Action | Effect |
|
|
111
|
+
|---|---|
|
|
112
|
+
| `"Up"` | Move agent by `(0, -1)` |
|
|
113
|
+
| `"Down"` | Move agent by `(0, +1)` |
|
|
114
|
+
| `"Left"` | Move agent by `(-1, 0)` |
|
|
115
|
+
| `"Right"` | Move agent by `(+1, 0)` |
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Grid Labels
|
|
120
|
+
|
|
121
|
+
| Symbol | Meaning |
|
|
122
|
+
|---|---|
|
|
123
|
+
| `A` | Agent |
|
|
124
|
+
| `E` | Empty |
|
|
125
|
+
| `O` | Obstacle (impassable) |
|
|
126
|
+
| `G` | Goal |
|
|
127
|
+
| `B` | Boundary (edge of map) |
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Hyperparameters
|
|
132
|
+
|
|
133
|
+
| Parameter | Type | Default | Notes |
|
|
134
|
+
|---|---|---|---|
|
|
135
|
+
| `seed` | `int \| None` | `None` | Falls back to system time; ensures reproducibility when set |
|
|
136
|
+
| `env_size` | `(int, int)` | `(40, 40)` | Minimum 5×5 |
|
|
137
|
+
| `num_goals` | `int` | `1` | ≥1 |
|
|
138
|
+
| `obstacle_count` | `int` | `0` | Number of obstacles |
|
|
139
|
+
| `vision_size` | `int` | `11` | Must be odd; ≥3 |
|
|
140
|
+
|
|
141
|
+
Each episode uses a **unique, procedurally generated map**. When running a
|
|
142
|
+
benchmark with `run_benchmark(num_episodes=N, base_seed=S)`, each episode
|
|
143
|
+
receives `seed = base_seed + i`, producing a different grid layout every time
|
|
144
|
+
while keeping the full run reproducible.
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## Running the Baselines
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
# Random agent (10 episodes)
|
|
152
|
+
python experiments/run_random.py --episodes 10 --seed 42
|
|
153
|
+
|
|
154
|
+
# POMCP agent (5 episodes)
|
|
155
|
+
python experiments/run_pomcp.py --episodes 5 --sims 300
|
|
156
|
+
|
|
157
|
+
# LLM agent (1 episode)
|
|
158
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
159
|
+
python experiments/run_llm.py --model claude-haiku-4-5 --verbose
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Project Structure
|
|
165
|
+
|
|
166
|
+
```
|
|
167
|
+
blindpath/
|
|
168
|
+
├── Constants/
|
|
169
|
+
│ ├── env.py # Default/minimum config values
|
|
170
|
+
│ └── tiles.py # Tile label constants (A, E, O, G, B)
|
|
171
|
+
├── DataClasses/
|
|
172
|
+
│ ├── env_config.py # EnvConfig dataclass
|
|
173
|
+
│ ├── generated_map.py # GeneratedMap dataclass
|
|
174
|
+
│ ├── step_result.py # StepResult dataclass
|
|
175
|
+
│ └── vector2.py # Vector2 dataclass
|
|
176
|
+
├── Util/
|
|
177
|
+
│ ├── astar.py # A* pathfinding
|
|
178
|
+
│ └── grid_formatting.py # grid_to_ascii()
|
|
179
|
+
├── core/
|
|
180
|
+
│ ├── grid.py # Grid class
|
|
181
|
+
│ ├── map_generation.py # Procedural map generation
|
|
182
|
+
│ └── env.py # BlindPathEnv
|
|
183
|
+
├── agents/
|
|
184
|
+
│ ├── base.py # BaseAgent abstract class
|
|
185
|
+
│ ├── random_agent.py
|
|
186
|
+
│ ├── pomcp_agent.py
|
|
187
|
+
│ └── llm_agent.py
|
|
188
|
+
├── eval/
|
|
189
|
+
│ ├── episode_metrics.py # EpisodeMetrics dataclass
|
|
190
|
+
│ └── benchmark_results.py # BenchmarkResults dataclass
|
|
191
|
+
└── session.py # Session lifecycle + run_benchmark()
|
|
192
|
+
experiments/
|
|
193
|
+
├── arguments.py # Shared CLI argument parsing
|
|
194
|
+
├── run_random.py
|
|
195
|
+
├── run_pomcp.py
|
|
196
|
+
└── run_llm.py
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
## Baselines
|
|
202
|
+
|
|
203
|
+
- **Random Agent** — Uniform random action selection. Lower bound.
|
|
204
|
+
- **POMCP Agent** — Partially Observable Monte-Carlo Planning with particle
|
|
205
|
+
filters. Traditional method baseline for partial-info navigation.
|
|
206
|
+
- **LLM Agent** — Claude via the Anthropic API. The primary evaluation target.
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## Iteration Limit
|
|
211
|
+
|
|
212
|
+
```
|
|
213
|
+
max_iterations = OptimalPathLength × (full_area / vision_area) × 3
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
Scales to allow for back-tracking proportional to how blind the agent is.
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## Related Work
|
|
221
|
+
|
|
222
|
+
- [PPLN](https://arxiv.org/pdf/2310.03249) — Similar task, no partial observability
|
|
223
|
+
- [GWSOT](https://arxiv.org/pdf/2502.16690) — Spatial awareness probing, full state
|
|
224
|
+
- [GameTraversalBenchmark](https://arxiv.org/pdf/2410.07765) — 2D navigation, global state
|
|
225
|
+
- [GridRoute](https://arxiv.org/pdf/2505.24306) — Prompt engineering for pathfinding
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Public re-exports for :mod:`blindpath.Constants`.

Aggregates the single-character tile labels from ``tiles.py`` and the
environment default/minimum values from ``env.py`` so callers can write
``from blindpath.Constants import GOAL, DEFAULT_VISION_SIZE`` directly.
"""

from blindpath.Constants.tiles import (
    AGENT,
    EMPTY,
    OBSTACLE,
    GOAL,
    BOUNDARY,
    TILE_WORDS,
)
from blindpath.Constants.env import (
    DEFAULT_SEED,
    DEFAULT_ENV_SIZE,
    DEFAULT_NUM_GOALS,
    DEFAULT_OBSTACLE_COUNT,
    DEFAULT_VISION_SIZE,
    MIN_ENV_DIMENSION,
    MIN_VISION_SIZE,
    MIN_NUM_GOALS,
    MIN_OBSTACLE_COUNT,
)

# Explicit public API: defaults/minimums first, then the tile labels.
__all__ = [
    "DEFAULT_SEED",
    "DEFAULT_ENV_SIZE",
    "DEFAULT_NUM_GOALS",
    "DEFAULT_OBSTACLE_COUNT",
    "DEFAULT_VISION_SIZE",
    "MIN_ENV_DIMENSION",
    "MIN_VISION_SIZE",
    "MIN_NUM_GOALS",
    "MIN_OBSTACLE_COUNT",
    "AGENT",
    "EMPTY",
    "OBSTACLE",
    "GOAL",
    "BOUNDARY",
    "TILE_WORDS",
]
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Constants for BlindPath environment configuration.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Tuple
|
|
6
|
+
|
|
7
|
+
# ---------------------------------------------------------------------------
|
|
8
|
+
# Default values
|
|
9
|
+
# ---------------------------------------------------------------------------
|
|
10
|
+
|
|
11
|
+
DEFAULT_SEED: None = None
|
|
12
|
+
DEFAULT_ENV_SIZE: Tuple[int, int] = (40, 40)
|
|
13
|
+
DEFAULT_NUM_GOALS: int = 1
|
|
14
|
+
DEFAULT_OBSTACLE_COUNT: int = 0
|
|
15
|
+
DEFAULT_VISION_SIZE: int = 11
|
|
16
|
+
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
# Minimum allowed values
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
MIN_ENV_DIMENSION: int = 5
|
|
22
|
+
MIN_VISION_SIZE: int = 3
|
|
23
|
+
MIN_NUM_GOALS: int = 1
|
|
24
|
+
MIN_OBSTACLE_COUNT: int = 0
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""
|
|
2
|
+
blindpath/Constants/tiles.py
|
|
3
|
+
|
|
4
|
+
Single-character tile labels used throughout the grid system.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
AGENT = "A"
|
|
8
|
+
EMPTY = "E"
|
|
9
|
+
OBSTACLE = "O"
|
|
10
|
+
GOAL = "G"
|
|
11
|
+
BOUNDARY = "B"
|
|
12
|
+
|
|
13
|
+
TILE_WORDS: dict[str, str] = {
|
|
14
|
+
AGENT: "Agent",
|
|
15
|
+
EMPTY: "Empty",
|
|
16
|
+
OBSTACLE: "Obstacle",
|
|
17
|
+
GOAL: "Goal",
|
|
18
|
+
BOUNDARY: "Boundary",
|
|
19
|
+
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""Public re-exports for :mod:`blindpath.DataClasses`.

Collects the package's data containers (``Vector2``, ``StepResult``,
``EnvConfig``, ``GeneratedMap``) in one namespace. ``Grid`` lives in
``blindpath.core.grid`` but is re-exported here as well for convenience.
"""

from blindpath.DataClasses.vector2 import Vector2
from blindpath.core.grid import Grid
from blindpath.DataClasses.step_result import StepResult
from blindpath.DataClasses.env_config import EnvConfig
from blindpath.DataClasses.generated_map import GeneratedMap

__all__ = ["Vector2", "Grid", "StepResult", "EnvConfig", "GeneratedMap"]
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Optional, Tuple
|
|
3
|
+
|
|
4
|
+
from blindpath.Constants.env import (
|
|
5
|
+
DEFAULT_SEED,
|
|
6
|
+
DEFAULT_ENV_SIZE,
|
|
7
|
+
DEFAULT_NUM_GOALS,
|
|
8
|
+
DEFAULT_OBSTACLE_COUNT,
|
|
9
|
+
DEFAULT_VISION_SIZE,
|
|
10
|
+
MIN_ENV_DIMENSION,
|
|
11
|
+
MIN_VISION_SIZE,
|
|
12
|
+
MIN_NUM_GOALS,
|
|
13
|
+
MIN_OBSTACLE_COUNT,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class EnvConfig:
    """All hyperparameters for one BlindPath environment instance.

    Validation happens eagerly in ``__post_init__``; constructing an
    instance with out-of-range values raises ``ValueError`` immediately.
    """

    seed: Optional[int] = DEFAULT_SEED              # None -> non-reproducible run
    env_size: Tuple[int, int] = DEFAULT_ENV_SIZE    # grid dimensions
    num_goals: int = DEFAULT_NUM_GOALS              # goals per episode (>= 1)
    obstacle_count: int = DEFAULT_OBSTACLE_COUNT    # obstacles to place (>= 0)
    vision_size: int = DEFAULT_VISION_SIZE          # odd side of the vision window

    def __post_init__(self) -> None:
        """Check every field against its bounds, raising ValueError on the first violation."""
        first_dim, second_dim = self.env_size
        smallest_side = min(first_dim, second_dim)
        # Grid must be large enough to be playable.
        if smallest_side < MIN_ENV_DIMENSION:
            raise ValueError(f"env_size must be at least {MIN_ENV_DIMENSION}x{MIN_ENV_DIMENSION}")
        # Vision window: bounded below, odd (so it centres on the agent),
        # and no larger than the grid itself.
        if self.vision_size < MIN_VISION_SIZE:
            raise ValueError(f"vision_size must be at least {MIN_VISION_SIZE}")
        if self.vision_size % 2 == 0:
            raise ValueError("vision_size must be odd")
        if self.vision_size > smallest_side:
            raise ValueError("vision_size cannot exceed the environment size")
        if self.num_goals < MIN_NUM_GOALS:
            raise ValueError(f"num_goals must be >= {MIN_NUM_GOALS}")
        if self.obstacle_count < MIN_OBSTACLE_COUNT:
            raise ValueError(f"obstacle_count must be >= {MIN_OBSTACLE_COUNT}")
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
from blindpath.DataClasses.vector2 import Vector2
|
|
5
|
+
from blindpath.core.grid import Grid
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class GeneratedMap:
    """Output of procedural map generation: the grid plus per-episode metadata."""

    grid: Grid                    # the generated full environment grid
    agent_start: Vector2          # agent spawn cell
    goals: List[Vector2]          # positions of all goal tiles
    optimal_path_len: int         # shortest-path length — presumably A* from start through goals; TODO confirm
    accessible_tiles: int         # NOTE(review): assumed to be tiles reachable from the start — verify in map_generation
    accessible_vision_tiles: int  # NOTE(review): assumed denominator of the Exploration Ratio metric — verify in eval
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from blindpath.DataClasses.vector2 import Vector2
|
|
4
|
+
from blindpath.core.grid import Grid
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class StepResult:
    """Returned by BlindPathEnv.step(): a snapshot of the episode after one action."""

    vision_grid: Grid        # NxN vision window (LLM input)
    full_grid: Grid          # Full grid (debug only)
    agent_position: Vector2  # agent's cell after the step (x = column, y = row)
    goals_remaining: int     # goals not yet reached
    done: bool               # episode finished (success OR timeout)
    success: bool            # all goals reached
    timed_out: bool          # iteration limit hit before success
    legal: bool              # was the attempted action legal?
    iteration: int           # step counter — 0- vs 1-based not visible here; TODO confirm against env.py
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class Vector2:
    """An integer 2-D grid coordinate.

    ``x`` is the column and ``y`` the row, with row 0 at the top.
    Instances hash on their ``(x, y)`` pair, so equal vectors are
    interchangeable as dict keys and set members.
    """

    x: int  # column
    y: int  # row (0 = top)

    def __add__(self, other: Vector2) -> Vector2:
        """Component-wise addition; returns a new Vector2."""
        return Vector2(x=self.x + other.x, y=self.y + other.y)

    def __eq__(self, other: object) -> bool:
        """Two vectors are equal when both components match."""
        if isinstance(other, Vector2):
            return (self.x, self.y) == (other.x, other.y)
        # Defer to the other operand's comparison for foreign types.
        return NotImplemented

    def __hash__(self) -> int:
        """Hash on the (x, y) pair, consistent with __eq__."""
        return hash((self.x, self.y))

    def __repr__(self) -> str:
        """Compact positional form, e.g. ``Vector2(3, 7)``."""
        return f"Vector2({self.x}, {self.y})"

    def manhattan(self, other: Vector2) -> int:
        """L1 (taxicab) distance to *other*."""
        dx = self.x - other.x
        dy = self.y - other.y
        return abs(dx) + abs(dy)
|