hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -22
- hud/agents/__init__.py +13 -15
- hud/agents/base.py +599 -599
- hud/agents/claude.py +373 -373
- hud/agents/langchain.py +261 -250
- hud/agents/misc/__init__.py +7 -7
- hud/agents/misc/response_agent.py +82 -80
- hud/agents/openai.py +352 -352
- hud/agents/openai_chat_generic.py +154 -154
- hud/agents/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -742
- hud/agents/tests/test_claude.py +324 -324
- hud/agents/tests/test_client.py +363 -363
- hud/agents/tests/test_openai.py +237 -237
- hud/cli/__init__.py +617 -617
- hud/cli/__main__.py +8 -8
- hud/cli/analyze.py +371 -371
- hud/cli/analyze_metadata.py +230 -230
- hud/cli/build.py +498 -427
- hud/cli/clone.py +185 -185
- hud/cli/cursor.py +92 -92
- hud/cli/debug.py +392 -392
- hud/cli/docker_utils.py +83 -83
- hud/cli/init.py +280 -281
- hud/cli/interactive.py +353 -353
- hud/cli/mcp_server.py +764 -756
- hud/cli/pull.py +330 -336
- hud/cli/push.py +404 -370
- hud/cli/remote_runner.py +311 -311
- hud/cli/runner.py +160 -160
- hud/cli/tests/__init__.py +3 -3
- hud/cli/tests/test_analyze.py +284 -284
- hud/cli/tests/test_cli_init.py +265 -265
- hud/cli/tests/test_cli_main.py +27 -27
- hud/cli/tests/test_clone.py +142 -142
- hud/cli/tests/test_cursor.py +253 -253
- hud/cli/tests/test_debug.py +453 -453
- hud/cli/tests/test_mcp_server.py +139 -139
- hud/cli/tests/test_utils.py +388 -388
- hud/cli/utils.py +263 -263
- hud/clients/README.md +143 -143
- hud/clients/__init__.py +16 -16
- hud/clients/base.py +378 -379
- hud/clients/fastmcp.py +222 -222
- hud/clients/mcp_use.py +298 -278
- hud/clients/tests/__init__.py +1 -1
- hud/clients/tests/test_client_integration.py +111 -111
- hud/clients/tests/test_fastmcp.py +342 -342
- hud/clients/tests/test_protocol.py +188 -188
- hud/clients/utils/__init__.py +1 -1
- hud/clients/utils/retry_transport.py +160 -160
- hud/datasets.py +327 -322
- hud/misc/__init__.py +1 -1
- hud/misc/claude_plays_pokemon.py +292 -292
- hud/otel/__init__.py +35 -35
- hud/otel/collector.py +142 -142
- hud/otel/config.py +164 -164
- hud/otel/context.py +536 -536
- hud/otel/exporters.py +366 -366
- hud/otel/instrumentation.py +97 -97
- hud/otel/processors.py +118 -118
- hud/otel/tests/__init__.py +1 -1
- hud/otel/tests/test_processors.py +197 -197
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -114
- hud/server/helper/__init__.py +5 -5
- hud/server/low_level.py +132 -132
- hud/server/server.py +170 -166
- hud/server/tests/__init__.py +3 -3
- hud/settings.py +73 -73
- hud/shared/__init__.py +5 -5
- hud/shared/exceptions.py +180 -180
- hud/shared/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -157
- hud/shared/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -25
- hud/telemetry/instrument.py +379 -379
- hud/telemetry/job.py +309 -309
- hud/telemetry/replay.py +74 -74
- hud/telemetry/trace.py +83 -83
- hud/tools/__init__.py +33 -33
- hud/tools/base.py +365 -365
- hud/tools/bash.py +161 -161
- hud/tools/computer/__init__.py +15 -15
- hud/tools/computer/anthropic.py +437 -437
- hud/tools/computer/hud.py +376 -376
- hud/tools/computer/openai.py +295 -295
- hud/tools/computer/settings.py +82 -82
- hud/tools/edit.py +314 -314
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -539
- hud/tools/executors/pyautogui.py +621 -621
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -511
- hud/tools/playwright.py +412 -412
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -282
- hud/tools/tests/test_bash.py +158 -158
- hud/tools/tests/test_bash_extended.py +197 -197
- hud/tools/tests/test_computer.py +425 -425
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -259
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -145
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -72
- hud/tools/utils.py +50 -50
- hud/types.py +136 -136
- hud/utils/__init__.py +10 -10
- hud/utils/async_utils.py +65 -65
- hud/utils/design.py +236 -168
- hud/utils/mcp.py +55 -55
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -173
- hud/utils/tests/test_init.py +17 -17
- hud/utils/tests/test_progress.py +261 -261
- hud/utils/tests/test_telemetry.py +82 -82
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
- hud_python-0.4.3.dist-info/RECORD +131 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
- hud/agents/art.py +0 -101
- hud_python-0.4.1.dist-info/RECORD +0 -132
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/misc/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
"""Miscellaneous utilities for HUD SDK."""
|
|
1
|
+
"""Miscellaneous utilities for HUD SDK."""
|
hud/misc/claude_plays_pokemon.py
CHANGED
|
@@ -1,292 +1,292 @@
|
|
|
1
|
-
# pyright: reportGeneralTypeIssues=false
|
|
2
|
-
from __future__ import annotations
|
|
3
|
-
|
|
4
|
-
import json
|
|
5
|
-
import logging
|
|
6
|
-
from typing import TYPE_CHECKING, Any, cast
|
|
7
|
-
|
|
8
|
-
from anthropic import AsyncAnthropic
|
|
9
|
-
|
|
10
|
-
from hud.adapters import Adapter
|
|
11
|
-
from hud.adapters.common.types import CLA
|
|
12
|
-
|
|
13
|
-
# Update import to current API; if this script is legacy, keep it optional
|
|
14
|
-
try:
|
|
15
|
-
from hud.agents import MCPAgent as Agent # type: ignore[assignment]
|
|
16
|
-
except Exception: # pragma: no cover - optional example script
|
|
17
|
-
from hud.agents import MCPAgent as Agent # fallback
|
|
18
|
-
from hud.settings import settings
|
|
19
|
-
|
|
20
|
-
if TYPE_CHECKING:
|
|
21
|
-
from anthropic.types.beta import (
|
|
22
|
-
BetaImageBlockParam,
|
|
23
|
-
BetaMessageParam,
|
|
24
|
-
BetaTextBlockParam,
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
from hud.env.environment import Observation
|
|
28
|
-
|
|
29
|
-
logger = logging.getLogger(__name__)
|
|
30
|
-
|
|
31
|
-
# Constants
|
|
32
|
-
DEFAULT_MODEL = "claude-3-7-sonnet-20250219"
|
|
33
|
-
DEFAULT_MAX_TOKENS = 4096
|
|
34
|
-
DEFAULT_MAX_ITERATIONS = 10
|
|
35
|
-
DEFAULT_TEMPERATURE = 0.7
|
|
36
|
-
DEFAULT_MAX_MESSAGE_MEMORY = 20
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def generate_system_prompt(game_name: str) -> str:
|
|
40
|
-
"""Generate the system prompt for the AI agent.
|
|
41
|
-
|
|
42
|
-
Args:
|
|
43
|
-
game_name: Name of the game being played
|
|
44
|
-
|
|
45
|
-
Returns:
|
|
46
|
-
str: The system prompt for the AI agent
|
|
47
|
-
"""
|
|
48
|
-
return """You are a specialized AI assistant designed to play Pokémon games via screenshot analysis and text instructions. Your task is to understand the current game state from visual input, determine appropriate actions, and respond with structured outputs that control the game.
|
|
49
|
-
|
|
50
|
-
For each turn, you will receive:
|
|
51
|
-
1. A screenshot of the current game state
|
|
52
|
-
2. Contextual information about the game progress, recent events, and objectives
|
|
53
|
-
|
|
54
|
-
Based on this information, you must analyze the situation, determine the best course of action, and provide a structured JSON response.
|
|
55
|
-
|
|
56
|
-
## Response Format
|
|
57
|
-
Your response MUST follow this exact JSON format with no additional markers, tags, or block delimiters:
|
|
58
|
-
|
|
59
|
-
{
|
|
60
|
-
"analysis": "Brief analysis of the current game situation, visible UI elements, and important context (1-3 sentences)",
|
|
61
|
-
"current_objective": "The immediate goal based on the game state (single sentence)",
|
|
62
|
-
"reasoning": "Step-by-step logic explaining your chosen action sequence (2-4 sentences)",
|
|
63
|
-
"progress_assessment": "Evaluation of whether previous action(s) achieved their intended goal and why/why not (1-2 sentences)",
|
|
64
|
-
"actions": [
|
|
65
|
-
{
|
|
66
|
-
"type": "press",
|
|
67
|
-
"keys": ["up"|"down"|"left"|"right"|"a"|"b"|"start"|"select"|"pause"]
|
|
68
|
-
},
|
|
69
|
-
{
|
|
70
|
-
"type": "wait",
|
|
71
|
-
"time": milliseconds_to_wait
|
|
72
|
-
}
|
|
73
|
-
]
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
IMPORTANT: Do not include any conversation markers like <<ASSISTANT_CONVERSATION_START>> or <<ASSISTANT_CONVERSATION_END>> around your response. Provide only the clean JSON object.
|
|
77
|
-
|
|
78
|
-
## Action Types
|
|
79
|
-
- Button presses: {"type": "press", "keys": ["button_name"]} - Valid buttons are: up, down, left, right, a, b, start, select, pause
|
|
80
|
-
- Wait for processing: {"type": "wait", "time": milliseconds}
|
|
81
|
-
|
|
82
|
-
## Important Rules
|
|
83
|
-
1. Never use "wait" commands while the game is paused. The game state will not change while paused, so waiting is ineffective.
|
|
84
|
-
2. If you detect the game is paused, your next action should be to unpause by using {"type": "press", "keys": ["pause"]} before attempting other actions.
|
|
85
|
-
3. Maintain awareness of whether the game is in a paused state based on visual cues in the screenshot.
|
|
86
|
-
|
|
87
|
-
## Game Play Guidelines
|
|
88
|
-
1. **Navigation**: Use directional buttons to move the character or navigate menus
|
|
89
|
-
2. **Interaction**: Use 'a' to confirm selections and interact with objects/NPCs, 'b' to cancel or exit menus
|
|
90
|
-
3. **Menu Access**: Use 'start' to access the game menu
|
|
91
|
-
4. **Battle Strategy**: Analyze Pokémon types, moves, and stats to make optimal battle decisions
|
|
92
|
-
5. **Progressive Play**: Work toward completing the current objective while being mindful of longer-term goals like leveling Pokémon, collecting badges, and advancing the story
|
|
93
|
-
6. **Resource Management**: Monitor and manage HP, PP, items, and Pokéballs effectively
|
|
94
|
-
7. **Memory**: Maintain awareness of the game history and your previous actions to avoid repetitive behaviors
|
|
95
|
-
|
|
96
|
-
Always provide thoughtful analysis and clear reasoning for your decisions. If you're uncertain about the best course of action, prioritize safe moves that gather more information.
|
|
97
|
-
""" # noqa: E501
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def extract_action_from_response_block(block: dict[str, Any]) -> list[dict[str, Any]]:
|
|
101
|
-
"""Extract actions from a response block.
|
|
102
|
-
|
|
103
|
-
Args:
|
|
104
|
-
block: The response block containing actions
|
|
105
|
-
|
|
106
|
-
Returns:
|
|
107
|
-
list[dict[str, Any]]: List of actions extracted from the block
|
|
108
|
-
"""
|
|
109
|
-
if "actions" in block:
|
|
110
|
-
actions = block["actions"]
|
|
111
|
-
if isinstance(actions, list):
|
|
112
|
-
return actions
|
|
113
|
-
return []
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
def extract_json_from_response(response: str) -> str:
|
|
117
|
-
"""Extract JSON from a response string.
|
|
118
|
-
|
|
119
|
-
Args:
|
|
120
|
-
response: The response string containing JSON
|
|
121
|
-
|
|
122
|
-
Returns:
|
|
123
|
-
str: The extracted JSON string
|
|
124
|
-
"""
|
|
125
|
-
# Try to find JSON block with markdown code block markers
|
|
126
|
-
start = response.find("```json")
|
|
127
|
-
end = response.rfind("```")
|
|
128
|
-
if start != -1 and end != -1:
|
|
129
|
-
start += len("```json")
|
|
130
|
-
return response[start:end].strip()
|
|
131
|
-
|
|
132
|
-
# Try to find JSON object directly
|
|
133
|
-
start = response.find("{")
|
|
134
|
-
end = response.rfind("}")
|
|
135
|
-
if start != -1 and end != -1:
|
|
136
|
-
return response[start : end + 1].strip()
|
|
137
|
-
|
|
138
|
-
return response.strip()
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
class ClaudePlaysPokemon(Agent[AsyncAnthropic, CLA]):
|
|
142
|
-
"""AI agent that plays Pokémon games using Claude."""
|
|
143
|
-
|
|
144
|
-
def __init__(
|
|
145
|
-
self,
|
|
146
|
-
client: AsyncAnthropic | None = None,
|
|
147
|
-
adapter: Adapter | None = None,
|
|
148
|
-
model: str = DEFAULT_MODEL,
|
|
149
|
-
max_tokens: int = DEFAULT_MAX_TOKENS,
|
|
150
|
-
max_iterations: int = DEFAULT_MAX_ITERATIONS,
|
|
151
|
-
temperature: float = DEFAULT_TEMPERATURE,
|
|
152
|
-
max_message_memory: int = DEFAULT_MAX_MESSAGE_MEMORY,
|
|
153
|
-
) -> None:
|
|
154
|
-
"""Initialize the Claude Plays Pokémon agent.
|
|
155
|
-
|
|
156
|
-
Args:
|
|
157
|
-
client: Anthropic API client
|
|
158
|
-
adapter: Game adapter
|
|
159
|
-
model: Claude model to use
|
|
160
|
-
max_tokens: Maximum tokens for response
|
|
161
|
-
max_iterations: Maximum number of iterations
|
|
162
|
-
temperature: Response temperature
|
|
163
|
-
max_message_memory: Maximum number of messages to remember
|
|
164
|
-
|
|
165
|
-
Raises:
|
|
166
|
-
ValueError: If API key is not provided
|
|
167
|
-
"""
|
|
168
|
-
if client is None:
|
|
169
|
-
api_key = settings.anthropic_api_key
|
|
170
|
-
if not api_key:
|
|
171
|
-
raise ValueError("Anthropic API key is required")
|
|
172
|
-
client = AsyncAnthropic(api_key=api_key)
|
|
173
|
-
|
|
174
|
-
if adapter is None:
|
|
175
|
-
adapter = Adapter()
|
|
176
|
-
|
|
177
|
-
super().__init__(
|
|
178
|
-
client=client,
|
|
179
|
-
adapter=adapter,
|
|
180
|
-
)
|
|
181
|
-
|
|
182
|
-
self.model = model
|
|
183
|
-
self.max_tokens = max_tokens
|
|
184
|
-
self.max_iterations = max_iterations
|
|
185
|
-
self.temperature = temperature
|
|
186
|
-
self.max_message_memory = max_message_memory
|
|
187
|
-
|
|
188
|
-
self.system_prompts: list[BetaMessageParam] = [
|
|
189
|
-
{
|
|
190
|
-
"role": "assistant",
|
|
191
|
-
"content": generate_system_prompt("Pokemon Red"),
|
|
192
|
-
}
|
|
193
|
-
]
|
|
194
|
-
|
|
195
|
-
self.messages: list[BetaMessageParam] = []
|
|
196
|
-
|
|
197
|
-
async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
|
|
198
|
-
"""Fetch a response from Claude based on the current observation.
|
|
199
|
-
|
|
200
|
-
Args:
|
|
201
|
-
observation: The current game observation
|
|
202
|
-
|
|
203
|
-
Returns:
|
|
204
|
-
tuple[list[dict[str, Any]], bool, list[LogType] | None]: List of actions, whether the game is done, and a list of strings or dictionaries of logs.
|
|
205
|
-
|
|
206
|
-
Raises:
|
|
207
|
-
ValueError: If client is not initialized
|
|
208
|
-
""" # noqa: E501
|
|
209
|
-
if not self.client:
|
|
210
|
-
raise ValueError("Client is not initialized")
|
|
211
|
-
|
|
212
|
-
user_content: list[BetaTextBlockParam | BetaImageBlockParam] = []
|
|
213
|
-
|
|
214
|
-
if observation.text:
|
|
215
|
-
user_content.append(
|
|
216
|
-
{
|
|
217
|
-
"type": "text",
|
|
218
|
-
"text": observation.text,
|
|
219
|
-
}
|
|
220
|
-
)
|
|
221
|
-
|
|
222
|
-
if observation.screenshot:
|
|
223
|
-
logger.debug("Processing screenshot data")
|
|
224
|
-
user_content.append(
|
|
225
|
-
{
|
|
226
|
-
"type": "image",
|
|
227
|
-
"source": {
|
|
228
|
-
"type": "base64",
|
|
229
|
-
"media_type": "image/png",
|
|
230
|
-
"data": observation.screenshot,
|
|
231
|
-
},
|
|
232
|
-
}
|
|
233
|
-
)
|
|
234
|
-
|
|
235
|
-
self.messages.append(
|
|
236
|
-
{
|
|
237
|
-
"role": "user",
|
|
238
|
-
"content": user_content,
|
|
239
|
-
}
|
|
240
|
-
)
|
|
241
|
-
|
|
242
|
-
logger.debug(
|
|
243
|
-
"Sending messages to Claude", extra={"messages": self.system_prompts + self.messages}
|
|
244
|
-
)
|
|
245
|
-
|
|
246
|
-
response = await self.client.beta.messages.create(
|
|
247
|
-
model=self.model,
|
|
248
|
-
messages=self.system_prompts + self.messages,
|
|
249
|
-
temperature=self.temperature,
|
|
250
|
-
max_tokens=self.max_tokens,
|
|
251
|
-
)
|
|
252
|
-
|
|
253
|
-
response_content = response.content
|
|
254
|
-
self.messages.append(
|
|
255
|
-
cast(
|
|
256
|
-
"BetaMessageParam",
|
|
257
|
-
{
|
|
258
|
-
"role": "user",
|
|
259
|
-
"content": response_content,
|
|
260
|
-
},
|
|
261
|
-
)
|
|
262
|
-
)
|
|
263
|
-
|
|
264
|
-
# Maintain message memory limit
|
|
265
|
-
while len(self.messages) > self.max_message_memory:
|
|
266
|
-
self.messages.pop(0)
|
|
267
|
-
|
|
268
|
-
action_list: list[dict[str, Any]] = []
|
|
269
|
-
|
|
270
|
-
# Parse response content to extract actions
|
|
271
|
-
for block in response_content:
|
|
272
|
-
if block.type == "text":
|
|
273
|
-
text_json = extract_json_from_response(block.text)
|
|
274
|
-
try:
|
|
275
|
-
text = json.loads(text_json)
|
|
276
|
-
if not isinstance(text, dict):
|
|
277
|
-
logger.error("Invalid response format", extra={"text": text})
|
|
278
|
-
raise ValueError("Response is not a dictionary")
|
|
279
|
-
|
|
280
|
-
action_list.extend(extract_action_from_response_block(text))
|
|
281
|
-
|
|
282
|
-
except json.JSONDecodeError as e:
|
|
283
|
-
logger.error(
|
|
284
|
-
"Failed to parse response", extra={"error": str(e), "text": text_json}
|
|
285
|
-
)
|
|
286
|
-
|
|
287
|
-
else:
|
|
288
|
-
logger.error("Unexpected block type", extra={"type": type(block)})
|
|
289
|
-
|
|
290
|
-
logger.debug("Extracted actions", extra={"actions": action_list})
|
|
291
|
-
|
|
292
|
-
return action_list, False
|
|
1
|
+
# pyright: reportGeneralTypeIssues=false
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
7
|
+
|
|
8
|
+
from anthropic import AsyncAnthropic
|
|
9
|
+
|
|
10
|
+
from hud.adapters import Adapter
|
|
11
|
+
from hud.adapters.common.types import CLA
|
|
12
|
+
|
|
13
|
+
# Update import to current API; if this script is legacy, keep it optional
|
|
14
|
+
try:
|
|
15
|
+
from hud.agents import MCPAgent as Agent # type: ignore[assignment]
|
|
16
|
+
except Exception: # pragma: no cover - optional example script
|
|
17
|
+
from hud.agents import MCPAgent as Agent # fallback
|
|
18
|
+
from hud.settings import settings
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from anthropic.types.beta import (
|
|
22
|
+
BetaImageBlockParam,
|
|
23
|
+
BetaMessageParam,
|
|
24
|
+
BetaTextBlockParam,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
from hud.env.environment import Observation
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
# Constants
|
|
32
|
+
DEFAULT_MODEL = "claude-3-7-sonnet-20250219"
|
|
33
|
+
DEFAULT_MAX_TOKENS = 4096
|
|
34
|
+
DEFAULT_MAX_ITERATIONS = 10
|
|
35
|
+
DEFAULT_TEMPERATURE = 0.7
|
|
36
|
+
DEFAULT_MAX_MESSAGE_MEMORY = 20
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def generate_system_prompt(game_name: str) -> str:
    """Build the instruction prompt that tells Claude how to play.

    The prompt describes the per-turn inputs (screenshot plus context), the
    JSON response schema, the two supported action types ("press" and
    "wait"), the pause-handling rules and general gameplay guidelines.

    Args:
        game_name: Name of the game being played. It is accepted for
            interface compatibility but is not interpolated into the text;
            the returned prompt is the same for every value.

    Returns:
        str: The full prompt text, ending with a trailing newline.
    """
    # The literal is kept verbatim: it is runtime text consumed by the model.
    prompt = """You are a specialized AI assistant designed to play Pokémon games via screenshot analysis and text instructions. Your task is to understand the current game state from visual input, determine appropriate actions, and respond with structured outputs that control the game.

For each turn, you will receive:
1. A screenshot of the current game state
2. Contextual information about the game progress, recent events, and objectives

Based on this information, you must analyze the situation, determine the best course of action, and provide a structured JSON response.

## Response Format
Your response MUST follow this exact JSON format with no additional markers, tags, or block delimiters:

{
"analysis": "Brief analysis of the current game situation, visible UI elements, and important context (1-3 sentences)",
"current_objective": "The immediate goal based on the game state (single sentence)",
"reasoning": "Step-by-step logic explaining your chosen action sequence (2-4 sentences)",
"progress_assessment": "Evaluation of whether previous action(s) achieved their intended goal and why/why not (1-2 sentences)",
"actions": [
{
"type": "press",
"keys": ["up"|"down"|"left"|"right"|"a"|"b"|"start"|"select"|"pause"]
},
{
"type": "wait",
"time": milliseconds_to_wait
}
]
}

IMPORTANT: Do not include any conversation markers like <<ASSISTANT_CONVERSATION_START>> or <<ASSISTANT_CONVERSATION_END>> around your response. Provide only the clean JSON object.

## Action Types
- Button presses: {"type": "press", "keys": ["button_name"]} - Valid buttons are: up, down, left, right, a, b, start, select, pause
- Wait for processing: {"type": "wait", "time": milliseconds}

## Important Rules
1. Never use "wait" commands while the game is paused. The game state will not change while paused, so waiting is ineffective.
2. If you detect the game is paused, your next action should be to unpause by using {"type": "press", "keys": ["pause"]} before attempting other actions.
3. Maintain awareness of whether the game is in a paused state based on visual cues in the screenshot.

## Game Play Guidelines
1. **Navigation**: Use directional buttons to move the character or navigate menus
2. **Interaction**: Use 'a' to confirm selections and interact with objects/NPCs, 'b' to cancel or exit menus
3. **Menu Access**: Use 'start' to access the game menu
4. **Battle Strategy**: Analyze Pokémon types, moves, and stats to make optimal battle decisions
5. **Progressive Play**: Work toward completing the current objective while being mindful of longer-term goals like leveling Pokémon, collecting badges, and advancing the story
6. **Resource Management**: Monitor and manage HP, PP, items, and Pokéballs effectively
7. **Memory**: Maintain awareness of the game history and your previous actions to avoid repetitive behaviors

Always provide thoughtful analysis and clear reasoning for your decisions. If you're uncertain about the best course of action, prioritize safe moves that gather more information.
"""  # noqa: E501
    return prompt
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def extract_action_from_response_block(block: dict[str, Any]) -> list[dict[str, Any]]:
    """Pull the action list out of a decoded response object.

    Args:
        block: The decoded JSON object produced by the model.

    Returns:
        list[dict[str, Any]]: The value stored under "actions" when that key
        is present and holds a list (the same list object, not a copy);
        otherwise an empty list.
    """
    candidate = block.get("actions")
    # A missing key yields None, which is rejected like any other non-list.
    return candidate if isinstance(candidate, list) else []
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def extract_json_from_response(response: str) -> str:
    """Extract the JSON payload from a model response string.

    Three strategies are tried in order:

    1. A markdown block opened with "```json": the text between the opening
       marker and the last "```" in the response.
    2. A bare object: the text from the first "{" to the last "}".
    3. Fallback: the whole response, stripped.

    Args:
        response: The response string containing JSON.

    Returns:
        str: The extracted, whitespace-stripped candidate JSON text. The
        result is not validated; callers still need to parse it.
    """
    fence_open = "```json"
    fence = "```"

    start = response.find(fence_open)
    if start != -1:
        start += len(fence_open)
        end = response.rfind(fence)
        if end >= start:
            return response[start:end].strip()
        # No closing fence: rfind matched the opening marker itself. Rather
        # than returning an empty slice, keep looking for an object in the
        # text that follows the marker.
        response = response[start:]

    # Try to find a JSON object directly. Requiring end > start avoids an
    # empty result when the only "}" appears before the first "{".
    start = response.find("{")
    end = response.rfind("}")
    if start != -1 and end > start:
        return response[start : end + 1].strip()

    return response.strip()
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class ClaudePlaysPokemon(Agent[AsyncAnthropic, CLA]):
    """AI agent that plays Pokémon games using Claude.

    Each call to ``fetch_response`` sends the current observation (text
    and/or screenshot) together with the stored conversation history to the
    Anthropic beta messages endpoint, records the reply in the history and
    returns the actions parsed from the reply's JSON.
    """

    def __init__(
        self,
        client: AsyncAnthropic | None = None,
        adapter: Adapter | None = None,
        model: str = DEFAULT_MODEL,
        max_tokens: int = DEFAULT_MAX_TOKENS,
        max_iterations: int = DEFAULT_MAX_ITERATIONS,
        temperature: float = DEFAULT_TEMPERATURE,
        max_message_memory: int = DEFAULT_MAX_MESSAGE_MEMORY,
    ) -> None:
        """Initialize the Claude Plays Pokémon agent.

        Args:
            client: Anthropic API client. When omitted, one is created from
                ``settings.anthropic_api_key``.
            adapter: Game adapter. When omitted, a default ``Adapter()`` is
                created.
            model: Claude model to use
            max_tokens: Maximum tokens for response
            max_iterations: Maximum number of iterations (stored on the
                instance; not read elsewhere in this class)
            temperature: Response temperature
            max_message_memory: Maximum number of messages to remember

        Raises:
            ValueError: If no client is given and no API key is configured
        """
        if client is None:
            api_key = settings.anthropic_api_key
            if not api_key:
                raise ValueError("Anthropic API key is required")
            client = AsyncAnthropic(api_key=api_key)

        if adapter is None:
            adapter = Adapter()

        super().__init__(
            client=client,
            adapter=adapter,
        )

        self.model = model
        self.max_tokens = max_tokens
        self.max_iterations = max_iterations
        self.temperature = temperature
        self.max_message_memory = max_message_memory

        # NOTE(review): the instructions are sent as a leading "assistant"
        # message rather than through the API's `system` parameter. Left
        # unchanged because `system_prompts` is a public attribute - confirm
        # whether this is intentional.
        self.system_prompts: list[BetaMessageParam] = [
            {
                "role": "assistant",
                "content": generate_system_prompt("Pokemon Red"),
            }
        ]

        # Rolling conversation history; trimmed in fetch_response.
        self.messages: list[BetaMessageParam] = []

    async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
        """Fetch a response from Claude based on the current observation.

        Args:
            observation: The current game observation. Its ``text`` and
                ``screenshot`` attributes are forwarded when truthy.

        Returns:
            tuple[list[dict[str, Any]], bool]: The actions extracted from the
            reply and a done flag, which this implementation always returns
            as False.

        Raises:
            ValueError: If client is not initialized, or if a text block
                decodes to JSON that is not an object.
        """
        if not self.client:
            raise ValueError("Client is not initialized")

        user_content: list[BetaTextBlockParam | BetaImageBlockParam] = []

        if observation.text:
            user_content.append(
                {
                    "type": "text",
                    "text": observation.text,
                }
            )

        if observation.screenshot:
            logger.debug("Processing screenshot data")
            # assumes observation.screenshot is already base64-encoded PNG
            # data - TODO confirm against the Observation producer
            user_content.append(
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": observation.screenshot,
                    },
                }
            )

        self.messages.append(
            {
                "role": "user",
                "content": user_content,
            }
        )

        logger.debug(
            "Sending messages to Claude", extra={"messages": self.system_prompts + self.messages}
        )

        response = await self.client.beta.messages.create(
            model=self.model,
            messages=self.system_prompts + self.messages,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
        )

        response_content = response.content
        # The reply comes from the model, so it is recorded as an "assistant"
        # turn; storing it as "user" would misattribute it on the next call.
        self.messages.append(
            cast(
                "BetaMessageParam",
                {
                    "role": "assistant",
                    "content": response_content,
                },
            )
        )

        # Maintain message memory limit: drop the oldest entries in one step.
        excess = len(self.messages) - self.max_message_memory
        if excess > 0:
            del self.messages[:excess]

        action_list: list[dict[str, Any]] = []

        # Parse response content to extract actions
        for block in response_content:
            if block.type == "text":
                text_json = extract_json_from_response(block.text)
                try:
                    text = json.loads(text_json)
                    if not isinstance(text, dict):
                        logger.error("Invalid response format", extra={"text": text})
                        # Not caught below: only JSONDecodeError is handled,
                        # so this propagates to the caller.
                        raise ValueError("Response is not a dictionary")

                    action_list.extend(extract_action_from_response_block(text))

                except json.JSONDecodeError as e:
                    # Malformed JSON is logged and the block is skipped.
                    logger.error(
                        "Failed to parse response", extra={"error": str(e), "text": text_json}
                    )

            else:
                logger.error("Unexpected block type", extra={"type": type(block)})

        logger.debug("Extracted actions", extra={"actions": action_list})

        return action_list, False
|
hud/otel/__init__.py
CHANGED
|
@@ -1,35 +1,35 @@
|
|
|
1
|
-
"""HUD OpenTelemetry integration.
|
|
2
|
-
|
|
3
|
-
This package provides the internal OpenTelemetry implementation for HUD telemetry.
|
|
4
|
-
Users should interact with the telemetry APIs through hud.telemetry instead.
|
|
5
|
-
|
|
6
|
-
Internal Components:
|
|
7
|
-
- config: OpenTelemetry configuration and setup
|
|
8
|
-
- context: Trace context management and utilities
|
|
9
|
-
- processors: Span enrichment with HUD context
|
|
10
|
-
- exporters: Sending spans to HUD backend
|
|
11
|
-
- collector: In-memory span collection for replay
|
|
12
|
-
- instrumentation: Auto-instrumentation for agents and MCP
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
from __future__ import annotations
|
|
16
|
-
|
|
17
|
-
from .collector import enable_trace_collection
|
|
18
|
-
from .config import configure_telemetry, is_telemetry_configured, shutdown_telemetry
|
|
19
|
-
from .context import (
|
|
20
|
-
get_current_task_run_id,
|
|
21
|
-
is_root_trace,
|
|
22
|
-
span_context,
|
|
23
|
-
trace,
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
__all__ = [
|
|
27
|
-
"configure_telemetry",
|
|
28
|
-
"enable_trace_collection",
|
|
29
|
-
"get_current_task_run_id",
|
|
30
|
-
"is_root_trace",
|
|
31
|
-
"is_telemetry_configured",
|
|
32
|
-
"shutdown_telemetry",
|
|
33
|
-
"span_context",
|
|
34
|
-
"trace",
|
|
35
|
-
]
|
|
1
|
+
"""HUD OpenTelemetry integration.
|
|
2
|
+
|
|
3
|
+
This package provides the internal OpenTelemetry implementation for HUD telemetry.
|
|
4
|
+
Users should interact with the telemetry APIs through hud.telemetry instead.
|
|
5
|
+
|
|
6
|
+
Internal Components:
|
|
7
|
+
- config: OpenTelemetry configuration and setup
|
|
8
|
+
- context: Trace context management and utilities
|
|
9
|
+
- processors: Span enrichment with HUD context
|
|
10
|
+
- exporters: Sending spans to HUD backend
|
|
11
|
+
- collector: In-memory span collection for replay
|
|
12
|
+
- instrumentation: Auto-instrumentation for agents and MCP
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from .collector import enable_trace_collection
|
|
18
|
+
from .config import configure_telemetry, is_telemetry_configured, shutdown_telemetry
|
|
19
|
+
from .context import (
|
|
20
|
+
get_current_task_run_id,
|
|
21
|
+
is_root_trace,
|
|
22
|
+
span_context,
|
|
23
|
+
trace,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"configure_telemetry",
|
|
28
|
+
"enable_trace_collection",
|
|
29
|
+
"get_current_task_run_id",
|
|
30
|
+
"is_root_trace",
|
|
31
|
+
"is_telemetry_configured",
|
|
32
|
+
"shutdown_telemetry",
|
|
33
|
+
"span_context",
|
|
34
|
+
"trace",
|
|
35
|
+
]
|