hud-python 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. (The linked details are not included in this text export.)

Files changed (192)
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +17 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +379 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +354 -0
  45. hud/clients/fastmcp.py +202 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -414
  87. hud/tools/computer/hud.py +376 -328
  88. hud/tools/computer/openai.py +295 -286
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.0.dist-info/METADATA +474 -0
  126. hud_python-0.4.0.dist-info/RECORD +132 -0
  127. hud_python-0.4.0.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.4.dist-info/METADATA +0 -284
  190. hud_python-0.3.4.dist-info/RECORD +0 -120
  191. /hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
hud/{agent → misc}/claude_plays_pokemon.py
@@ -1,283 +1,292 @@
1
- from __future__ import annotations
2
-
3
- import json
4
- import logging
5
- from typing import Any, cast
6
-
7
- from anthropic import AsyncAnthropic
8
- from anthropic.types.beta import (
9
- BetaMessageParam,
10
- BetaTextBlockParam,
11
- BetaImageBlockParam,
12
- )
13
-
14
- from hud.adapters.common.types import CLA, LogType
15
- from hud.agent import Agent
16
- from hud.adapters import Adapter
17
- from hud.settings import settings
18
- from hud.env.environment import Observation
19
-
20
- logger = logging.getLogger(__name__)
21
-
22
- # Constants
23
- DEFAULT_MODEL = "claude-3-7-sonnet-20250219"
24
- DEFAULT_MAX_TOKENS = 4096
25
- DEFAULT_MAX_ITERATIONS = 10
26
- DEFAULT_TEMPERATURE = 0.7
27
- DEFAULT_MAX_MESSAGE_MEMORY = 20
28
-
29
-
30
- def generate_system_prompt(game_name: str) -> str:
31
- """Generate the system prompt for the AI agent.
32
-
33
- Args:
34
- game_name: Name of the game being played
35
-
36
- Returns:
37
- str: The system prompt for the AI agent
38
- """
39
- return """You are a specialized AI assistant designed to play Pokémon games via screenshot analysis and text instructions. Your task is to understand the current game state from visual input, determine appropriate actions, and respond with structured outputs that control the game.
40
-
41
- For each turn, you will receive:
42
- 1. A screenshot of the current game state
43
- 2. Contextual information about the game progress, recent events, and objectives
44
-
45
- Based on this information, you must analyze the situation, determine the best course of action, and provide a structured JSON response.
46
-
47
- ## Response Format
48
- Your response MUST follow this exact JSON format with no additional markers, tags, or block delimiters:
49
-
50
- {
51
- "analysis": "Brief analysis of the current game situation, visible UI elements, and important context (1-3 sentences)",
52
- "current_objective": "The immediate goal based on the game state (single sentence)",
53
- "reasoning": "Step-by-step logic explaining your chosen action sequence (2-4 sentences)",
54
- "progress_assessment": "Evaluation of whether previous action(s) achieved their intended goal and why/why not (1-2 sentences)",
55
- "actions": [
56
- {
57
- "type": "press",
58
- "keys": ["up"|"down"|"left"|"right"|"a"|"b"|"start"|"select"|"pause"]
59
- },
60
- {
61
- "type": "wait",
62
- "time": milliseconds_to_wait
63
- }
64
- ]
65
- }
66
-
67
- IMPORTANT: Do not include any conversation markers like <<ASSISTANT_CONVERSATION_START>> or <<ASSISTANT_CONVERSATION_END>> around your response. Provide only the clean JSON object.
68
-
69
- ## Action Types
70
- - Button presses: {"type": "press", "keys": ["button_name"]} - Valid buttons are: up, down, left, right, a, b, start, select, pause
71
- - Wait for processing: {"type": "wait", "time": milliseconds}
72
-
73
- ## Important Rules
74
- 1. Never use "wait" commands while the game is paused. The game state will not change while paused, so waiting is ineffective.
75
- 2. If you detect the game is paused, your next action should be to unpause by using {"type": "press", "keys": ["pause"]} before attempting other actions.
76
- 3. Maintain awareness of whether the game is in a paused state based on visual cues in the screenshot.
77
-
78
- ## Game Play Guidelines
79
- 1. **Navigation**: Use directional buttons to move the character or navigate menus
80
- 2. **Interaction**: Use 'a' to confirm selections and interact with objects/NPCs, 'b' to cancel or exit menus
81
- 3. **Menu Access**: Use 'start' to access the game menu
82
- 4. **Battle Strategy**: Analyze Pokémon types, moves, and stats to make optimal battle decisions
83
- 5. **Progressive Play**: Work toward completing the current objective while being mindful of longer-term goals like leveling Pokémon, collecting badges, and advancing the story
84
- 6. **Resource Management**: Monitor and manage HP, PP, items, and Pokéballs effectively
85
- 7. **Memory**: Maintain awareness of the game history and your previous actions to avoid repetitive behaviors
86
-
87
- Always provide thoughtful analysis and clear reasoning for your decisions. If you're uncertain about the best course of action, prioritize safe moves that gather more information.
88
- """
89
-
90
-
91
- def extract_action_from_response_block(block: dict[str, Any]) -> list[dict[str, Any]]:
92
- """Extract actions from a response block.
93
-
94
- Args:
95
- block: The response block containing actions
96
-
97
- Returns:
98
- list[dict[str, Any]]: List of actions extracted from the block
99
- """
100
- if "actions" in block:
101
- actions = block["actions"]
102
- if isinstance(actions, list):
103
- return actions
104
- return []
105
-
106
-
107
- def extract_json_from_response(response: str) -> str:
108
- """Extract JSON from a response string.
109
-
110
- Args:
111
- response: The response string containing JSON
112
-
113
- Returns:
114
- str: The extracted JSON string
115
- """
116
- # Try to find JSON block with markdown code block markers
117
- start = response.find("```json")
118
- end = response.rfind("```")
119
- if start != -1 and end != -1:
120
- start += len("```json")
121
- return response[start:end].strip()
122
-
123
- # Try to find JSON object directly
124
- start = response.find("{")
125
- end = response.rfind("}")
126
- if start != -1 and end != -1:
127
- return response[start : end + 1].strip()
128
-
129
- return response.strip()
130
-
131
-
132
- class ClaudePlaysPokemon(Agent[AsyncAnthropic, CLA]):
133
- """AI agent that plays Pokémon games using Claude."""
134
-
135
- def __init__(
136
- self,
137
- client: AsyncAnthropic | None = None,
138
- adapter: Adapter | None = None,
139
- model: str = DEFAULT_MODEL,
140
- max_tokens: int = DEFAULT_MAX_TOKENS,
141
- max_iterations: int = DEFAULT_MAX_ITERATIONS,
142
- temperature: float = DEFAULT_TEMPERATURE,
143
- max_message_memory: int = DEFAULT_MAX_MESSAGE_MEMORY,
144
- ) -> None:
145
- """Initialize the Claude Plays Pokémon agent.
146
-
147
- Args:
148
- client: Anthropic API client
149
- adapter: Game adapter
150
- model: Claude model to use
151
- max_tokens: Maximum tokens for response
152
- max_iterations: Maximum number of iterations
153
- temperature: Response temperature
154
- max_message_memory: Maximum number of messages to remember
155
-
156
- Raises:
157
- ValueError: If API key is not provided
158
- """
159
- if client is None:
160
- api_key = settings.anthropic_api_key
161
- if not api_key:
162
- raise ValueError("Anthropic API key is required")
163
- client = AsyncAnthropic(api_key=api_key)
164
-
165
- if adapter is None:
166
- adapter = Adapter()
167
-
168
- super().__init__(
169
- client=client,
170
- adapter=adapter,
171
- )
172
-
173
- self.model = model
174
- self.max_tokens = max_tokens
175
- self.max_iterations = max_iterations
176
- self.temperature = temperature
177
- self.max_message_memory = max_message_memory
178
-
179
- self.system_prompts: list[BetaMessageParam] = [
180
- {
181
- "role": "assistant",
182
- "content": generate_system_prompt("Pokemon Red"),
183
- }
184
- ]
185
-
186
- self.messages: list[BetaMessageParam] = []
187
-
188
- async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
189
- """Fetch a response from Claude based on the current observation.
190
-
191
- Args:
192
- observation: The current game observation
193
-
194
- Returns:
195
- tuple[list[dict[str, Any]], bool, list[LogType] | None]: List of actions, whether the game is done, and a list of strings or dictionaries of logs.
196
-
197
- Raises:
198
- ValueError: If client is not initialized
199
- """
200
- if not self.client:
201
- raise ValueError("Client is not initialized")
202
-
203
- user_content: list[BetaTextBlockParam | BetaImageBlockParam] = []
204
-
205
- if observation.text:
206
- user_content.append(
207
- {
208
- "type": "text",
209
- "text": observation.text,
210
- }
211
- )
212
-
213
- if observation.screenshot:
214
- logger.debug("Processing screenshot data")
215
- user_content.append(
216
- {
217
- "type": "image",
218
- "source": {
219
- "type": "base64",
220
- "media_type": "image/png",
221
- "data": observation.screenshot,
222
- },
223
- }
224
- )
225
-
226
- self.messages.append(
227
- {
228
- "role": "user",
229
- "content": user_content,
230
- }
231
- )
232
-
233
- logger.debug(
234
- "Sending messages to Claude", extra={"messages": self.system_prompts + self.messages}
235
- )
236
-
237
- response = await self.client.beta.messages.create(
238
- model=self.model,
239
- messages=self.system_prompts + self.messages,
240
- temperature=self.temperature,
241
- max_tokens=self.max_tokens,
242
- )
243
-
244
- response_content = response.content
245
- self.messages.append(
246
- cast(
247
- BetaMessageParam,
248
- {
249
- "role": "user",
250
- "content": response_content,
251
- },
252
- )
253
- )
254
-
255
- # Maintain message memory limit
256
- while len(self.messages) > self.max_message_memory:
257
- self.messages.pop(0)
258
-
259
- action_list: list[dict[str, Any]] = []
260
-
261
- # Parse response content to extract actions
262
- for block in response_content:
263
- if block.type == "text":
264
- text_json = extract_json_from_response(block.text)
265
- try:
266
- text = json.loads(text_json)
267
- if not isinstance(text, dict):
268
- logger.error("Invalid response format", extra={"text": text})
269
- raise ValueError("Response is not a dictionary")
270
-
271
- action_list.extend(extract_action_from_response_block(text))
272
-
273
- except json.JSONDecodeError as e:
274
- logger.error(
275
- "Failed to parse response", extra={"error": str(e), "text": text_json}
276
- )
277
-
278
- else:
279
- logger.error("Unexpected block type", extra={"type": type(block)})
280
-
281
- logger.debug("Extracted actions", extra={"actions": action_list})
282
-
283
- return action_list, False
1
+ # pyright: reportGeneralTypeIssues=false
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import logging
6
+ from typing import TYPE_CHECKING, Any, cast
7
+
8
+ from anthropic import AsyncAnthropic
9
+
10
+ from hud.adapters import Adapter
11
+ from hud.adapters.common.types import CLA
12
+
13
+ # Update import to current API; if this script is legacy, keep it optional
14
+ try:
15
+ from hud.agents import MCPAgent as Agent # type: ignore[assignment]
16
+ except Exception: # pragma: no cover - optional example script
17
+ from hud.agents import MCPAgent as Agent # fallback
18
+ from hud.settings import settings
19
+
20
+ if TYPE_CHECKING:
21
+ from anthropic.types.beta import (
22
+ BetaImageBlockParam,
23
+ BetaMessageParam,
24
+ BetaTextBlockParam,
25
+ )
26
+
27
+ from hud.env.environment import Observation
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+ # Constants
32
+ DEFAULT_MODEL = "claude-3-7-sonnet-20250219"
33
+ DEFAULT_MAX_TOKENS = 4096
34
+ DEFAULT_MAX_ITERATIONS = 10
35
+ DEFAULT_TEMPERATURE = 0.7
36
+ DEFAULT_MAX_MESSAGE_MEMORY = 20
37
+
38
+
39
+ def generate_system_prompt(game_name: str) -> str:
40
+ """Generate the system prompt for the AI agent.
41
+
42
+ Args:
43
+ game_name: Name of the game being played
44
+
45
+ Returns:
46
+ str: The system prompt for the AI agent
47
+ """
48
+ return """You are a specialized AI assistant designed to play Pokémon games via screenshot analysis and text instructions. Your task is to understand the current game state from visual input, determine appropriate actions, and respond with structured outputs that control the game.
49
+
50
+ For each turn, you will receive:
51
+ 1. A screenshot of the current game state
52
+ 2. Contextual information about the game progress, recent events, and objectives
53
+
54
+ Based on this information, you must analyze the situation, determine the best course of action, and provide a structured JSON response.
55
+
56
+ ## Response Format
57
+ Your response MUST follow this exact JSON format with no additional markers, tags, or block delimiters:
58
+
59
+ {
60
+ "analysis": "Brief analysis of the current game situation, visible UI elements, and important context (1-3 sentences)",
61
+ "current_objective": "The immediate goal based on the game state (single sentence)",
62
+ "reasoning": "Step-by-step logic explaining your chosen action sequence (2-4 sentences)",
63
+ "progress_assessment": "Evaluation of whether previous action(s) achieved their intended goal and why/why not (1-2 sentences)",
64
+ "actions": [
65
+ {
66
+ "type": "press",
67
+ "keys": ["up"|"down"|"left"|"right"|"a"|"b"|"start"|"select"|"pause"]
68
+ },
69
+ {
70
+ "type": "wait",
71
+ "time": milliseconds_to_wait
72
+ }
73
+ ]
74
+ }
75
+
76
+ IMPORTANT: Do not include any conversation markers like <<ASSISTANT_CONVERSATION_START>> or <<ASSISTANT_CONVERSATION_END>> around your response. Provide only the clean JSON object.
77
+
78
+ ## Action Types
79
+ - Button presses: {"type": "press", "keys": ["button_name"]} - Valid buttons are: up, down, left, right, a, b, start, select, pause
80
+ - Wait for processing: {"type": "wait", "time": milliseconds}
81
+
82
+ ## Important Rules
83
+ 1. Never use "wait" commands while the game is paused. The game state will not change while paused, so waiting is ineffective.
84
+ 2. If you detect the game is paused, your next action should be to unpause by using {"type": "press", "keys": ["pause"]} before attempting other actions.
85
+ 3. Maintain awareness of whether the game is in a paused state based on visual cues in the screenshot.
86
+
87
+ ## Game Play Guidelines
88
+ 1. **Navigation**: Use directional buttons to move the character or navigate menus
89
+ 2. **Interaction**: Use 'a' to confirm selections and interact with objects/NPCs, 'b' to cancel or exit menus
90
+ 3. **Menu Access**: Use 'start' to access the game menu
91
+ 4. **Battle Strategy**: Analyze Pokémon types, moves, and stats to make optimal battle decisions
92
+ 5. **Progressive Play**: Work toward completing the current objective while being mindful of longer-term goals like leveling Pokémon, collecting badges, and advancing the story
93
+ 6. **Resource Management**: Monitor and manage HP, PP, items, and Pokéballs effectively
94
+ 7. **Memory**: Maintain awareness of the game history and your previous actions to avoid repetitive behaviors
95
+
96
+ Always provide thoughtful analysis and clear reasoning for your decisions. If you're uncertain about the best course of action, prioritize safe moves that gather more information.
97
+ """ # noqa: E501
98
+
99
+
100
+ def extract_action_from_response_block(block: dict[str, Any]) -> list[dict[str, Any]]:
101
+ """Extract actions from a response block.
102
+
103
+ Args:
104
+ block: The response block containing actions
105
+
106
+ Returns:
107
+ list[dict[str, Any]]: List of actions extracted from the block
108
+ """
109
+ if "actions" in block:
110
+ actions = block["actions"]
111
+ if isinstance(actions, list):
112
+ return actions
113
+ return []
114
+
115
+
116
+ def extract_json_from_response(response: str) -> str:
117
+ """Extract JSON from a response string.
118
+
119
+ Args:
120
+ response: The response string containing JSON
121
+
122
+ Returns:
123
+ str: The extracted JSON string
124
+ """
125
+ # Try to find JSON block with markdown code block markers
126
+ start = response.find("```json")
127
+ end = response.rfind("```")
128
+ if start != -1 and end != -1:
129
+ start += len("```json")
130
+ return response[start:end].strip()
131
+
132
+ # Try to find JSON object directly
133
+ start = response.find("{")
134
+ end = response.rfind("}")
135
+ if start != -1 and end != -1:
136
+ return response[start : end + 1].strip()
137
+
138
+ return response.strip()
139
+
140
+
141
+ class ClaudePlaysPokemon(Agent[AsyncAnthropic, CLA]):
142
+ """AI agent that plays Pokémon games using Claude."""
143
+
144
+ def __init__(
145
+ self,
146
+ client: AsyncAnthropic | None = None,
147
+ adapter: Adapter | None = None,
148
+ model: str = DEFAULT_MODEL,
149
+ max_tokens: int = DEFAULT_MAX_TOKENS,
150
+ max_iterations: int = DEFAULT_MAX_ITERATIONS,
151
+ temperature: float = DEFAULT_TEMPERATURE,
152
+ max_message_memory: int = DEFAULT_MAX_MESSAGE_MEMORY,
153
+ ) -> None:
154
+ """Initialize the Claude Plays Pokémon agent.
155
+
156
+ Args:
157
+ client: Anthropic API client
158
+ adapter: Game adapter
159
+ model: Claude model to use
160
+ max_tokens: Maximum tokens for response
161
+ max_iterations: Maximum number of iterations
162
+ temperature: Response temperature
163
+ max_message_memory: Maximum number of messages to remember
164
+
165
+ Raises:
166
+ ValueError: If API key is not provided
167
+ """
168
+ if client is None:
169
+ api_key = settings.anthropic_api_key
170
+ if not api_key:
171
+ raise ValueError("Anthropic API key is required")
172
+ client = AsyncAnthropic(api_key=api_key)
173
+
174
+ if adapter is None:
175
+ adapter = Adapter()
176
+
177
+ super().__init__(
178
+ client=client,
179
+ adapter=adapter,
180
+ )
181
+
182
+ self.model = model
183
+ self.max_tokens = max_tokens
184
+ self.max_iterations = max_iterations
185
+ self.temperature = temperature
186
+ self.max_message_memory = max_message_memory
187
+
188
+ self.system_prompts: list[BetaMessageParam] = [
189
+ {
190
+ "role": "assistant",
191
+ "content": generate_system_prompt("Pokemon Red"),
192
+ }
193
+ ]
194
+
195
+ self.messages: list[BetaMessageParam] = []
196
+
197
+ async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
198
+ """Fetch a response from Claude based on the current observation.
199
+
200
+ Args:
201
+ observation: The current game observation
202
+
203
+ Returns:
204
+ tuple[list[dict[str, Any]], bool, list[LogType] | None]: List of actions, whether the game is done, and a list of strings or dictionaries of logs.
205
+
206
+ Raises:
207
+ ValueError: If client is not initialized
208
+ """ # noqa: E501
209
+ if not self.client:
210
+ raise ValueError("Client is not initialized")
211
+
212
+ user_content: list[BetaTextBlockParam | BetaImageBlockParam] = []
213
+
214
+ if observation.text:
215
+ user_content.append(
216
+ {
217
+ "type": "text",
218
+ "text": observation.text,
219
+ }
220
+ )
221
+
222
+ if observation.screenshot:
223
+ logger.debug("Processing screenshot data")
224
+ user_content.append(
225
+ {
226
+ "type": "image",
227
+ "source": {
228
+ "type": "base64",
229
+ "media_type": "image/png",
230
+ "data": observation.screenshot,
231
+ },
232
+ }
233
+ )
234
+
235
+ self.messages.append(
236
+ {
237
+ "role": "user",
238
+ "content": user_content,
239
+ }
240
+ )
241
+
242
+ logger.debug(
243
+ "Sending messages to Claude", extra={"messages": self.system_prompts + self.messages}
244
+ )
245
+
246
+ response = await self.client.beta.messages.create(
247
+ model=self.model,
248
+ messages=self.system_prompts + self.messages,
249
+ temperature=self.temperature,
250
+ max_tokens=self.max_tokens,
251
+ )
252
+
253
+ response_content = response.content
254
+ self.messages.append(
255
+ cast(
256
+ "BetaMessageParam",
257
+ {
258
+ "role": "user",
259
+ "content": response_content,
260
+ },
261
+ )
262
+ )
263
+
264
+ # Maintain message memory limit
265
+ while len(self.messages) > self.max_message_memory:
266
+ self.messages.pop(0)
267
+
268
+ action_list: list[dict[str, Any]] = []
269
+
270
+ # Parse response content to extract actions
271
+ for block in response_content:
272
+ if block.type == "text":
273
+ text_json = extract_json_from_response(block.text)
274
+ try:
275
+ text = json.loads(text_json)
276
+ if not isinstance(text, dict):
277
+ logger.error("Invalid response format", extra={"text": text})
278
+ raise ValueError("Response is not a dictionary")
279
+
280
+ action_list.extend(extract_action_from_response_block(text))
281
+
282
+ except json.JSONDecodeError as e:
283
+ logger.error(
284
+ "Failed to parse response", extra={"error": str(e), "text": text_json}
285
+ )
286
+
287
+ else:
288
+ logger.error("Unexpected block type", extra={"type": type(block)})
289
+
290
+ logger.debug("Extracted actions", extra={"actions": action_list})
291
+
292
+ return action_list, False
hud/otel/__init__.py ADDED
@@ -0,0 +1,35 @@
1
+ """HUD OpenTelemetry integration.
2
+
3
+ This package provides the internal OpenTelemetry implementation for HUD telemetry.
4
+ Users should interact with the telemetry APIs through hud.telemetry instead.
5
+
6
+ Internal Components:
7
+ - config: OpenTelemetry configuration and setup
8
+ - context: Trace context management and utilities
9
+ - processors: Span enrichment with HUD context
10
+ - exporters: Sending spans to HUD backend
11
+ - collector: In-memory span collection for replay
12
+ - instrumentation: Auto-instrumentation for agents and MCP
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from .collector import enable_trace_collection
18
+ from .config import configure_telemetry, is_telemetry_configured, shutdown_telemetry
19
+ from .context import (
20
+ get_current_task_run_id,
21
+ is_root_trace,
22
+ span_context,
23
+ trace,
24
+ )
25
+
26
+ __all__ = [
27
+ "configure_telemetry",
28
+ "enable_trace_collection",
29
+ "get_current_task_run_id",
30
+ "is_root_trace",
31
+ "is_telemetry_configured",
32
+ "shutdown_telemetry",
33
+ "span_context",
34
+ "trace",
35
+ ]