hud-python 0.4.45__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (282) hide show
  1. hud/__init__.py +27 -7
  2. hud/agents/__init__.py +70 -5
  3. hud/agents/base.py +238 -500
  4. hud/agents/claude.py +236 -247
  5. hud/agents/gateway.py +42 -0
  6. hud/agents/gemini.py +264 -0
  7. hud/agents/gemini_cua.py +324 -0
  8. hud/agents/grounded_openai.py +98 -100
  9. hud/agents/misc/integration_test_agent.py +51 -20
  10. hud/agents/misc/response_agent.py +48 -36
  11. hud/agents/openai.py +282 -296
  12. hud/agents/{openai_chat_generic.py → openai_chat.py} +63 -33
  13. hud/agents/operator.py +199 -0
  14. hud/agents/resolver.py +70 -0
  15. hud/agents/tests/conftest.py +133 -0
  16. hud/agents/tests/test_base.py +300 -622
  17. hud/agents/tests/test_base_runtime.py +233 -0
  18. hud/agents/tests/test_claude.py +381 -214
  19. hud/agents/tests/test_client.py +9 -10
  20. hud/agents/tests/test_gemini.py +369 -0
  21. hud/agents/tests/test_grounded_openai_agent.py +65 -50
  22. hud/agents/tests/test_openai.py +377 -140
  23. hud/agents/tests/test_operator.py +362 -0
  24. hud/agents/tests/test_resolver.py +192 -0
  25. hud/agents/tests/test_run_eval.py +179 -0
  26. hud/agents/types.py +148 -0
  27. hud/cli/__init__.py +493 -546
  28. hud/cli/analyze.py +43 -5
  29. hud/cli/build.py +699 -113
  30. hud/cli/debug.py +8 -5
  31. hud/cli/dev.py +889 -732
  32. hud/cli/eval.py +793 -667
  33. hud/cli/flows/dev.py +167 -0
  34. hud/cli/flows/init.py +191 -0
  35. hud/cli/flows/tasks.py +153 -56
  36. hud/cli/flows/templates.py +151 -0
  37. hud/cli/flows/tests/__init__.py +1 -0
  38. hud/cli/flows/tests/test_dev.py +126 -0
  39. hud/cli/init.py +60 -58
  40. hud/cli/pull.py +1 -1
  41. hud/cli/push.py +38 -13
  42. hud/cli/rft.py +311 -0
  43. hud/cli/rft_status.py +145 -0
  44. hud/cli/tests/test_analyze.py +5 -5
  45. hud/cli/tests/test_analyze_metadata.py +3 -2
  46. hud/cli/tests/test_analyze_module.py +120 -0
  47. hud/cli/tests/test_build.py +110 -8
  48. hud/cli/tests/test_build_failure.py +41 -0
  49. hud/cli/tests/test_build_module.py +50 -0
  50. hud/cli/tests/test_cli_init.py +6 -1
  51. hud/cli/tests/test_cli_more_wrappers.py +30 -0
  52. hud/cli/tests/test_cli_root.py +140 -0
  53. hud/cli/tests/test_convert.py +361 -0
  54. hud/cli/tests/test_debug.py +12 -10
  55. hud/cli/tests/test_dev.py +197 -0
  56. hud/cli/tests/test_eval.py +251 -0
  57. hud/cli/tests/test_eval_bedrock.py +51 -0
  58. hud/cli/tests/test_init.py +124 -0
  59. hud/cli/tests/test_main_module.py +11 -5
  60. hud/cli/tests/test_mcp_server.py +12 -100
  61. hud/cli/tests/test_push.py +1 -1
  62. hud/cli/tests/test_push_happy.py +74 -0
  63. hud/cli/tests/test_push_wrapper.py +23 -0
  64. hud/cli/tests/test_registry.py +1 -1
  65. hud/cli/tests/test_utils.py +1 -1
  66. hud/cli/{rl → utils}/celebrate.py +14 -12
  67. hud/cli/utils/config.py +18 -1
  68. hud/cli/utils/docker.py +130 -4
  69. hud/cli/utils/env_check.py +9 -9
  70. hud/cli/utils/git.py +136 -0
  71. hud/cli/utils/interactive.py +39 -5
  72. hud/cli/utils/metadata.py +70 -1
  73. hud/cli/utils/runner.py +1 -1
  74. hud/cli/utils/server.py +2 -2
  75. hud/cli/utils/source_hash.py +3 -3
  76. hud/cli/utils/tasks.py +4 -1
  77. hud/cli/utils/tests/__init__.py +0 -0
  78. hud/cli/utils/tests/test_config.py +58 -0
  79. hud/cli/utils/tests/test_docker.py +93 -0
  80. hud/cli/utils/tests/test_docker_hints.py +71 -0
  81. hud/cli/utils/tests/test_env_check.py +74 -0
  82. hud/cli/utils/tests/test_environment.py +42 -0
  83. hud/cli/utils/tests/test_git.py +142 -0
  84. hud/cli/utils/tests/test_interactive_module.py +60 -0
  85. hud/cli/utils/tests/test_local_runner.py +50 -0
  86. hud/cli/utils/tests/test_logging_utils.py +23 -0
  87. hud/cli/utils/tests/test_metadata.py +49 -0
  88. hud/cli/utils/tests/test_package_runner.py +35 -0
  89. hud/cli/utils/tests/test_registry_utils.py +49 -0
  90. hud/cli/utils/tests/test_remote_runner.py +25 -0
  91. hud/cli/utils/tests/test_runner_modules.py +52 -0
  92. hud/cli/utils/tests/test_source_hash.py +36 -0
  93. hud/cli/utils/tests/test_tasks.py +80 -0
  94. hud/cli/utils/version_check.py +258 -0
  95. hud/cli/{rl → utils}/viewer.py +2 -2
  96. hud/clients/README.md +12 -11
  97. hud/clients/__init__.py +4 -3
  98. hud/clients/base.py +166 -26
  99. hud/clients/environment.py +51 -0
  100. hud/clients/fastmcp.py +13 -6
  101. hud/clients/mcp_use.py +45 -15
  102. hud/clients/tests/test_analyze_scenarios.py +206 -0
  103. hud/clients/tests/test_protocol.py +9 -3
  104. hud/datasets/__init__.py +23 -20
  105. hud/datasets/loader.py +326 -0
  106. hud/datasets/runner.py +198 -105
  107. hud/datasets/tests/__init__.py +0 -0
  108. hud/datasets/tests/test_loader.py +221 -0
  109. hud/datasets/tests/test_utils.py +315 -0
  110. hud/datasets/utils.py +270 -90
  111. hud/environment/__init__.py +52 -0
  112. hud/environment/connection.py +258 -0
  113. hud/environment/connectors/__init__.py +33 -0
  114. hud/environment/connectors/base.py +68 -0
  115. hud/environment/connectors/local.py +177 -0
  116. hud/environment/connectors/mcp_config.py +137 -0
  117. hud/environment/connectors/openai.py +101 -0
  118. hud/environment/connectors/remote.py +172 -0
  119. hud/environment/environment.py +835 -0
  120. hud/environment/integrations/__init__.py +45 -0
  121. hud/environment/integrations/adk.py +67 -0
  122. hud/environment/integrations/anthropic.py +196 -0
  123. hud/environment/integrations/gemini.py +92 -0
  124. hud/environment/integrations/langchain.py +82 -0
  125. hud/environment/integrations/llamaindex.py +68 -0
  126. hud/environment/integrations/openai.py +238 -0
  127. hud/environment/mock.py +306 -0
  128. hud/environment/router.py +263 -0
  129. hud/environment/scenarios.py +620 -0
  130. hud/environment/tests/__init__.py +1 -0
  131. hud/environment/tests/test_connection.py +317 -0
  132. hud/environment/tests/test_connectors.py +205 -0
  133. hud/environment/tests/test_environment.py +593 -0
  134. hud/environment/tests/test_integrations.py +257 -0
  135. hud/environment/tests/test_local_connectors.py +242 -0
  136. hud/environment/tests/test_scenarios.py +1086 -0
  137. hud/environment/tests/test_tools.py +208 -0
  138. hud/environment/types.py +23 -0
  139. hud/environment/utils/__init__.py +35 -0
  140. hud/environment/utils/formats.py +215 -0
  141. hud/environment/utils/schema.py +171 -0
  142. hud/environment/utils/tool_wrappers.py +113 -0
  143. hud/eval/__init__.py +67 -0
  144. hud/eval/context.py +727 -0
  145. hud/eval/display.py +299 -0
  146. hud/eval/instrument.py +187 -0
  147. hud/eval/manager.py +533 -0
  148. hud/eval/parallel.py +268 -0
  149. hud/eval/task.py +372 -0
  150. hud/eval/tests/__init__.py +1 -0
  151. hud/eval/tests/test_context.py +178 -0
  152. hud/eval/tests/test_eval.py +210 -0
  153. hud/eval/tests/test_manager.py +152 -0
  154. hud/eval/tests/test_parallel.py +168 -0
  155. hud/eval/tests/test_task.py +291 -0
  156. hud/eval/types.py +65 -0
  157. hud/eval/utils.py +194 -0
  158. hud/patches/__init__.py +19 -0
  159. hud/patches/mcp_patches.py +308 -0
  160. hud/patches/warnings.py +54 -0
  161. hud/samples/browser.py +4 -4
  162. hud/server/__init__.py +2 -1
  163. hud/server/low_level.py +2 -1
  164. hud/server/router.py +164 -0
  165. hud/server/server.py +567 -80
  166. hud/server/tests/test_mcp_server_integration.py +11 -11
  167. hud/server/tests/test_mcp_server_more.py +1 -1
  168. hud/server/tests/test_server_extra.py +2 -0
  169. hud/settings.py +45 -3
  170. hud/shared/exceptions.py +36 -10
  171. hud/shared/hints.py +26 -1
  172. hud/shared/requests.py +15 -3
  173. hud/shared/tests/test_exceptions.py +40 -31
  174. hud/shared/tests/test_hints.py +167 -0
  175. hud/telemetry/__init__.py +20 -19
  176. hud/telemetry/exporter.py +201 -0
  177. hud/telemetry/instrument.py +165 -253
  178. hud/telemetry/tests/test_eval_telemetry.py +356 -0
  179. hud/telemetry/tests/test_exporter.py +258 -0
  180. hud/telemetry/tests/test_instrument.py +401 -0
  181. hud/tools/__init__.py +18 -2
  182. hud/tools/agent.py +223 -0
  183. hud/tools/apply_patch.py +639 -0
  184. hud/tools/base.py +54 -4
  185. hud/tools/bash.py +2 -2
  186. hud/tools/computer/__init__.py +36 -3
  187. hud/tools/computer/anthropic.py +2 -2
  188. hud/tools/computer/gemini.py +385 -0
  189. hud/tools/computer/hud.py +23 -6
  190. hud/tools/computer/openai.py +20 -21
  191. hud/tools/computer/qwen.py +434 -0
  192. hud/tools/computer/settings.py +37 -0
  193. hud/tools/edit.py +3 -7
  194. hud/tools/executors/base.py +4 -2
  195. hud/tools/executors/pyautogui.py +1 -1
  196. hud/tools/grounding/grounded_tool.py +13 -18
  197. hud/tools/grounding/grounder.py +10 -31
  198. hud/tools/grounding/tests/test_grounded_tool.py +26 -44
  199. hud/tools/jupyter.py +330 -0
  200. hud/tools/playwright.py +18 -3
  201. hud/tools/shell.py +308 -0
  202. hud/tools/tests/test_agent_tool.py +355 -0
  203. hud/tools/tests/test_apply_patch.py +718 -0
  204. hud/tools/tests/test_computer.py +4 -9
  205. hud/tools/tests/test_computer_actions.py +24 -2
  206. hud/tools/tests/test_jupyter_tool.py +181 -0
  207. hud/tools/tests/test_shell.py +596 -0
  208. hud/tools/tests/test_submit.py +85 -0
  209. hud/tools/tests/test_types.py +193 -0
  210. hud/tools/types.py +21 -1
  211. hud/types.py +194 -56
  212. hud/utils/__init__.py +2 -0
  213. hud/utils/env.py +67 -0
  214. hud/utils/hud_console.py +89 -18
  215. hud/utils/mcp.py +15 -58
  216. hud/utils/strict_schema.py +162 -0
  217. hud/utils/tests/test_init.py +1 -2
  218. hud/utils/tests/test_mcp.py +1 -28
  219. hud/utils/tests/test_pretty_errors.py +186 -0
  220. hud/utils/tests/test_tool_shorthand.py +154 -0
  221. hud/utils/tests/test_version.py +1 -1
  222. hud/utils/types.py +20 -0
  223. hud/version.py +1 -1
  224. hud_python-0.5.13.dist-info/METADATA +264 -0
  225. hud_python-0.5.13.dist-info/RECORD +305 -0
  226. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/WHEEL +1 -1
  227. hud/agents/langchain.py +0 -261
  228. hud/agents/lite_llm.py +0 -72
  229. hud/cli/rl/__init__.py +0 -180
  230. hud/cli/rl/config.py +0 -101
  231. hud/cli/rl/display.py +0 -133
  232. hud/cli/rl/gpu.py +0 -63
  233. hud/cli/rl/gpu_utils.py +0 -321
  234. hud/cli/rl/local_runner.py +0 -595
  235. hud/cli/rl/presets.py +0 -96
  236. hud/cli/rl/remote_runner.py +0 -463
  237. hud/cli/rl/rl_api.py +0 -150
  238. hud/cli/rl/vllm.py +0 -177
  239. hud/cli/rl/wait_utils.py +0 -89
  240. hud/datasets/parallel.py +0 -687
  241. hud/misc/__init__.py +0 -1
  242. hud/misc/claude_plays_pokemon.py +0 -292
  243. hud/otel/__init__.py +0 -35
  244. hud/otel/collector.py +0 -142
  245. hud/otel/config.py +0 -181
  246. hud/otel/context.py +0 -570
  247. hud/otel/exporters.py +0 -369
  248. hud/otel/instrumentation.py +0 -135
  249. hud/otel/processors.py +0 -121
  250. hud/otel/tests/__init__.py +0 -1
  251. hud/otel/tests/test_processors.py +0 -197
  252. hud/rl/README.md +0 -30
  253. hud/rl/__init__.py +0 -1
  254. hud/rl/actor.py +0 -176
  255. hud/rl/buffer.py +0 -405
  256. hud/rl/chat_template.jinja +0 -101
  257. hud/rl/config.py +0 -192
  258. hud/rl/distributed.py +0 -132
  259. hud/rl/learner.py +0 -637
  260. hud/rl/tests/__init__.py +0 -1
  261. hud/rl/tests/test_learner.py +0 -186
  262. hud/rl/train.py +0 -382
  263. hud/rl/types.py +0 -101
  264. hud/rl/utils/start_vllm_server.sh +0 -30
  265. hud/rl/utils.py +0 -524
  266. hud/rl/vllm_adapter.py +0 -143
  267. hud/telemetry/job.py +0 -352
  268. hud/telemetry/replay.py +0 -74
  269. hud/telemetry/tests/test_replay.py +0 -40
  270. hud/telemetry/tests/test_trace.py +0 -63
  271. hud/telemetry/trace.py +0 -158
  272. hud/utils/agent_factories.py +0 -86
  273. hud/utils/async_utils.py +0 -65
  274. hud/utils/group_eval.py +0 -223
  275. hud/utils/progress.py +0 -149
  276. hud/utils/tasks.py +0 -127
  277. hud/utils/tests/test_async_utils.py +0 -173
  278. hud/utils/tests/test_progress.py +0 -261
  279. hud_python-0.4.45.dist-info/METADATA +0 -552
  280. hud_python-0.4.45.dist-info/RECORD +0 -228
  281. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/entry_points.txt +0 -0
  282. {hud_python-0.4.45.dist-info → hud_python-0.5.13.dist-info}/licenses/LICENSE +0 -0
@@ -1,292 +0,0 @@
1
- # pyright: reportGeneralTypeIssues=false
2
- from __future__ import annotations
3
-
4
- import json
5
- import logging
6
- from typing import TYPE_CHECKING, Any, cast
7
-
8
- from anthropic import AsyncAnthropic
9
-
10
- from hud.adapters import Adapter
11
- from hud.adapters.common.types import CLA
12
-
13
- # Update import to current API; if this script is legacy, keep it optional
14
- try:
15
- from hud.agents import MCPAgent as Agent # type: ignore[assignment]
16
- except Exception: # pragma: no cover - optional example script
17
- from hud.agents import MCPAgent as Agent # fallback
18
- from hud.settings import settings
19
-
20
- if TYPE_CHECKING:
21
- from anthropic.types.beta import (
22
- BetaImageBlockParam,
23
- BetaMessageParam,
24
- BetaTextBlockParam,
25
- )
26
-
27
- from hud.env.environment import Observation
28
-
29
- logger = logging.getLogger(__name__)
30
-
31
- # Constants
32
- DEFAULT_MODEL = "claude-3-7-sonnet-20250219"
33
- DEFAULT_MAX_TOKENS = 4096
34
- DEFAULT_MAX_ITERATIONS = 10
35
- DEFAULT_TEMPERATURE = 0.7
36
- DEFAULT_MAX_MESSAGE_MEMORY = 20
37
-
38
-
39
def generate_system_prompt(game_name: str) -> str:
    """Build the system prompt given to the game-playing model.

    The prompt describes the per-turn inputs (screenshot plus context), the
    JSON response schema, the supported action types, and general play
    guidelines.

    Args:
        game_name: Name of the game being played. The prompt text is fixed
            and this value is not interpolated into it.

    Returns:
        str: The complete system prompt.
    """
    prompt = """You are a specialized AI assistant designed to play Pokémon games via screenshot analysis and text instructions. Your task is to understand the current game state from visual input, determine appropriate actions, and respond with structured outputs that control the game.

For each turn, you will receive:
1. A screenshot of the current game state
2. Contextual information about the game progress, recent events, and objectives

Based on this information, you must analyze the situation, determine the best course of action, and provide a structured JSON response.

## Response Format
Your response MUST follow this exact JSON format with no additional markers, tags, or block delimiters:

{
"analysis": "Brief analysis of the current game situation, visible UI elements, and important context (1-3 sentences)",
"current_objective": "The immediate goal based on the game state (single sentence)",
"reasoning": "Step-by-step logic explaining your chosen action sequence (2-4 sentences)",
"progress_assessment": "Evaluation of whether previous action(s) achieved their intended goal and why/why not (1-2 sentences)",
"actions": [
{
"type": "press",
"keys": ["up"|"down"|"left"|"right"|"a"|"b"|"start"|"select"|"pause"]
},
{
"type": "wait",
"time": milliseconds_to_wait
}
]
}

IMPORTANT: Do not include any conversation markers like <<ASSISTANT_CONVERSATION_START>> or <<ASSISTANT_CONVERSATION_END>> around your response. Provide only the clean JSON object.

## Action Types
- Button presses: {"type": "press", "keys": ["button_name"]} - Valid buttons are: up, down, left, right, a, b, start, select, pause
- Wait for processing: {"type": "wait", "time": milliseconds}

## Important Rules
1. Never use "wait" commands while the game is paused. The game state will not change while paused, so waiting is ineffective.
2. If you detect the game is paused, your next action should be to unpause by using {"type": "press", "keys": ["pause"]} before attempting other actions.
3. Maintain awareness of whether the game is in a paused state based on visual cues in the screenshot.

## Game Play Guidelines
1. **Navigation**: Use directional buttons to move the character or navigate menus
2. **Interaction**: Use 'a' to confirm selections and interact with objects/NPCs, 'b' to cancel or exit menus
3. **Menu Access**: Use 'start' to access the game menu
4. **Battle Strategy**: Analyze Pokémon types, moves, and stats to make optimal battle decisions
5. **Progressive Play**: Work toward completing the current objective while being mindful of longer-term goals like leveling Pokémon, collecting badges, and advancing the story
6. **Resource Management**: Monitor and manage HP, PP, items, and Pokéballs effectively
7. **Memory**: Maintain awareness of the game history and your previous actions to avoid repetitive behaviors

Always provide thoughtful analysis and clear reasoning for your decisions. If you're uncertain about the best course of action, prioritize safe moves that gather more information.
"""  # noqa: E501
    return prompt
98
-
99
-
100
def extract_action_from_response_block(block: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the action list stored under the ``"actions"`` key of *block*.

    Args:
        block: Parsed response object that may carry an ``"actions"`` entry.

    Returns:
        list[dict[str, Any]]: The value of ``block["actions"]`` when it is a
        list (the same list object, not a copy); otherwise an empty list.
    """
    candidate = block.get("actions")
    return candidate if isinstance(candidate, list) else []
114
-
115
-
116
def extract_json_from_response(response: str) -> str:
    """Extract the JSON payload embedded in a model response.

    Three strategies are tried in order:

    1. A markdown block opened with "```json": the text between that opener
       and the *last* "```" fence in the response.
    2. A bare object: the text from the first "{" to the last "}".
    3. The whole response.

    Args:
        response: The raw response text.

    Returns:
        str: The extracted candidate JSON text with surrounding whitespace
        stripped. The text is not validated as JSON.
    """
    fence = "```"
    opener = "```json"

    open_at = response.find(opener)
    if open_at != -1:
        body_start = open_at + len(opener)
        close_at = response.rfind(fence)
        # When the block is never closed, rfind() lands on the opening fence
        # itself; only accept a closing fence located after the opener,
        # otherwise fall through to the brace scan.
        if close_at >= body_start:
            return response[body_start:close_at].strip()

    first_brace = response.find("{")
    last_brace = response.rfind("}")
    # Require the closing brace to follow the opening one so that text such
    # as "} ... {" is not reduced to an empty slice.
    if first_brace != -1 and last_brace > first_brace:
        return response[first_brace : last_brace + 1].strip()

    return response.strip()
139
-
140
-
141
class ClaudePlaysPokemon(Agent[AsyncAnthropic, CLA]):
    """AI agent that plays Pokémon games using Claude.

    The agent keeps a rolling conversation history (``self.messages``) that is
    sent, prefixed by ``self.system_prompts``, on every call to
    :meth:`fetch_response`.
    """

    def __init__(
        self,
        client: AsyncAnthropic | None = None,
        adapter: Adapter | None = None,
        model: str = DEFAULT_MODEL,
        max_tokens: int = DEFAULT_MAX_TOKENS,
        max_iterations: int = DEFAULT_MAX_ITERATIONS,
        temperature: float = DEFAULT_TEMPERATURE,
        max_message_memory: int = DEFAULT_MAX_MESSAGE_MEMORY,
    ) -> None:
        """Initialize the Claude Plays Pokémon agent.

        Args:
            client: Anthropic API client. When omitted, one is created from
                ``settings.anthropic_api_key``.
            adapter: Game adapter. When omitted, a default ``Adapter()`` is
                created.
            model: Claude model to use.
            max_tokens: Maximum tokens for each response.
            max_iterations: Maximum number of iterations (stored on the
                instance; not otherwise read by this class).
            temperature: Sampling temperature passed to the API.
            max_message_memory: Maximum number of history messages retained
                between calls.

        Raises:
            ValueError: If no client is given and no API key is configured.
        """
        if client is None:
            api_key = settings.anthropic_api_key
            if not api_key:
                raise ValueError("Anthropic API key is required")
            client = AsyncAnthropic(api_key=api_key)

        if adapter is None:
            adapter = Adapter()

        super().__init__(
            client=client,
            adapter=adapter,
        )

        self.model = model
        self.max_tokens = max_tokens
        self.max_iterations = max_iterations
        self.temperature = temperature
        self.max_message_memory = max_message_memory

        # NOTE(review): the prompt is sent as a leading "assistant" message
        # rather than through the API's dedicated system-prompt parameter.
        # Left as-is because ``system_prompts`` is a public attribute;
        # confirm against the Messages API documentation before changing.
        self.system_prompts: list[BetaMessageParam] = [
            {
                "role": "assistant",
                "content": generate_system_prompt("Pokemon Red"),
            }
        ]

        self.messages: list[BetaMessageParam] = []

    async def fetch_response(self, observation: Observation) -> tuple[list[dict[str, Any]], bool]:
        """Fetch a response from Claude based on the current observation.

        The observation's text and screenshot (when present) are appended to
        the history as a user message, the model is queried, and its reply is
        appended to the history as an assistant message before the history is
        trimmed to ``max_message_memory`` entries.

        Args:
            observation: The current game observation.

        Returns:
            tuple[list[dict[str, Any]], bool]: The actions parsed from the
            reply and a "done" flag. The flag is always ``False`` in this
            implementation.

        Raises:
            ValueError: If the client is not initialized, or if a text block
                parses as JSON but is not a JSON object.
        """
        if not self.client:
            raise ValueError("Client is not initialized")

        user_content: list[BetaTextBlockParam | BetaImageBlockParam] = []

        if observation.text:
            user_content.append(
                {
                    "type": "text",
                    "text": observation.text,
                }
            )

        if observation.screenshot:
            logger.debug("Processing screenshot data")
            user_content.append(
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": observation.screenshot,
                    },
                }
            )

        self.messages.append(
            {
                "role": "user",
                "content": user_content,
            }
        )

        outgoing = self.system_prompts + self.messages
        logger.debug("Sending messages to Claude", extra={"messages": outgoing})

        response = await self.client.beta.messages.create(
            model=self.model,
            messages=outgoing,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
        )

        response_content = response.content
        # Record the model's own reply under the "assistant" role so that the
        # next request presents the conversation with the correct speakers.
        self.messages.append(
            cast(
                "BetaMessageParam",
                {
                    "role": "assistant",
                    "content": response_content,
                },
            )
        )

        # Maintain message memory limit: drop the oldest entries in one step.
        overflow = len(self.messages) - self.max_message_memory
        if overflow > 0:
            del self.messages[:overflow]

        action_list: list[dict[str, Any]] = []

        # Parse response content to extract actions
        for block in response_content:
            if block.type != "text":
                logger.error("Unexpected block type", extra={"type": type(block)})
                continue

            text_json = extract_json_from_response(block.text)
            try:
                text = json.loads(text_json)
            except json.JSONDecodeError as e:
                # Malformed JSON is logged and skipped; other blocks may
                # still carry usable actions.
                logger.error(
                    "Failed to parse response", extra={"error": str(e), "text": text_json}
                )
                continue

            if not isinstance(text, dict):
                logger.error("Invalid response format", extra={"text": text})
                raise ValueError("Response is not a dictionary")

            action_list.extend(extract_action_from_response_block(text))

        logger.debug("Extracted actions", extra={"actions": action_list})

        return action_list, False
hud/otel/__init__.py DELETED
@@ -1,35 +0,0 @@
1
- """HUD OpenTelemetry integration.
2
-
3
- This package provides the internal OpenTelemetry implementation for HUD telemetry.
4
- Users should interact with the telemetry APIs through hud.telemetry instead.
5
-
6
- Internal Components:
7
- - config: OpenTelemetry configuration and setup
8
- - context: Trace context management and utilities
9
- - processors: Span enrichment with HUD context
10
- - exporters: Sending spans to HUD backend
11
- - collector: In-memory span collection for replay
12
- - instrumentation: Auto-instrumentation for agents and MCP
13
- """
14
-
15
- from __future__ import annotations
16
-
17
- from .collector import enable_trace_collection
18
- from .config import configure_telemetry, is_telemetry_configured, shutdown_telemetry
19
- from .context import (
20
- get_current_task_run_id,
21
- is_root_trace,
22
- span_context,
23
- trace,
24
- )
25
-
26
- __all__ = [
27
- "configure_telemetry",
28
- "enable_trace_collection",
29
- "get_current_task_run_id",
30
- "is_root_trace",
31
- "is_telemetry_configured",
32
- "shutdown_telemetry",
33
- "span_context",
34
- "trace",
35
- ]
hud/otel/collector.py DELETED
@@ -1,142 +0,0 @@
1
- """Global span collector for building in-memory traces.
2
-
3
- This module provides a way to collect spans during execution
4
- and retrieve them as a Trace object, enabling replay functionality
5
- without modifying agent code.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- import logging
11
- import threading
12
- from contextvars import ContextVar
13
- from typing import TYPE_CHECKING
14
-
15
- from opentelemetry import trace
16
- from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
17
-
18
- from hud.types import Trace
19
-
20
- if TYPE_CHECKING:
21
- from opentelemetry.sdk.trace import ReadableSpan
22
-
23
- logger = logging.getLogger(__name__)
24
-
25
- # Global storage for collected spans by task_run_id
26
- _TRACE_STORAGE: dict[str, TraceCollector] = {}
27
- _LOCK = threading.Lock()
28
-
29
- # Context variable to track if collection is enabled
30
- _collecting_enabled: ContextVar[bool] = ContextVar("collecting_enabled", default=False)
31
-
32
-
33
class TraceCollector:
    """Collects spans for a single task run.

    Spans are appended under a per-collector lock and can later be converted
    into a ``Trace`` with :meth:`to_trace`.
    """

    def __init__(self, task_run_id: str) -> None:
        """Create an empty collector for *task_run_id*."""
        self.task_run_id = task_run_id
        self.spans: list[ReadableSpan] = []
        self._lock = threading.Lock()

    def add_span(self, span: ReadableSpan) -> None:
        """Thread-safe span addition."""
        with self._lock:
            self.spans.append(span)

    def to_trace(self) -> Trace:
        """Convert collected spans to a Trace object.

        Spans that fail conversion are logged at debug level and skipped, so
        one bad span does not discard the rest.

        Returns:
            Trace: A new trace holding one step per successfully converted
            span, in collection order.
        """
        from .exporters import HudSpan, _span_to_dict

        # Copy under the same lock add_span() uses so conversion runs over a
        # stable snapshot while other threads keep appending.
        with self._lock:
            snapshot = list(self.spans)

        # Named ``collected`` to avoid shadowing the module-level
        # ``opentelemetry.trace`` import.
        collected = Trace()

        # Convert spans to TraceSteps
        for span in snapshot:
            try:
                # Use the same conversion logic as the exporter
                span_dict = _span_to_dict(span)
                hud_span = HudSpan.model_validate(span_dict)

                # The attributes field is already a TraceStep
                step = hud_span.attributes
                # Add timing from the span itself
                step.start_timestamp = hud_span.start_time
                step.end_timestamp = hud_span.end_time
                collected.append(step)

            except Exception as e:
                # Log but don't fail the whole trace
                logger.debug("Failed to convert span: %s", e)

        return collected
71
-
72
-
73
class CollectingSpanExporter(SpanExporter):
    """Span exporter that files spans into per-task-run in-memory collectors."""

    def export(self, spans: list[ReadableSpan]) -> SpanExportResult:
        """Store each span under its ``hud.task_run_id`` when collection is on.

        Spans without a non-empty string ``hud.task_run_id`` attribute are
        ignored. The result is always ``SpanExportResult.SUCCESS``.
        """
        if _collecting_enabled.get():
            for span in spans:
                attrs = span.attributes
                run_id = attrs.get("hud.task_run_id") if attrs else None
                if not (run_id and isinstance(run_id, str)):
                    continue

                # Look up the collector for this run, creating it on first use.
                with _LOCK:
                    bucket = _TRACE_STORAGE.get(run_id)
                    if bucket is None:
                        bucket = TraceCollector(run_id)
                        _TRACE_STORAGE[run_id] = bucket

                bucket.add_span(span)

        return SpanExportResult.SUCCESS

    def shutdown(self) -> None:
        """Drop every collected trace."""
        with _LOCK:
            _TRACE_STORAGE.clear()
102
-
103
-
104
def enable_trace_collection(enabled: bool = True) -> None:
    """Turn in-memory trace collection on or off for the current context.

    Args:
        enabled: New value for the ``collecting_enabled`` context variable.
    """
    _collecting_enabled.set(enabled)
107
-
108
-
109
def get_trace(task_run_id: str) -> Trace | None:
    """Return the trace collected for *task_run_id*, if any.

    Args:
        task_run_id: Identifier of the task run to look up.

    Returns:
        Trace | None: The converted trace, or ``None`` when nothing is stored
        under that ID.
    """
    with _LOCK:
        found = _TRACE_STORAGE.get(task_run_id)
        return found.to_trace() if found else None
119
-
120
-
121
def clear_trace(task_run_id: str) -> None:
    """Forget the trace stored for *task_run_id*; unknown IDs are ignored."""
    with _LOCK:
        if task_run_id in _TRACE_STORAGE:
            del _TRACE_STORAGE[task_run_id]
125
-
126
-
127
def install_collector() -> None:
    """Attach a ``CollectingSpanExporter`` to the active tracer provider.

    This should be called after configure_telemetry(). Providers that do not
    expose ``add_span_processor`` are left untouched, and a failure to attach
    the processor is logged as a warning rather than raised.
    """
    active_provider = trace.get_tracer_provider()
    # Guard for SDK tracer providers only
    if not hasattr(active_provider, "add_span_processor"):
        return

    from opentelemetry.sdk.trace.export import SimpleSpanProcessor

    span_processor = SimpleSpanProcessor(CollectingSpanExporter())
    try:
        active_provider.add_span_processor(span_processor)  # type: ignore[attr-defined]
    except Exception:
        logger.warning("Failed to add span processor")
hud/otel/config.py DELETED
@@ -1,181 +0,0 @@
1
- """Central configuration for OpenTelemetry inside HUD SDK.
2
-
3
- This file is responsible for
4
- 1. creating the global ``TracerProvider``
5
- 2. attaching span processors (HUD enrichment, batch + exporter)
6
- 3. activating the community MCP instrumentation so that *every* MCP
7
- request/response/notification is traced automatically.
8
-
9
- It is *idempotent*: calling :func:`configure_telemetry` more than once
10
- returns the same provider and does nothing.
11
- """
12
-
13
- from __future__ import annotations
14
-
15
- import logging
16
- from typing import Any
17
-
18
- from opentelemetry import trace
19
- from opentelemetry.sdk.resources import Resource
20
- from opentelemetry.sdk.trace import TracerProvider
21
- from opentelemetry.sdk.trace.export import BatchSpanProcessor
22
-
23
- from hud.settings import settings
24
-
25
- from .collector import enable_trace_collection, install_collector
26
- from .exporters import HudSpanExporter
27
- from .instrumentation import install_mcp_instrumentation
28
- from .processors import HudEnrichmentProcessor
29
-
30
- logger = logging.getLogger(__name__)
31
-
32
- # Global singleton provider so multiple calls do not create duplicates
33
- _TRACER_PROVIDER: TracerProvider | None = None
34
-
35
-
36
def is_telemetry_configured() -> bool:
    """Report whether a global tracer provider is currently set."""
    configured = _TRACER_PROVIDER is not None
    return configured
39
-
40
-
41
- # ---------------------------------------------------------------------------
42
- # Public API
43
- # ---------------------------------------------------------------------------
44
-
45
-
46
def configure_telemetry(
    *,
    service_name: str = "hud-sdk",
    service_version: str | None = None,
    environment: str | None = None,
    extra_resource_attributes: dict[str, Any] | None = None,
    enable_otlp: bool = False,
    otlp_endpoint: str | None = None,
    otlp_headers: dict[str, str] | None = None,
    enable_collection: bool = True,
) -> TracerProvider:
    """Initialise OpenTelemetry for the current Python process.

    It is safe to call this in every entry-point; the provider will only
    be created once.

    Args:
        service_name: Value of the ``service.name`` resource attribute.
        service_version: Optional ``service.version`` resource attribute.
        environment: Optional ``deployment.environment`` resource attribute.
        extra_resource_attributes: Additional resource attributes; these
            override the defaults on key collision.
        enable_otlp: Also export spans through the OTLP HTTP exporter.
        otlp_endpoint: OTLP endpoint. A value without an ``http://`` or
            ``https://`` scheme is expanded to ``http://<value>/v1/traces``;
            when omitted, ``http://localhost:4318/v1/traces`` is used.
        otlp_headers: Optional headers passed to the OTLP exporter.
        enable_collection: Install the in-memory span collector and switch
            collection on.

    Returns:
        TracerProvider: The global provider (newly created, or the existing
        one when telemetry was already configured).

    Raises:
        ValueError: If telemetry is enabled but neither a HUD API key nor
            OTLP export is available.
    """
    global _TRACER_PROVIDER

    if _TRACER_PROVIDER is not None:
        return _TRACER_PROVIDER

    # Validate before any global state is touched. Raising after the
    # singleton is assigned would leave a half-configured provider behind
    # that later calls (and is_telemetry_configured()) treat as ready.
    if settings.telemetry_enabled and not settings.api_key and not enable_otlp:
        # Error if no exporters are configured
        raise ValueError(
            "No telemetry backend configured. Either:\n"
            "1. Set HUD_API_KEY environment variable for HUD telemetry (https://hud.so)\n"
            "2. Use enable_otlp=True with configure_telemetry() for alternative backends (e.g., Jaeger)\n"  # noqa: E501
        )

    # ------------------------------------------------------------------
    # 1. Resource (identity of this service)
    # ------------------------------------------------------------------
    res_attrs: dict[str, Any] = {
        "service.name": service_name,
        "telemetry.sdk.name": "hud-otel",
        "telemetry.sdk.language": "python",
    }
    if service_version:
        res_attrs["service.version"] = service_version
    if environment:
        res_attrs["deployment.environment"] = environment
    if extra_resource_attributes:
        res_attrs.update(extra_resource_attributes)

    resource = Resource.create(res_attrs)

    # ------------------------------------------------------------------
    # 2. Provider
    # ------------------------------------------------------------------
    provider = TracerProvider(resource=resource)
    _TRACER_PROVIDER = provider

    # ------------------------------------------------------------------
    # 3. Processors / exporters
    # ------------------------------------------------------------------
    provider.add_span_processor(HudEnrichmentProcessor())

    # HUD exporter (only if enabled and API key is available)
    if settings.telemetry_enabled and settings.api_key:
        exporter = HudSpanExporter(
            telemetry_url=settings.hud_telemetry_url, api_key=settings.api_key
        )
        # Export more continuously to avoid big end flushes
        provider.add_span_processor(
            BatchSpanProcessor(
                exporter,
                schedule_delay_millis=1000,
                max_queue_size=8192,
                max_export_batch_size=256,
                export_timeout_millis=30000,
            )
        )
    elif not settings.telemetry_enabled:
        logger.info("HUD telemetry disabled via HUD_TELEMETRY_ENABLED=false")

    # OTLP exporter (optional - for standard OTel viewers)
    if enable_otlp:
        try:
            from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter

            otlp_config: dict[str, Any] = {}
            if not otlp_endpoint:
                # Default HTTP endpoint
                otlp_config["endpoint"] = "http://localhost:4318/v1/traces"
            elif otlp_endpoint.startswith(("http://", "https://")):
                otlp_config["endpoint"] = otlp_endpoint
            else:
                # Bare host[:port] given: assume plain HTTP and the standard
                # traces path.
                otlp_config["endpoint"] = f"http://{otlp_endpoint}/v1/traces"

            if otlp_headers:
                otlp_config["headers"] = otlp_headers

            otlp_exporter = OTLPSpanExporter(**otlp_config)
            provider.add_span_processor(
                BatchSpanProcessor(
                    otlp_exporter,
                    schedule_delay_millis=1000,
                    max_queue_size=8192,
                    max_export_batch_size=256,
                    export_timeout_millis=30000,
                )
            )
            logger.info("OTLP HTTP exporter enabled - endpoint: %s", otlp_config["endpoint"])
        except ImportError:
            logger.warning(
                "OTLP export requested but opentelemetry-exporter-otlp-proto-http not installed. "
                "Install with: pip install 'hud-python[agent]'"
            )

    # ------------------------------------------------------------------
    # 4. Activate provider and instrumentation
    # ------------------------------------------------------------------
    trace.set_tracer_provider(provider)
    install_mcp_instrumentation(provider)

    # Install in-memory collector if requested
    if enable_collection:
        install_collector()
        enable_trace_collection(True)
        logger.debug("In-memory trace collection enabled")

    # Agent instrumentation now handled by @hud.instrument decorators
    logger.debug("OpenTelemetry configuration completed")

    logger.debug("OpenTelemetry configured (provider id=%s)", id(provider))
    return provider
172
-
173
-
174
def shutdown_telemetry() -> None:
    """Shut down the global tracer provider, if any, and forget it.

    Does nothing when telemetry was never configured. After a successful
    shutdown the module-level provider reference is reset to ``None``.
    """
    global _TRACER_PROVIDER
    active = _TRACER_PROVIDER
    if active is not None:
        active.shutdown()  # type: ignore[arg-type]
        _TRACER_PROVIDER = None
        logger.debug("OpenTelemetry shutdown complete")