PyPI - hud-python - Versions diffs - 0.4.74__tar.gz → 0.5.1__tar.gz - Mend

hud-python 0.4.74__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (405) hide show

hud_python-0.5.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,264 @@
+Metadata-Version: 2.4
+Name: hud-python
+Version: 0.5.1
+Summary: SDK for the HUD platform.
+Project-URL: Homepage, https://github.com/hud-evals/hud-python
+Project-URL: Bug Tracker, https://github.com/hud-evals/hud-python/issues
+Project-URL: Documentation, https://docs.hud.ai
+Author-email: HUD <founders@hud.ai>
+License: MIT License
+        Copyright (c) 2025 Human Union Data, Inc
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+License-File: LICENSE
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Python: <3.13,>=3.11
+Requires-Dist: blessed>=1.20.0
+Requires-Dist: fastmcp==2.13.3
+Requires-Dist: httpx<1,>=0.23.0
+Requires-Dist: mcp<1.23,>1.21.1
+Requires-Dist: openai>=2.8.1
+Requires-Dist: packaging>=21.0
+Requires-Dist: prompt-toolkit==3.0.51
+Requires-Dist: pydantic-settings<3,>=2.2
+Requires-Dist: pydantic<3,>=2.6
+Requires-Dist: questionary==2.1.0
+Requires-Dist: rich>=13.0.0
+Requires-Dist: scarf-sdk>=0.1.0
+Requires-Dist: toml>=0.10.2
+Requires-Dist: typer>=0.9.0
+Requires-Dist: watchfiles>=0.21.0
+Provides-Extra: agent
+Requires-Dist: anthropic>=0.75; extra == 'agent'
+Requires-Dist: datasets>=2.14.0; extra == 'agent'
+Requires-Dist: google-genai; extra == 'agent'
+Requires-Dist: langchain>=1.1.0; extra == 'agent'
+Requires-Dist: mcp-use==1.5.0; extra == 'agent'
+Requires-Dist: openai-agents; extra == 'agent'
+Requires-Dist: pillow>=11.1.0; extra == 'agent'
+Requires-Dist: tornado>=6.5.2; extra == 'agent'
+Provides-Extra: agents
+Requires-Dist: anthropic>=0.75; extra == 'agents'
+Requires-Dist: datasets>=2.14.0; extra == 'agents'
+Requires-Dist: google-genai; extra == 'agents'
+Requires-Dist: langchain>=1.1.0; extra == 'agents'
+Requires-Dist: mcp-use==1.5.0; extra == 'agents'
+Requires-Dist: openai-agents; extra == 'agents'
+Requires-Dist: pillow>=11.1.0; extra == 'agents'
+Requires-Dist: tornado>=6.5.2; extra == 'agents'
+Provides-Extra: bedrock
+Requires-Dist: anthropic[bedrock]>=0.75; extra == 'bedrock'
+Provides-Extra: dev
+Requires-Dist: anthropic>=0.75; extra == 'dev'
+Requires-Dist: datasets>=2.14.0; extra == 'dev'
+Requires-Dist: dotenv>=0.9.9; extra == 'dev'
+Requires-Dist: google-adk; extra == 'dev'
+Requires-Dist: google-genai; extra == 'dev'
+Requires-Dist: ipykernel; extra == 'dev'
+Requires-Dist: ipython<9; extra == 'dev'
+Requires-Dist: jupyter-client; extra == 'dev'
+Requires-Dist: jupyter-core; extra == 'dev'
+Requires-Dist: langchain>=1.1.0; extra == 'dev'
+Requires-Dist: llama-index-core; extra == 'dev'
+Requires-Dist: mcp-use==1.5.0; extra == 'dev'
+Requires-Dist: openai-agents; extra == 'dev'
+Requires-Dist: pillow>=11.1.0; extra == 'dev'
+Requires-Dist: playwright; extra == 'dev'
+Requires-Dist: pyautogui>=0.9.54; extra == 'dev'
+Requires-Dist: pyright==1.1.407; extra == 'dev'
+Requires-Dist: pytest-asyncio; extra == 'dev'
+Requires-Dist: pytest-cov; extra == 'dev'
+Requires-Dist: pytest-mock; extra == 'dev'
+Requires-Dist: pytest<9,>=8.1.1; extra == 'dev'
+Requires-Dist: ruff>=0.11.8; extra == 'dev'
+Requires-Dist: tornado>=6.5.2; extra == 'dev'
+Description-Content-Type: text/markdown
+<div align="left">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo_dark.svg">
+    <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg">
+    <img src="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg" alt="HUD" width="150" style="margin-bottom: 24px;"/>
+  </picture>
+</div>
+The HUD SDK is an open-source Python toolkit for building, evaluating, and training AI agents. Use a unified API for any model provider, wrap your code as MCP environments, run A/B evals at scale, and train with reinforcement learning.
+To learn more, check out our [Documentation](https://docs.hud.ai) and [API Reference](https://docs.hud.ai/reference).
+[![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
+[![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
+[![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
+[![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
+[![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
+[![Shop](https://img.shields.io/badge/_-white.svg?label=shop&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAACxMAAAsTAQCanBgAAAF6SURBVChTlZA9ixNhFIWf8yaTpFHRRMXCKpAZhCAYFvwoLHZhwUKw9A9YCJb+Bq0sxGbBQrTxX1j41dvIRAjGZbdwRUUGIzPMeyw2swS3WZ/ynHvP5VylafoAWAd+5Xm+wX+SpukmcMf29RDCZrD9BViz3f53+CjYngKZpD5A2/Y7SQBMJpOkKIprdV1vdzqdHzHGblmW9Ww2+5pl2TmAxWKxmM/nP8fj8cmqqtZijJ9sb0u6ABBWjh0riuIt8CqE8LGu66e2d5MkeQ8QY3xme7fb7T4ZjUbrZVl+jjFuSXoEXGxCDgIl9WzfAO5LSmzvNB771R6vzG4Bx0MIt/M8vwV8aLyDQNt70+n0G1AspaTxVln+aghQluVsKbvxVysflT9NQK/XO7R/SGiQ9Nt2aftElmWXJd1kv0kbeANQVdWl4XB4XtJouXaqNRgMHkrqS+r0+/3XwD1JXdungRfAVWBi+6WkK8D3EMJz22cl3W21WgNgx3YAzvwFd0Chdq03gKUAAAAASUVORK5CYII=&style=social)](https://shop.hud.ai)
+[![Scarf](https://static.scarf.sh/a.png?x-pxid=6530ff33-4945-452b-81f9-626872593933)](https://scarf.sh)
+[![Docs](https://img.shields.io/badge/docs-hud.ai-blue?style=flat-square)](https://docs.hud.ai)
+## Install
+```bash
+pip install hud-python
+```
+Get your API key at [hud.ai](https://hud.ai) and set it:
+```bash
+export HUD_API_KEY=your-key-here
+```
+> For CLI tools (`hud init`, `hud dev`, etc.): `uv tool install hud-python --python 3.12`
+![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
+## Usage
+### Unified Model API
+Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
+```python
+from openai import AsyncOpenAI
+import os
+client = AsyncOpenAI(
+    base_url="https://inference.hud.ai",
+    api_key=os.environ["HUD_API_KEY"]
+)
+response = await client.chat.completions.create(
+    model="claude-sonnet-4-5",  # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
+    messages=[{"role": "user", "content": "Hello!"}]
+)
+```
+Every call is traced at [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/gateway)
+### Environments
+Turn your code into tools agents can call. Define how to evaluate them:
+```python
+from hud import Environment
+env = Environment("my-env")
+@env.tool()
+def add(a: int, b: int) -> int:
+    """Add two numbers."""
+    return a + b
+@env.scenario("solve-math")
+async def solve_math(problem: str, answer: int):
+    response = yield problem                    # Prompt
+    yield 1.0 if str(answer) in response else 0.0  # Reward
+async with env("solve-math", problem="What is 2+2?", answer=4) as ctx:
+    # Your agent logic here - call tools, get response
+    result = await ctx.call_tool("add", a=2, b=2)
+    await ctx.submit(f"The answer is {result}")
+print(ctx.reward)  # 1.0
+```
+The agent runs between the yields. First yield sends the prompt, second yield scores the result. → [Docs](https://docs.hud.ai/quick-links/environments) · [Templates](https://hud.ai/environments)
+### A/B Evals
+Test different models. Repeat runs to see the distribution:
+```python
+from openai import AsyncOpenAI
+import os
+client = AsyncOpenAI(
+    base_url="https://inference.hud.ai",
+    api_key=os.environ["HUD_API_KEY"]
+)
+# Using the env from above
+async with env("solve-math", problem="What is 2+2?", answer=4, variants={"model": ["gpt-4o", "claude-sonnet-4-5"]}, group=5) as ctx:
+    response = await client.chat.completions.create(
+        model=ctx.variants["model"],
+        messages=[{"role": "user", "content": ctx.prompt}],
+        tools=ctx.tools  # Environment tools available to the model
+    )
+    await ctx.submit(response.choices[0].message.content)
+```
+**Variants** test configurations. **Groups** repeat for distribution. Results stream to [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/ab-testing)
+### Deploy & Train
+Push to GitHub, connect on hud.ai, run at scale:
+```bash
+hud init                  # Scaffold environment
+git push                  # Push to GitHub
+# Connect on hud.ai → New → Environment
+hud eval my-eval --model gpt-4o --group-size 100
+# Or create and run tasks on the platform
+```
+Every run generates training data. Use it to fine-tune or run RL. → [Docs](https://docs.hud.ai/quick-links/deploy)
+## Links
+- 📖 [Documentation](https://docs.hud.ai)
+- ⌨️ [CLI Reference](https://docs.hud.ai/reference/cli/overview)
+- 🏆 [Leaderboards](https://hud.ai/leaderboards)
+- 🌐 [Environment Templates](https://hud.ai/environments)
+- 🤖 [Supported Models](https://hud.ai/models)
+- 💬 [Discord](https://discord.gg/wkjtmHYYjm)
+## Enterprise
+Building agents at scale? We work with teams on custom environments, benchmarks, and training.
+[📅 Book a call](https://cal.com/jay-hud) · [📧 founders@hud.ai](mailto:founders@hud.ai)
+## Contributing
+We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md).
+Key areas: [Agents](hud/agents/) · [Tools](hud/tools/) · [Environments](https://hud.ai/environments)
+<a href="https://github.com/hud-evals/hud-python/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=hud-evals/hud-python&max=50" />
+</a>
+## Citation
+```bibtex
+@software{hud2025agentevalplatform,
+  author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
+  title  = {HUD: An Evaluation and RL Environments Platform for Agents},
+  date   = {2025-04},
+  url    = {https://github.com/hud-evals/hud-python},
+  langid = {en}
+}
+```
+MIT License · [LICENSE](LICENSE)

hud_python-0.5.1/README.md ADDED Viewed

@@ -0,0 +1,166 @@
+<div align="left">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo_dark.svg">
+    <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg">
+    <img src="https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/logo/hud_logo.svg" alt="HUD" width="150" style="margin-bottom: 24px;"/>
+  </picture>
+</div>
+The HUD SDK is an open-source Python toolkit for building, evaluating, and training AI agents. Use a unified API for any model provider, wrap your code as MCP environments, run A/B evals at scale, and train with reinforcement learning.
+To learn more, check out our [Documentation](https://docs.hud.ai) and [API Reference](https://docs.hud.ai/reference).
+[![PyPI](https://img.shields.io/pypi/v/hud-python?style=flat-square)](https://pypi.org/project/hud-python/)
+[![License](https://img.shields.io/badge/license-MIT-green?style=flat-square)](LICENSE)
+[![Add docs to Cursor](https://img.shields.io/badge/Add%20docs%20to-Cursor-black?style=flat-square)](https://cursor.com/en/install-mcp?name=docs-hud-python&config=eyJ1cmwiOiJodHRwczovL2RvY3MuaHVkLmFpL21jcCJ9)
+[![Discord](https://img.shields.io/discord/1327447144772407390?label=Discord&logo=discord&style=flat-square)](https://discord.gg/wkjtmHYYjm)
+[![X Follow](https://img.shields.io/twitter/follow/hud_evals?style=social)](https://x.com/intent/user?screen_name=hud_evals)
+[![Shop](https://img.shields.io/badge/_-white.svg?label=shop&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAJCAYAAAAywQxIAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAACxMAAAsTAQCanBgAAAF6SURBVChTlZA9ixNhFIWf8yaTpFHRRMXCKpAZhCAYFvwoLHZhwUKw9A9YCJb+Bq0sxGbBQrTxX1j41dvIRAjGZbdwRUUGIzPMeyw2swS3WZ/ynHvP5VylafoAWAd+5Xm+wX+SpukmcMf29RDCZrD9BViz3f53+CjYngKZpD5A2/Y7SQBMJpOkKIprdV1vdzqdHzHGblmW9Ww2+5pl2TmAxWKxmM/nP8fj8cmqqtZijJ9sb0u6ABBWjh0riuIt8CqE8LGu66e2d5MkeQ8QY3xme7fb7T4ZjUbrZVl+jjFuSXoEXGxCDgIl9WzfAO5LSmzvNB771R6vzG4Bx0MIt/M8vwV8aLyDQNt70+n0G1AspaTxVln+aghQluVsKbvxVysflT9NQK/XO7R/SGiQ9Nt2aftElmWXJd1kv0kbeANQVdWl4XB4XtJouXaqNRgMHkrqS+r0+/3XwD1JXdungRfAVWBi+6WkK8D3EMJz22cl3W21WgNgx3YAzvwFd0Chdq03gKUAAAAASUVORK5CYII=&style=social)](https://shop.hud.ai)
+[![Scarf](https://static.scarf.sh/a.png?x-pxid=6530ff33-4945-452b-81f9-626872593933)](https://scarf.sh)
+[![Docs](https://img.shields.io/badge/docs-hud.ai-blue?style=flat-square)](https://docs.hud.ai)
+## Install
+```bash
+pip install hud-python
+```
+Get your API key at [hud.ai](https://hud.ai) and set it:
+```bash
+export HUD_API_KEY=your-key-here
+```
+> For CLI tools (`hud init`, `hud dev`, etc.): `uv tool install hud-python --python 3.12`
+![Agent running on SheetBench](https://raw.githubusercontent.com/hud-evals/hud-python/main/docs/src/images/trace_sheet.gif)
+## Usage
+### Unified Model API
+Use Claude, GPT, Gemini, or Grok through one OpenAI-compatible endpoint:
+```python
+from openai import AsyncOpenAI
+import os
+client = AsyncOpenAI(
+    base_url="https://inference.hud.ai",
+    api_key=os.environ["HUD_API_KEY"]
+)
+response = await client.chat.completions.create(
+    model="claude-sonnet-4-5",  # or gpt-4o, gemini-2.5-pro (https://hud.ai/models)
+    messages=[{"role": "user", "content": "Hello!"}]
+)
+```
+Every call is traced at [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/gateway)
+### Environments
+Turn your code into tools agents can call. Define how to evaluate them:
+```python
+from hud import Environment
+env = Environment("my-env")
+@env.tool()
+def add(a: int, b: int) -> int:
+    """Add two numbers."""
+    return a + b
+@env.scenario("solve-math")
+async def solve_math(problem: str, answer: int):
+    response = yield problem                    # Prompt
+    yield 1.0 if str(answer) in response else 0.0  # Reward
+async with env("solve-math", problem="What is 2+2?", answer=4) as ctx:
+    # Your agent logic here - call tools, get response
+    result = await ctx.call_tool("add", a=2, b=2)
+    await ctx.submit(f"The answer is {result}")
+print(ctx.reward)  # 1.0
+```
+The agent runs between the yields. First yield sends the prompt, second yield scores the result. → [Docs](https://docs.hud.ai/quick-links/environments) · [Templates](https://hud.ai/environments)
+### A/B Evals
+Test different models. Repeat runs to see the distribution:
+```python
+from openai import AsyncOpenAI
+import os
+client = AsyncOpenAI(
+    base_url="https://inference.hud.ai",
+    api_key=os.environ["HUD_API_KEY"]
+)
+# Using the env from above
+async with env("solve-math", problem="What is 2+2?", answer=4, variants={"model": ["gpt-4o", "claude-sonnet-4-5"]}, group=5) as ctx:
+    response = await client.chat.completions.create(
+        model=ctx.variants["model"],
+        messages=[{"role": "user", "content": ctx.prompt}],
+        tools=ctx.tools  # Environment tools available to the model
+    )
+    await ctx.submit(response.choices[0].message.content)
+```
+**Variants** test configurations. **Groups** repeat for distribution. Results stream to [hud.ai](https://hud.ai). → [Docs](https://docs.hud.ai/quick-links/ab-testing)
+### Deploy & Train
+Push to GitHub, connect on hud.ai, run at scale:
+```bash
+hud init                  # Scaffold environment
+git push                  # Push to GitHub
+# Connect on hud.ai → New → Environment
+hud eval my-eval --model gpt-4o --group-size 100
+# Or create and run tasks on the platform
+```
+Every run generates training data. Use it to fine-tune or run RL. → [Docs](https://docs.hud.ai/quick-links/deploy)
+## Links
+- 📖 [Documentation](https://docs.hud.ai)
+- ⌨️ [CLI Reference](https://docs.hud.ai/reference/cli/overview)
+- 🏆 [Leaderboards](https://hud.ai/leaderboards)
+- 🌐 [Environment Templates](https://hud.ai/environments)
+- 🤖 [Supported Models](https://hud.ai/models)
+- 💬 [Discord](https://discord.gg/wkjtmHYYjm)
+## Enterprise
+Building agents at scale? We work with teams on custom environments, benchmarks, and training.
+[📅 Book a call](https://cal.com/jay-hud) · [📧 founders@hud.ai](mailto:founders@hud.ai)
+## Contributing
+We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md).
+Key areas: [Agents](hud/agents/) · [Tools](hud/tools/) · [Environments](https://hud.ai/environments)
+<a href="https://github.com/hud-evals/hud-python/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=hud-evals/hud-python&max=50" />
+</a>
+## Citation
+```bibtex
+@software{hud2025agentevalplatform,
+  author = {HUD and Jay Ram and Lorenss Martinsons and Parth Patel and Govind Pimpale and Dylan Bowman and Jaideep and Nguyen Nhat Minh},
+  title  = {HUD: An Evaluation and RL Environments Platform for Agents},
+  date   = {2025-04},
+  url    = {https://github.com/hud-evals/hud-python},
+  langid = {en}
+}
+```
+MIT License · [LICENSE](LICENSE)

hud_python-0.5.1/examples/README.md ADDED Viewed

@@ -0,0 +1,127 @@
+# Examples
+A collection of examples demonstrating HUD SDK usage patterns.
+## Quick Start
+### 00_agent_env.py
+Minimal MCP server and client in one file. Shows the basic agent-environment communication pattern using `hud.eval()`.
+```bash
+python examples/00_agent_env.py
+```
+### 01_agent_lifecycle.py
+Complete agent lifecycle demonstrating:
+- v5 Task format with Environment and scenario
+- `hud.eval()` context for connection and tracing
+- Agent initialization and execution
+- Automatic scenario setup/evaluation
+```bash
+python examples/01_agent_lifecycle.py
+```
+> Requires `HUD_API_KEY` and `ANTHROPIC_API_KEY` environment variables.
+## Agent Examples
+### 02_claude_agent.py
+Claude agent with computer use capabilities for browser automation.
+```bash
+python examples/02_claude_agent.py
+```
+> Requires `HUD_API_KEY` and `ANTHROPIC_API_KEY`.
+### 03_openai_compatible_agent.py
+OpenAI-compatible chat.completions agent with both text and browser 2048 environments.
+```bash
+export OPENAI_API_KEY=your-key
+# export OPENAI_BASE_URL=http://localhost:8000/v1  # for local servers (e.g., vllm)
+python examples/03_openai_compatible_agent.py --mode text     # text environment
+python examples/03_openai_compatible_agent.py --mode browser  # browser environment
+```
+> Requires Docker for local environment execution.
+### 04_grounded_agent.py
+Grounded agent that separates visual grounding (element detection) from high-level reasoning.
+```bash
+export OPENAI_API_KEY=your-key
+export OPENROUTER_API_KEY=your-key
+python examples/04_grounded_agent.py
+```
+> Requires Docker and API keys for both OpenAI and OpenRouter.
+### 05_custom_agent.py
+Build a custom MCPAgent using HUD Gateway for unified model access:
+- No need for individual provider API keys
+- Works with Anthropic, OpenAI, Gemini, OpenRouter models
+- Automatic tracing with `@hud.instrument`
+```bash
+HUD_API_KEY=sk-hud-... python examples/05_custom_agent.py
+```
+## Dataset Evaluation
+### run_evaluation.py
+Generic dataset evaluation runner using the programmatic API.
+```bash
+# Run all tasks in a dataset
+python examples/run_evaluation.py hud-evals/SheetBench-50
+# Run specific tasks by index
+python examples/run_evaluation.py hud-evals/SheetBench-50 --task-ids 0 1 2
+# Use different agent and concurrency
+python examples/run_evaluation.py hud-evals/OSWorld-Verified-Gold --agent operator --max-concurrent 50
+```
+For production evaluations, prefer the CLI: `hud eval --help`
+## Key Concepts
+### v5 Task Format
+The v5 Task format is the recommended way to define evaluation tasks:
+```python
+import hud
+from hud.eval.task import Task
+# Simple task with hub environment
+task = Task(
+    env={"name": "browser"},  # Connect to browser hub
+    scenario="checkout",       # Scenario to run
+    args={"user_id": "alice"}, # Scenario arguments
+)
+# Task with local Docker environment
+env = hud.Environment("my-env")
+env.connect_local(command="docker", args=["run", "--rm", "-i", "my-image"])
+task = Task(env=env, scenario="test")
+```
+### Using hud.eval()
+All examples use `hud.eval()` as the primary entry point:
+```python
+async with hud.eval(task, name="my-eval", variants={"model": "gpt-4o"}) as ctx:
+    result = await agent.run(ctx, max_steps=10)
+    print(f"Reward: {ctx.reward}")
+```
+The context manager handles:
+- Environment connection (MCP servers start)
+- Scenario setup execution
+- Telemetry and tracing
+- Automatic scenario evaluation on exit

hud_python-0.5.1/hud/__init__.py ADDED Viewed

@@ -0,0 +1,50 @@
+"""hud-python.
+tools for building, evaluating, and training AI agents.
+"""
+from __future__ import annotations
+import warnings
+# Apply patches to third-party libraries early, before other imports
+from . import patches as _patches  # noqa: F401
+from .environment import Environment
+from .eval import EvalContext
+from .eval import run_eval as eval
+from .telemetry.instrument import instrument
+def trace(*args: object, **kwargs: object) -> EvalContext:
+    """Deprecated: Use hud.eval() instead.
+    .. deprecated:: 0.5.1
+        hud.trace() is deprecated. Use hud.eval() or env.eval() instead.
+    """
+    warnings.warn(
+        "hud.trace() is deprecated. Use hud.eval() or env.eval() instead.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return eval(*args, **kwargs)  # type: ignore[arg-type]
+__all__ = [
+    "Environment",
+    "EvalContext",
+    "eval",
+    "instrument",
+    "trace",  # Deprecated alias for eval
+]
+try:
+    from .version import __version__
+except ImportError:
+    __version__ = "unknown"
+try:
+    from .utils.pretty_errors import install_pretty_errors
+    install_pretty_errors()
+except Exception:  # noqa: S110
+    pass

hud_python-0.5.1/hud/agents/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+from __future__ import annotations
+from .base import MCPAgent
+from .openai import OpenAIAgent
+from .openai_chat import OpenAIChatAgent
+from .operator import OperatorAgent
+# Note: These agents are not exported here to avoid requiring optional dependencies.
+# Import directly if needed:
+#   from hud.agents.claude import ClaudeAgent  # requires anthropic
+#   from hud.agents.gemini import GeminiAgent  # requires google-genai
+#   from hud.agents.gemini_cua import GeminiCUAAgent  # requires google-genai
+__all__ = [
+    "MCPAgent",
+    "OpenAIAgent",
+    "OpenAIChatAgent",
+    "OperatorAgent",
+]

hud-python 0.4.74__tar.gz → 0.5.1__tar.gz

hud-python 0.4.74__tar.gz → 0.5.1__tar.gz