cua-agent 0.4.0b4__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/PKG-INFO +2 -2
- cua_agent-0.4.2/agent/__init__.py +64 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/agent.py +18 -1
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/callbacks/__init__.py +2 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/callbacks/budget_manager.py +43 -43
- cua_agent-0.4.2/agent/callbacks/telemetry.py +210 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/cli.py +27 -15
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/loops/anthropic.py +659 -18
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/responses.py +207 -207
- cua_agent-0.4.2/agent/telemetry.py +142 -0
- cua_agent-0.4.2/agent/ui/__init__.py +7 -0
- cua_agent-0.4.2/agent/ui/__main__.py +4 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/ui/gradio/__init__.py +2 -2
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/ui/gradio/app.py +19 -19
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/ui/gradio/ui_components.py +28 -10
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/pyproject.toml +2 -2
- cua_agent-0.4.0b4/agent/__init__.py +0 -19
- cua_agent-0.4.0b4/agent/ui/__init__.py +0 -7
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/README.md +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/__main__.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/adapters/__init__.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/adapters/huggingfacelocal_adapter.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/callbacks/base.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/callbacks/image_retention.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/callbacks/logging.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/callbacks/pii_anonymization.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/callbacks/trajectory_saver.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/computer_handler.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/decorators.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/loops/__init__.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/loops/omniparser.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/loops/openai.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/loops/uitars.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.2}/agent/types.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cua-agent
|
|
3
|
-
Version: 0.4.0b4
|
|
3
|
+
Version: 0.4.2
|
|
4
4
|
Summary: CUA (Computer Use) Agent for AI-driven computer interaction
|
|
5
5
|
Author-Email: TryCua <gh@trycua.com>
|
|
6
6
|
Requires-Python: >=3.11
|
|
@@ -13,7 +13,7 @@ Requires-Dist: pydantic>=2.6.4
|
|
|
13
13
|
Requires-Dist: rich>=13.7.1
|
|
14
14
|
Requires-Dist: python-dotenv>=1.0.1
|
|
15
15
|
Requires-Dist: cua-computer<0.5.0,>=0.3.0
|
|
16
|
-
Requires-Dist: cua-core<0.2.0,>=0.1.
|
|
16
|
+
Requires-Dist: cua-core<0.2.0,>=0.1.8
|
|
17
17
|
Requires-Dist: certifi>=2024.2.2
|
|
18
18
|
Requires-Dist: litellm>=1.74.8
|
|
19
19
|
Provides-Extra: openai
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""
|
|
2
|
+
agent - Decorator-based Computer Use Agent with liteLLM integration
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from .decorators import agent_loop
|
|
9
|
+
from .agent import ComputerAgent
|
|
10
|
+
from .types import Messages, AgentResponse
|
|
11
|
+
|
|
12
|
+
# Import loops to register them
|
|
13
|
+
from . import loops
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"agent_loop",
|
|
17
|
+
"ComputerAgent",
|
|
18
|
+
"Messages",
|
|
19
|
+
"AgentResponse"
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
# Resolve the version from the installed distribution metadata so that the
# telemetry payload below reports the release that is actually running. The
# literal is only a fallback for source checkouts without installed metadata.
from importlib.metadata import PackageNotFoundError as _PackageNotFoundError
from importlib.metadata import version as _distribution_version

try:
    __version__ = _distribution_version("cua-agent")
except _PackageNotFoundError:
    __version__ = "0.4.2"

logger = logging.getLogger(__name__)

# Initialize telemetry when the package is imported.
# This is deliberately best-effort: any failure is logged as a warning and
# never propagates, so importing the package cannot fail because of telemetry.
try:
    # Import from core telemetry for basic functions
    from core.telemetry import (
        is_telemetry_enabled,
        flush,
        record_event,
    )

    # Import set_dimension from our own telemetry module
    from .telemetry import set_dimension

    if is_telemetry_enabled():
        logger.info("Telemetry is enabled")

        # Record package initialization
        record_event(
            "module_init",
            {
                "module": "agent",
                "version": __version__,
                "python_version": sys.version,
            },
        )

        # Set the package version as a dimension
        set_dimension("agent_version", __version__)

        # Flush events to ensure they're sent
        flush()
    else:
        logger.info("Telemetry is disabled")
except ImportError as e:
    # Telemetry not available
    logger.warning("Telemetry not available: %s", e)
except Exception as e:
    # Other issues with telemetry
    logger.warning("Error initializing telemetry: %s", e)
|
|
@@ -6,6 +6,7 @@ import asyncio
|
|
|
6
6
|
from typing import Dict, List, Any, Optional, AsyncGenerator, Union, cast, Callable, Set
|
|
7
7
|
|
|
8
8
|
from litellm.responses.utils import Usage
|
|
9
|
+
|
|
9
10
|
from .types import Messages, Computer
|
|
10
11
|
from .decorators import find_agent_loop
|
|
11
12
|
from .computer_handler import OpenAIComputerHandler, acknowledge_safety_check_callback, check_blocklisted_url
|
|
@@ -14,7 +15,13 @@ import litellm
|
|
|
14
15
|
import litellm.utils
|
|
15
16
|
import inspect
|
|
16
17
|
from .adapters import HuggingFaceLocalAdapter
|
|
17
|
-
from .callbacks import
|
|
18
|
+
from .callbacks import (
|
|
19
|
+
ImageRetentionCallback,
|
|
20
|
+
LoggingCallback,
|
|
21
|
+
TrajectorySaverCallback,
|
|
22
|
+
BudgetManagerCallback,
|
|
23
|
+
TelemetryCallback,
|
|
24
|
+
)
|
|
18
25
|
|
|
19
26
|
def get_json(obj: Any, max_depth: int = 10) -> Any:
|
|
20
27
|
def custom_serializer(o: Any, depth: int = 0, seen: Set[int] = None) -> Any:
|
|
@@ -129,6 +136,7 @@ class ComputerAgent:
|
|
|
129
136
|
screenshot_delay: Optional[float | int] = 0.5,
|
|
130
137
|
use_prompt_caching: Optional[bool] = False,
|
|
131
138
|
max_trajectory_budget: Optional[float | dict] = None,
|
|
139
|
+
telemetry_enabled: Optional[bool] = True,
|
|
132
140
|
**kwargs
|
|
133
141
|
):
|
|
134
142
|
"""
|
|
@@ -146,6 +154,7 @@ class ComputerAgent:
|
|
|
146
154
|
screenshot_delay: Delay before screenshots in seconds
|
|
147
155
|
use_prompt_caching: If set, use prompt caching to avoid reprocessing the same prompt. Intended for use with anthropic providers.
|
|
148
156
|
max_trajectory_budget: If set, adds BudgetManagerCallback to track usage costs and stop when budget is exceeded
|
|
157
|
+
telemetry_enabled: If set, adds TelemetryCallback to track anonymized usage data. Enabled by default.
|
|
149
158
|
**kwargs: Additional arguments passed to the agent loop
|
|
150
159
|
"""
|
|
151
160
|
self.model = model
|
|
@@ -158,10 +167,18 @@ class ComputerAgent:
|
|
|
158
167
|
self.max_retries = max_retries
|
|
159
168
|
self.screenshot_delay = screenshot_delay
|
|
160
169
|
self.use_prompt_caching = use_prompt_caching
|
|
170
|
+
self.telemetry_enabled = telemetry_enabled
|
|
161
171
|
self.kwargs = kwargs
|
|
162
172
|
|
|
163
173
|
# == Add built-in callbacks ==
|
|
164
174
|
|
|
175
|
+
# Add telemetry callback if telemetry_enabled is set
|
|
176
|
+
if self.telemetry_enabled:
|
|
177
|
+
if isinstance(self.telemetry_enabled, bool):
|
|
178
|
+
self.callbacks.append(TelemetryCallback(self))
|
|
179
|
+
else:
|
|
180
|
+
self.callbacks.append(TelemetryCallback(self, **self.telemetry_enabled))
|
|
181
|
+
|
|
165
182
|
# Add logging callback if verbosity is set
|
|
166
183
|
if self.verbosity is not None:
|
|
167
184
|
self.callbacks.append(LoggingCallback(level=self.verbosity))
|
|
@@ -7,6 +7,7 @@ from .image_retention import ImageRetentionCallback
|
|
|
7
7
|
from .logging import LoggingCallback
|
|
8
8
|
from .trajectory_saver import TrajectorySaverCallback
|
|
9
9
|
from .budget_manager import BudgetManagerCallback
|
|
10
|
+
from .telemetry import TelemetryCallback
|
|
10
11
|
|
|
11
12
|
__all__ = [
|
|
12
13
|
"AsyncCallbackHandler",
|
|
@@ -14,4 +15,5 @@ __all__ = [
|
|
|
14
15
|
"LoggingCallback",
|
|
15
16
|
"TrajectorySaverCallback",
|
|
16
17
|
"BudgetManagerCallback",
|
|
18
|
+
"TelemetryCallback",
|
|
17
19
|
]
|
|
@@ -1,44 +1,44 @@
|
|
|
1
|
-
from typing import Dict, List, Any
|
|
2
|
-
from .base import AsyncCallbackHandler
|
|
3
|
-
|
|
4
|
-
class BudgetExceededError(Exception):
|
|
5
|
-
"""Exception raised when budget is exceeded."""
|
|
6
|
-
pass
|
|
7
|
-
|
|
8
|
-
class BudgetManagerCallback(AsyncCallbackHandler):
|
|
9
|
-
"""Budget manager callback that tracks usage costs and can stop execution when budget is exceeded."""
|
|
10
|
-
|
|
11
|
-
def __init__(self, max_budget: float, reset_after_each_run: bool = True, raise_error: bool = False):
|
|
12
|
-
"""
|
|
13
|
-
Initialize BudgetManagerCallback.
|
|
14
|
-
|
|
15
|
-
Args:
|
|
16
|
-
max_budget: Maximum budget allowed
|
|
17
|
-
reset_after_each_run: Whether to reset budget after each run
|
|
18
|
-
raise_error: Whether to raise an error when budget is exceeded
|
|
19
|
-
"""
|
|
20
|
-
self.max_budget = max_budget
|
|
21
|
-
self.reset_after_each_run = reset_after_each_run
|
|
22
|
-
self.raise_error = raise_error
|
|
23
|
-
self.total_cost = 0.0
|
|
24
|
-
|
|
25
|
-
async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
|
|
26
|
-
"""Reset budget if configured to do so."""
|
|
27
|
-
if self.reset_after_each_run:
|
|
28
|
-
self.total_cost = 0.0
|
|
29
|
-
|
|
30
|
-
async def on_usage(self, usage: Dict[str, Any]) -> None:
|
|
31
|
-
"""Track usage costs."""
|
|
32
|
-
if "response_cost" in usage:
|
|
33
|
-
self.total_cost += usage["response_cost"]
|
|
34
|
-
|
|
35
|
-
async def on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool:
|
|
36
|
-
"""Check if budget allows continuation."""
|
|
37
|
-
if self.total_cost >= self.max_budget:
|
|
38
|
-
if self.raise_error:
|
|
39
|
-
raise BudgetExceededError(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}")
|
|
40
|
-
else:
|
|
41
|
-
print(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}")
|
|
42
|
-
return False
|
|
43
|
-
return True
|
|
1
|
+
from typing import Dict, List, Any
|
|
2
|
+
from .base import AsyncCallbackHandler
|
|
3
|
+
|
|
4
|
+
class BudgetExceededError(Exception):
    """Signals that the accumulated cost has reached the configured budget."""
|
|
7
|
+
|
|
8
|
+
class BudgetManagerCallback(AsyncCallbackHandler):
    """Accumulates reported response costs and stops the run once a spending cap is reached."""

    def __init__(self, max_budget: float, reset_after_each_run: bool = True, raise_error: bool = False):
        """
        Set up the budget tracker with a zero running total.

        Args:
            max_budget: Cost ceiling; the run stops once the total reaches it
            reset_after_each_run: When true, the running total is zeroed at every run start
            raise_error: When true, raise BudgetExceededError instead of printing and stopping
        """
        self.total_cost = 0.0
        self.max_budget = max_budget
        self.raise_error = raise_error
        self.reset_after_each_run = reset_after_each_run

    async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
        """Zero the running total when per-run reset is configured."""
        if not self.reset_after_each_run:
            return
        self.total_cost = 0.0

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """Add the reported ``response_cost`` (if any) to the running total."""
        if "response_cost" not in usage:
            return
        self.total_cost += usage["response_cost"]

    async def on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool:
        """Return True while under budget; otherwise raise or print and return False."""
        if self.total_cost >= self.max_budget:
            message = f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}"
            if not self.raise_error:
                print(message)
                return False
            raise BudgetExceededError(message)
        return True
|
|
44
44
|
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Telemetry callback handler for Computer-Use Agent (cua-agent)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
import uuid
|
|
7
|
+
from typing import List, Dict, Any, Optional, Union
|
|
8
|
+
|
|
9
|
+
from .base import AsyncCallbackHandler
|
|
10
|
+
from ..telemetry import (
|
|
11
|
+
record_event,
|
|
12
|
+
is_telemetry_enabled,
|
|
13
|
+
set_dimension,
|
|
14
|
+
SYSTEM_INFO,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TelemetryCallback(AsyncCallbackHandler):
    """
    Telemetry callback handler for Computer-Use Agent (cua-agent).

    Emits ``agent_session_start``, ``agent_run_start``, ``agent_step``,
    ``agent_usage`` and ``agent_run_end`` events through ``record_event``.
    Every hook returns without recording anything while
    ``is_telemetry_enabled()`` is false. Trajectory items are attached to the
    run events only when ``log_trajectory`` is set.
    """

    def __init__(
        self,
        agent,
        log_trajectory: bool = False
    ):
        """
        Initialize telemetry callback.

        Args:
            agent: The ComputerAgent instance
            log_trajectory: Whether to log full trajectory items (opt-in)
        """
        self.agent = agent
        self.log_trajectory = log_trajectory

        # One session id per callback instance; a fresh run id is generated
        # at every run start.
        self.session_id = str(uuid.uuid4())
        self.run_id = None

        # Track timing and metrics
        self.run_start_time = None
        self.step_count = 0
        self.step_start_time = None
        # Never reset by this class, so the totals span every run observed
        # by this callback instance.
        self.total_usage = {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
            "response_cost": 0.0
        }

        # Record agent initialization
        if is_telemetry_enabled():
            self._record_agent_initialization()

    def _record_agent_initialization(self) -> None:
        """Record agent type/model and session initialization."""
        # Fall back to 'unknown' both when the agent has no agent_loop and
        # when the loop object carries no __name__ (e.g. it is None).
        loop = getattr(self.agent, "agent_loop", None)
        agent_info = {
            "session_id": self.session_id,
            "agent_type": getattr(loop, "__name__", "unknown"),
            "model": getattr(self.agent, "model", "unknown"),
            **SYSTEM_INFO
        }

        # Set session-level dimensions
        set_dimension("session_id", self.session_id)
        set_dimension("agent_type", agent_info["agent_type"])
        set_dimension("model", agent_info["model"])

        record_event("agent_session_start", agent_info)

    async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
        """Start a new run: reset per-run state and record ``agent_run_start``."""
        if not is_telemetry_enabled():
            return

        self.run_id = str(uuid.uuid4())
        self.run_start_time = time.time()
        self.step_count = 0
        # Clear the previous run's last step timestamp; otherwise the first
        # step of this run would report a duration that includes the idle
        # time between runs.
        self.step_start_time = None

        run_data = {
            "session_id": self.session_id,
            "run_id": self.run_id,
            "start_time": self.run_start_time,
            "input_context_size": self._calculate_context_size(old_items),
            "num_existing_messages": len(old_items)
        }

        # Log trajectory if opted in
        if self.log_trajectory:
            trajectory = self._extract_trajectory(old_items)
            if trajectory:
                run_data["uploaded_trajectory"] = trajectory

        set_dimension("run_id", self.run_id)
        record_event("agent_run_start", run_data)

    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
        """Record ``agent_run_end`` with duration, step count and usage totals."""
        if not is_telemetry_enabled() or not self.run_start_time:
            return

        # Sample the clock once so end_time - start_time equals the reported
        # duration exactly.
        end_time = time.time()

        run_data = {
            "session_id": self.session_id,
            "run_id": self.run_id,
            "end_time": end_time,
            "duration_seconds": end_time - self.run_start_time,
            "num_steps": self.step_count,
            "total_usage": self.total_usage.copy()
        }

        # Log trajectory if opted in
        if self.log_trajectory:
            trajectory = self._extract_trajectory(new_items)
            if trajectory:
                run_data["uploaded_trajectory"] = trajectory

        record_event("agent_run_end", run_data)

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """Accumulate usage counters and record an ``agent_usage`` event."""
        if not is_telemetry_enabled():
            return

        # Treat a missing or None field as zero so a partially filled usage
        # payload cannot raise TypeError.
        for key in ("prompt_tokens", "completion_tokens", "total_tokens", "response_cost"):
            self.total_usage[key] += usage.get(key) or 0

        # Record individual usage event
        usage_data = {
            "session_id": self.session_id,
            "run_id": self.run_id,
            "step": self.step_count,
            **usage
        }

        record_event("agent_usage", usage_data)

    async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
        """
        Count a step and record an ``agent_step`` event.

        ``duration_seconds`` is the time elapsed since the previous call to
        this hook; it is omitted when no previous timestamp is stored.
        """
        if not is_telemetry_enabled():
            return

        self.step_count += 1
        now = time.time()

        step_duration = None
        if self.step_start_time:
            step_duration = now - self.step_start_time

        self.step_start_time = now

        step_data = {
            "session_id": self.session_id,
            "run_id": self.run_id,
            "step": self.step_count,
            "timestamp": self.step_start_time
        }

        if step_duration is not None:
            step_data["duration_seconds"] = step_duration

        record_event("agent_step", step_data)

    def _calculate_context_size(self, items: List[Dict[str, Any]]) -> int:
        """
        Return the total number of characters of text content in ``items``.

        String ``content`` is counted for every item. List ``content`` is
        counted only for items of type ``"message"``, by summing the string
        ``"text"`` field of each dict part.
        """
        total_size = 0

        for item in items:
            content = item.get("content")
            if isinstance(content, str):
                total_size += len(content)
            elif isinstance(content, list) and item.get("type") == "message":
                total_size += sum(
                    len(part["text"])
                    for part in content
                    # Skip parts whose "text" is absent or not a string.
                    if isinstance(part, dict) and isinstance(part.get("text"), str)
                )

        return total_size

    def _extract_trajectory(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Return shallow copies of the items that should be logged.

        Selected items are user messages, assistant messages, reasoning
        items, computer calls and computer call outputs. Each copy gets a
        ``logged_at`` timestamp; the input items are not modified.
        """
        logged_types = ("reasoning", "computer_call", "computer_call_output")
        trajectory = []

        for item in items:
            item_type = item.get("type")
            role = item.get("role")
            if (
                role == "user"
                or (item_type == "message" and role == "assistant")
                or item_type in logged_types
            ):
                # Create a copy of the item with timestamp
                trajectory_item = item.copy()
                trajectory_item["logged_at"] = time.time()
                trajectory.append(trajectory_item)

        return trajectory
|
|
@@ -92,26 +92,30 @@ def print_welcome(model: str, agent_loop: str, container_name: str):
|
|
|
92
92
|
async def ainput(prompt: str = ""):
|
|
93
93
|
return await asyncio.to_thread(input, prompt)
|
|
94
94
|
|
|
95
|
-
async def chat_loop(agent, model: str, container_name: str):
|
|
95
|
+
async def chat_loop(agent, model: str, container_name: str, initial_prompt: str = ""):
|
|
96
96
|
"""Main chat loop with the agent."""
|
|
97
97
|
print_welcome(model, agent.agent_loop.__name__, container_name)
|
|
98
98
|
|
|
99
99
|
history = []
|
|
100
100
|
|
|
101
|
+
if initial_prompt:
|
|
102
|
+
history.append({"role": "user", "content": initial_prompt})
|
|
103
|
+
|
|
101
104
|
while True:
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
if user_input.lower() in ['exit', 'quit', 'q']:
|
|
107
|
-
print_colored("\n👋 Goodbye!")
|
|
108
|
-
break
|
|
105
|
+
if history[-1].get("role") != "user":
|
|
106
|
+
# Get user input with prompt
|
|
107
|
+
print_colored("> ", end="")
|
|
108
|
+
user_input = await ainput()
|
|
109
109
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
110
|
+
if user_input.lower() in ['exit', 'quit', 'q']:
|
|
111
|
+
print_colored("\n👋 Goodbye!")
|
|
112
|
+
break
|
|
113
|
+
|
|
114
|
+
if not user_input:
|
|
115
|
+
continue
|
|
116
|
+
|
|
117
|
+
# Add user message to history
|
|
118
|
+
history.append({"role": "user", "content": user_input})
|
|
115
119
|
|
|
116
120
|
# Stream responses from the agent with spinner
|
|
117
121
|
with yaspin(text="Thinking...", spinner="line", attrs=["dark"]) as spinner:
|
|
@@ -204,6 +208,12 @@ Examples:
|
|
|
204
208
|
action="store_true",
|
|
205
209
|
help="Enable verbose logging"
|
|
206
210
|
)
|
|
211
|
+
|
|
212
|
+
parser.add_argument(
|
|
213
|
+
"-p", "--prompt",
|
|
214
|
+
type=str,
|
|
215
|
+
help="Initial prompt to send to the agent. Leave blank for interactive mode."
|
|
216
|
+
)
|
|
207
217
|
|
|
208
218
|
args = parser.parse_args()
|
|
209
219
|
|
|
@@ -269,9 +279,11 @@ Examples:
|
|
|
269
279
|
agent_kwargs = {
|
|
270
280
|
"model": args.model,
|
|
271
281
|
"tools": [computer],
|
|
272
|
-
"only_n_most_recent_images": args.images,
|
|
273
282
|
"verbosity": 20 if args.verbose else 30, # DEBUG vs WARNING
|
|
274
283
|
}
|
|
284
|
+
|
|
285
|
+
if args.images > 0:
|
|
286
|
+
agent_kwargs["only_n_most_recent_images"] = args.images
|
|
275
287
|
|
|
276
288
|
if args.trajectory:
|
|
277
289
|
agent_kwargs["trajectory_dir"] = "trajectories"
|
|
@@ -286,7 +298,7 @@ Examples:
|
|
|
286
298
|
agent = ComputerAgent(**agent_kwargs)
|
|
287
299
|
|
|
288
300
|
# Start chat loop
|
|
289
|
-
await chat_loop(agent, args.model, container_name)
|
|
301
|
+
await chat_loop(agent, args.model, container_name, args.prompt)
|
|
290
302
|
|
|
291
303
|
|
|
292
304
|
|