cua-agent 0.4.0b4.tar.gz → 0.4.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cua-agent might be problematic.
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/PKG-INFO +1 -1
- cua_agent-0.4.1/agent/__init__.py +64 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/agent.py +18 -1
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/callbacks/__init__.py +2 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/callbacks/budget_manager.py +43 -43
- cua_agent-0.4.1/agent/callbacks/telemetry.py +210 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/responses.py +207 -207
- cua_agent-0.4.1/agent/telemetry.py +142 -0
- cua_agent-0.4.1/agent/ui/__init__.py +7 -0
- cua_agent-0.4.1/agent/ui/__main__.py +4 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/ui/gradio/__init__.py +2 -2
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/ui/gradio/app.py +19 -19
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/ui/gradio/ui_components.py +28 -10
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/pyproject.toml +1 -1
- cua_agent-0.4.0b4/agent/__init__.py +0 -19
- cua_agent-0.4.0b4/agent/ui/__init__.py +0 -7
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/README.md +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/__main__.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/adapters/__init__.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/adapters/huggingfacelocal_adapter.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/callbacks/base.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/callbacks/image_retention.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/callbacks/logging.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/callbacks/pii_anonymization.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/callbacks/trajectory_saver.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/cli.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/computer_handler.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/decorators.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/loops/__init__.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/loops/anthropic.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/loops/omniparser.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/loops/openai.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/loops/uitars.py +0 -0
- {cua_agent-0.4.0b4 → cua_agent-0.4.1}/agent/types.py +0 -0
agent/__init__.py (new file in cua_agent-0.4.1):
@@ -0,0 +1,64 @@
+"""
+agent - Decorator-based Computer Use Agent with liteLLM integration
+"""
+
+import logging
+import sys
+
+from .decorators import agent_loop
+from .agent import ComputerAgent
+from .types import Messages, AgentResponse
+
+# Import loops to register them
+from . import loops
+
+__all__ = [
+    "agent_loop",
+    "ComputerAgent",
+    "Messages",
+    "AgentResponse"
+]
+
+__version__ = "0.4.0"
+
+logger = logging.getLogger(__name__)
+
+# Initialize telemetry when the package is imported
+try:
+    # Import from core telemetry for basic functions
+    from core.telemetry import (
+        is_telemetry_enabled,
+        flush,
+        record_event,
+    )
+
+    # Import set_dimension from our own telemetry module
+    from .telemetry import set_dimension
+
+    # Check if telemetry is enabled
+    if is_telemetry_enabled():
+        logger.info("Telemetry is enabled")
+
+        # Record package initialization
+        record_event(
+            "module_init",
+            {
+                "module": "agent",
+                "version": __version__,
+                "python_version": sys.version,
+            },
+        )
+
+        # Set the package version as a dimension
+        set_dimension("agent_version", __version__)
+
+        # Flush events to ensure they're sent
+        flush()
+    else:
+        logger.info("Telemetry is disabled")
+except ImportError as e:
+    # Telemetry not available
+    logger.warning(f"Telemetry not available: {e}")
+except Exception as e:
+    # Other issues with telemetry
+    logger.warning(f"Error initializing telemetry: {e}")
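A minimal sketch of how a consumer can observe the new import-time telemetry, assuming the optional cua-core package that provides core.telemetry is installed; nothing in this sketch is emitted by the package itself:

# check the package version and whether telemetry was activated on import
import agent
from agent.telemetry import is_telemetry_enabled

print("agent version:", agent.__version__)
print("telemetry enabled:", is_telemetry_enabled())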
agent/agent.py:
@@ -6,6 +6,7 @@ import asyncio
 from typing import Dict, List, Any, Optional, AsyncGenerator, Union, cast, Callable, Set

 from litellm.responses.utils import Usage
+
 from .types import Messages, Computer
 from .decorators import find_agent_loop
 from .computer_handler import OpenAIComputerHandler, acknowledge_safety_check_callback, check_blocklisted_url
@@ -14,7 +15,13 @@ import litellm
 import litellm.utils
 import inspect
 from .adapters import HuggingFaceLocalAdapter
-from .callbacks import
+from .callbacks import (
+    ImageRetentionCallback,
+    LoggingCallback,
+    TrajectorySaverCallback,
+    BudgetManagerCallback,
+    TelemetryCallback,
+)

 def get_json(obj: Any, max_depth: int = 10) -> Any:
     def custom_serializer(o: Any, depth: int = 0, seen: Set[int] = None) -> Any:
@@ -129,6 +136,7 @@ class ComputerAgent:
         screenshot_delay: Optional[float | int] = 0.5,
         use_prompt_caching: Optional[bool] = False,
         max_trajectory_budget: Optional[float | dict] = None,
+        telemetry_enabled: Optional[bool] = True,
         **kwargs
     ):
         """
@@ -146,6 +154,7 @@ class ComputerAgent:
             screenshot_delay: Delay before screenshots in seconds
             use_prompt_caching: If set, use prompt caching to avoid reprocessing the same prompt. Intended for use with anthropic providers.
             max_trajectory_budget: If set, adds BudgetManagerCallback to track usage costs and stop when budget is exceeded
+            telemetry_enabled: If set, adds TelemetryCallback to track anonymized usage data. Enabled by default.
             **kwargs: Additional arguments passed to the agent loop
         """
         self.model = model
@@ -158,10 +167,18 @@ class ComputerAgent:
         self.max_retries = max_retries
         self.screenshot_delay = screenshot_delay
         self.use_prompt_caching = use_prompt_caching
+        self.telemetry_enabled = telemetry_enabled
         self.kwargs = kwargs

         # == Add built-in callbacks ==

+        # Add telemetry callback if telemetry_enabled is set
+        if self.telemetry_enabled:
+            if isinstance(self.telemetry_enabled, bool):
+                self.callbacks.append(TelemetryCallback(self))
+            else:
+                self.callbacks.append(TelemetryCallback(self, **self.telemetry_enabled))
+
         # Add logging callback if verbosity is set
         if self.verbosity is not None:
             self.callbacks.append(LoggingCallback(level=self.verbosity))
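A hedged sketch of the new telemetry_enabled argument based on the hunks above; the model string is illustrative, and any other constructor arguments follow the existing ComputerAgent signature:

from agent import ComputerAgent

# Default (True): a TelemetryCallback(self) is appended automatically.
agent_default = ComputerAgent(model="openai/computer-use-preview")

# Opt out of telemetry entirely.
agent_quiet = ComputerAgent(model="openai/computer-use-preview", telemetry_enabled=False)

# A non-bool truthy value is unpacked as TelemetryCallback kwargs,
# e.g. opting in to trajectory logging.
agent_verbose = ComputerAgent(
    model="openai/computer-use-preview",
    telemetry_enabled={"log_trajectory": True},
)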
agent/callbacks/__init__.py:
@@ -7,6 +7,7 @@ from .image_retention import ImageRetentionCallback
 from .logging import LoggingCallback
 from .trajectory_saver import TrajectorySaverCallback
 from .budget_manager import BudgetManagerCallback
+from .telemetry import TelemetryCallback

 __all__ = [
     "AsyncCallbackHandler",
@@ -14,4 +15,5 @@ __all__ = [
     "LoggingCallback",
     "TrajectorySaverCallback",
     "BudgetManagerCallback",
+    "TelemetryCallback",
 ]
agent/callbacks/budget_manager.py:
@@ -1,44 +1,44 @@
Whole-file change: every line is removed and re-added with textually identical content, so the change here is whitespace or line-ending only. The file content, unchanged between versions:

from typing import Dict, List, Any
from .base import AsyncCallbackHandler

class BudgetExceededError(Exception):
    """Exception raised when budget is exceeded."""
    pass

class BudgetManagerCallback(AsyncCallbackHandler):
    """Budget manager callback that tracks usage costs and can stop execution when budget is exceeded."""

    def __init__(self, max_budget: float, reset_after_each_run: bool = True, raise_error: bool = False):
        """
        Initialize BudgetManagerCallback.

        Args:
            max_budget: Maximum budget allowed
            reset_after_each_run: Whether to reset budget after each run
            raise_error: Whether to raise an error when budget is exceeded
        """
        self.max_budget = max_budget
        self.reset_after_each_run = reset_after_each_run
        self.raise_error = raise_error
        self.total_cost = 0.0

    async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
        """Reset budget if configured to do so."""
        if self.reset_after_each_run:
            self.total_cost = 0.0

    async def on_usage(self, usage: Dict[str, Any]) -> None:
        """Track usage costs."""
        if "response_cost" in usage:
            self.total_cost += usage["response_cost"]

    async def on_run_continue(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> bool:
        """Check if budget allows continuation."""
        if self.total_cost >= self.max_budget:
            if self.raise_error:
                raise BudgetExceededError(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}")
            else:
                print(f"Budget exceeded: ${self.total_cost} >= ${self.max_budget}")
                return False
        return True
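A hedged usage sketch for the (unchanged) budget manager; the callbacks= keyword is assumed from ComputerAgent's existing callback support, and max_trajectory_budget is described in the agent.py docstring above:

from agent import ComputerAgent
from agent.callbacks import BudgetManagerCallback

# Explicit callback: stop (or raise) once $5.00 of response cost has accumulated.
agent_budgeted = ComputerAgent(
    model="openai/computer-use-preview",  # illustrative model string
    callbacks=[BudgetManagerCallback(max_budget=5.0, raise_error=True)],
)

# Convenience path per the docstring: max_trajectory_budget adds the callback internally.
agent_simple = ComputerAgent(model="openai/computer-use-preview", max_trajectory_budget=5.0)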
agent/callbacks/telemetry.py (new file in cua_agent-0.4.1):
@@ -0,0 +1,210 @@
+"""
+Telemetry callback handler for Computer-Use Agent (cua-agent)
+"""
+
+import time
+import uuid
+from typing import List, Dict, Any, Optional, Union
+
+from .base import AsyncCallbackHandler
+from ..telemetry import (
+    record_event,
+    is_telemetry_enabled,
+    set_dimension,
+    SYSTEM_INFO,
+)
+
+
+class TelemetryCallback(AsyncCallbackHandler):
+    """
+    Telemetry callback handler for Computer-Use Agent (cua-agent)
+
+    Tracks agent usage, performance metrics, and optionally trajectory data.
+    """
+
+    def __init__(
+        self,
+        agent,
+        log_trajectory: bool = False
+    ):
+        """
+        Initialize telemetry callback.
+
+        Args:
+            agent: The ComputerAgent instance
+            log_trajectory: Whether to log full trajectory items (opt-in)
+        """
+        self.agent = agent
+        self.log_trajectory = log_trajectory
+
+        # Generate session/run IDs
+        self.session_id = str(uuid.uuid4())
+        self.run_id = None
+
+        # Track timing and metrics
+        self.run_start_time = None
+        self.step_count = 0
+        self.step_start_time = None
+        self.total_usage = {
+            "prompt_tokens": 0,
+            "completion_tokens": 0,
+            "total_tokens": 0,
+            "response_cost": 0.0
+        }
+
+        # Record agent initialization
+        if is_telemetry_enabled():
+            self._record_agent_initialization()
+
+    def _record_agent_initialization(self) -> None:
+        """Record agent type/model and session initialization."""
+        agent_info = {
+            "session_id": self.session_id,
+            "agent_type": self.agent.agent_loop.__name__,
+            "model": getattr(self.agent, 'model', 'unknown'),
+            **SYSTEM_INFO
+        }
+
+        # Set session-level dimensions
+        set_dimension("session_id", self.session_id)
+        set_dimension("agent_type", agent_info["agent_type"])
+        set_dimension("model", agent_info["model"])
+
+        record_event("agent_session_start", agent_info)
+
+    async def on_run_start(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]]) -> None:
+        """Called at the start of an agent run loop."""
+        if not is_telemetry_enabled():
+            return
+
+        self.run_id = str(uuid.uuid4())
+        self.run_start_time = time.time()
+        self.step_count = 0
+
+        # Calculate input context size
+        input_context_size = self._calculate_context_size(old_items)
+
+        run_data = {
+            "session_id": self.session_id,
+            "run_id": self.run_id,
+            "start_time": self.run_start_time,
+            "input_context_size": input_context_size,
+            "num_existing_messages": len(old_items)
+        }
+
+        # Log trajectory if opted in
+        if self.log_trajectory:
+            trajectory = self._extract_trajectory(old_items)
+            if trajectory:
+                run_data["uploaded_trajectory"] = trajectory
+
+        set_dimension("run_id", self.run_id)
+        record_event("agent_run_start", run_data)
+
+    async def on_run_end(self, kwargs: Dict[str, Any], old_items: List[Dict[str, Any]], new_items: List[Dict[str, Any]]) -> None:
+        """Called at the end of an agent run loop."""
+        if not is_telemetry_enabled() or not self.run_start_time:
+            return
+
+        run_duration = time.time() - self.run_start_time
+
+        run_data = {
+            "session_id": self.session_id,
+            "run_id": self.run_id,
+            "end_time": time.time(),
+            "duration_seconds": run_duration,
+            "num_steps": self.step_count,
+            "total_usage": self.total_usage.copy()
+        }
+
+        # Log trajectory if opted in
+        if self.log_trajectory:
+            trajectory = self._extract_trajectory(new_items)
+            if trajectory:
+                run_data["uploaded_trajectory"] = trajectory
+
+        record_event("agent_run_end", run_data)
+
+    async def on_usage(self, usage: Dict[str, Any]) -> None:
+        """Called when usage information is received."""
+        if not is_telemetry_enabled():
+            return
+
+        # Accumulate usage stats
+        self.total_usage["prompt_tokens"] += usage.get("prompt_tokens", 0)
+        self.total_usage["completion_tokens"] += usage.get("completion_tokens", 0)
+        self.total_usage["total_tokens"] += usage.get("total_tokens", 0)
+        self.total_usage["response_cost"] += usage.get("response_cost", 0.0)
+
+        # Record individual usage event
+        usage_data = {
+            "session_id": self.session_id,
+            "run_id": self.run_id,
+            "step": self.step_count,
+            **usage
+        }
+
+        record_event("agent_usage", usage_data)
+
+    async def on_responses(self, kwargs: Dict[str, Any], responses: Dict[str, Any]) -> None:
+        """Called when responses are received."""
+        if not is_telemetry_enabled():
+            return
+
+        self.step_count += 1
+        step_duration = None
+
+        if self.step_start_time:
+            step_duration = time.time() - self.step_start_time
+
+        self.step_start_time = time.time()
+
+        step_data = {
+            "session_id": self.session_id,
+            "run_id": self.run_id,
+            "step": self.step_count,
+            "timestamp": self.step_start_time
+        }
+
+        if step_duration is not None:
+            step_data["duration_seconds"] = step_duration
+
+        record_event("agent_step", step_data)
+
+    def _calculate_context_size(self, items: List[Dict[str, Any]]) -> int:
+        """Calculate approximate context size in tokens/characters."""
+        total_size = 0
+
+        for item in items:
+            if item.get("type") == "message" and "content" in item:
+                content = item["content"]
+                if isinstance(content, str):
+                    total_size += len(content)
+                elif isinstance(content, list):
+                    for part in content:
+                        if isinstance(part, dict) and "text" in part:
+                            total_size += len(part["text"])
+            elif "content" in item and isinstance(item["content"], str):
+                total_size += len(item["content"])
+
+        return total_size
+
+    def _extract_trajectory(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Extract trajectory items that should be logged."""
+        trajectory = []
+
+        for item in items:
+            # Include user messages, assistant messages, reasoning, computer calls, and computer outputs
+            if (
+                item.get("role") == "user" or  # User inputs
+                (item.get("type") == "message" and item.get("role") == "assistant") or  # Model outputs
+                item.get("type") == "reasoning" or  # Reasoning traces
+                item.get("type") == "computer_call" or  # Computer actions
+                item.get("type") == "computer_call_output"  # Computer outputs
+            ):
+                # Create a copy of the item with timestamp
+                trajectory_item = item.copy()
+                trajectory_item["logged_at"] = time.time()
+                trajectory.append(trajectory_item)
+
+        return trajectory
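For reference, the events emitted by this callback are agent_session_start (at construction), agent_run_start and agent_run_end (per run), agent_usage (per usage report), and agent_step (per model response). A hedged sketch of attaching it manually instead of via telemetry_enabled; the post-construction append relies on the self.callbacks list shown in agent.py:

from agent import ComputerAgent
from agent.callbacks import TelemetryCallback

agent = ComputerAgent(model="openai/computer-use-preview", telemetry_enabled=False)
agent.callbacks.append(TelemetryCallback(agent, log_trajectory=False))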
agent/responses.py:
@@ -1,207 +1,207 @@
Whole-file change: every line is removed and re-added with textually identical content, so the change here is whitespace or line-ending only. The file content, unchanged between versions:

"""
Functions for making various Responses API items from different types of responses.
Based on the OpenAI spec for Responses API items.
"""

import base64
import json
import uuid
from typing import List, Dict, Any, Literal, Union, Optional

from openai.types.responses.response_computer_tool_call_param import (
    ResponseComputerToolCallParam,
    ActionClick,
    ActionDoubleClick,
    ActionDrag,
    ActionDragPath,
    ActionKeypress,
    ActionMove,
    ActionScreenshot,
    ActionScroll,
    ActionType as ActionTypeAction,
    ActionWait,
    PendingSafetyCheck
)

from openai.types.responses.response_function_tool_call_param import ResponseFunctionToolCallParam
from openai.types.responses.response_output_text_param import ResponseOutputTextParam
from openai.types.responses.response_reasoning_item_param import ResponseReasoningItemParam, Summary
from openai.types.responses.response_output_message_param import ResponseOutputMessageParam
from openai.types.responses.easy_input_message_param import EasyInputMessageParam
from openai.types.responses.response_input_image_param import ResponseInputImageParam

def random_id():
    return str(uuid.uuid4())

# User message items
def make_input_image_item(image_data: Union[str, bytes]) -> EasyInputMessageParam:
    return EasyInputMessageParam(
        content=[
            ResponseInputImageParam(
                type="input_image",
                image_url=f"data:image/png;base64,{base64.b64encode(image_data).decode('utf-8') if isinstance(image_data, bytes) else image_data}"
            )
        ],
        role="user",
        type="message"
    )

# Text items
def make_reasoning_item(reasoning: str) -> ResponseReasoningItemParam:
    return ResponseReasoningItemParam(
        id=random_id(),
        summary=[
            Summary(text=reasoning, type="summary_text")
        ],
        type="reasoning"
    )

def make_output_text_item(content: str) -> ResponseOutputMessageParam:
    return ResponseOutputMessageParam(
        id=random_id(),
        content=[
            ResponseOutputTextParam(
                text=content,
                type="output_text",
                annotations=[]
            )
        ],
        role="assistant",
        status="completed",
        type="message"
    )

# Function call items
def make_function_call_item(function_name: str, arguments: Dict[str, Any], call_id: Optional[str] = None) -> ResponseFunctionToolCallParam:
    return ResponseFunctionToolCallParam(
        id=random_id(),
        call_id=call_id if call_id else random_id(),
        name=function_name,
        arguments=json.dumps(arguments),
        status="completed",
        type="function_call"
    )

# Computer tool call items
def make_click_item(x: int, y: int, button: Literal["left", "right", "wheel", "back", "forward"] = "left", call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
    return ResponseComputerToolCallParam(
        id=random_id(),
        call_id=call_id if call_id else random_id(),
        action=ActionClick(
            button=button,
            type="click",
            x=x,
            y=y
        ),
        pending_safety_checks=[],
        status="completed",
        type="computer_call"
    )

def make_double_click_item(x: int, y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
    return ResponseComputerToolCallParam(
        id=random_id(),
        call_id=call_id if call_id else random_id(),
        action=ActionDoubleClick(
            type="double_click",
            x=x,
            y=y
        ),
        pending_safety_checks=[],
        status="completed",
        type="computer_call"
    )

def make_drag_item(path: List[Dict[str, int]], call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
    drag_path = [ActionDragPath(x=point["x"], y=point["y"]) for point in path]
    return ResponseComputerToolCallParam(
        id=random_id(),
        call_id=call_id if call_id else random_id(),
        action=ActionDrag(
            path=drag_path,
            type="drag"
        ),
        pending_safety_checks=[],
        status="completed",
        type="computer_call"
    )

def make_keypress_item(keys: List[str], call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
    return ResponseComputerToolCallParam(
        id=random_id(),
        call_id=call_id if call_id else random_id(),
        action=ActionKeypress(
            keys=keys,
            type="keypress"
        ),
        pending_safety_checks=[],
        status="completed",
        type="computer_call"
    )

def make_move_item(x: int, y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
    return ResponseComputerToolCallParam(
        id=random_id(),
        call_id=call_id if call_id else random_id(),
        action=ActionMove(
            type="move",
            x=x,
            y=y
        ),
        pending_safety_checks=[],
        status="completed",
        type="computer_call"
    )

def make_screenshot_item(call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
    return ResponseComputerToolCallParam(
        id=random_id(),
        call_id=call_id if call_id else random_id(),
        action=ActionScreenshot(
            type="screenshot"
        ),
        pending_safety_checks=[],
        status="completed",
        type="computer_call"
    )

def make_scroll_item(x: int, y: int, scroll_x: int, scroll_y: int, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
    return ResponseComputerToolCallParam(
        id=random_id(),
        call_id=call_id if call_id else random_id(),
        action=ActionScroll(
            scroll_x=scroll_x,
            scroll_y=scroll_y,
            type="scroll",
            x=x,
            y=y
        ),
        pending_safety_checks=[],
        status="completed",
        type="computer_call"
    )

def make_type_item(text: str, call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
    return ResponseComputerToolCallParam(
        id=random_id(),
        call_id=call_id if call_id else random_id(),
        action=ActionTypeAction(
            text=text,
            type="type"
        ),
        pending_safety_checks=[],
        status="completed",
        type="computer_call"
    )

def make_wait_item(call_id: Optional[str] = None) -> ResponseComputerToolCallParam:
    return ResponseComputerToolCallParam(
        id=random_id(),
        call_id=call_id if call_id else random_id(),
        action=ActionWait(
            type="wait"
        ),
        pending_safety_checks=[],
        status="completed",
        type="computer_call"
    )
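A short sketch exercising the helpers above; since the Param types are TypedDicts, the constructors return plain dicts that follow the OpenAI Responses item shapes:

from agent.responses import make_click_item, make_output_text_item, make_screenshot_item

items = [
    make_output_text_item("Opening the browser."),
    make_click_item(x=120, y=240, button="left"),
    make_screenshot_item(),
]
for item in items:
    print(item["type"], item.get("action"))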
agent/telemetry.py (new file in cua_agent-0.4.1):
@@ -0,0 +1,142 @@
+"""Agent telemetry for tracking anonymous usage and feature usage."""
+
+import logging
+import os
+import platform
+import sys
+from typing import Dict, Any, Callable
+
+# Import the core telemetry module
+TELEMETRY_AVAILABLE = False
+
+
+# Local fallbacks in case core telemetry isn't available
+def _noop(*args: Any, **kwargs: Any) -> None:
+    """No-op function for when telemetry is not available."""
+    pass
+
+
+# Define default functions with unique names to avoid shadowing
+_default_record_event = _noop
+_default_increment_counter = _noop
+_default_set_dimension = _noop
+_default_get_telemetry_client = lambda: None
+_default_flush = _noop
+_default_is_telemetry_enabled = lambda: False
+_default_is_telemetry_globally_disabled = lambda: True
+
+# Set the actual functions to the defaults initially
+record_event = _default_record_event
+increment_counter = _default_increment_counter
+set_dimension = _default_set_dimension
+get_telemetry_client = _default_get_telemetry_client
+flush = _default_flush
+is_telemetry_enabled = _default_is_telemetry_enabled
+is_telemetry_globally_disabled = _default_is_telemetry_globally_disabled
+
+logger = logging.getLogger("agent.telemetry")
+
+try:
+    # Import from core telemetry
+    from core.telemetry import (
+        record_event as core_record_event,
+        increment as core_increment,
+        get_telemetry_client as core_get_telemetry_client,
+        flush as core_flush,
+        is_telemetry_enabled as core_is_telemetry_enabled,
+        is_telemetry_globally_disabled as core_is_telemetry_globally_disabled,
+    )
+
+    # Override the default functions with actual implementations
+    record_event = core_record_event
+    get_telemetry_client = core_get_telemetry_client
+    flush = core_flush
+    is_telemetry_enabled = core_is_telemetry_enabled
+    is_telemetry_globally_disabled = core_is_telemetry_globally_disabled
+
+    def increment_counter(counter_name: str, value: int = 1) -> None:
+        """Wrapper for increment to maintain backward compatibility."""
+        if is_telemetry_enabled():
+            core_increment(counter_name, value)
+
+    def set_dimension(name: str, value: Any) -> None:
+        """Set a dimension that will be attached to all events."""
+        logger.debug(f"Setting dimension {name}={value}")
+
+    TELEMETRY_AVAILABLE = True
+    logger.info("Successfully imported telemetry")
+except ImportError as e:
+    logger.warning(f"Could not import telemetry: {e}")
+    logger.debug("Telemetry not available, using no-op functions")
+
+# Get system info once to use in telemetry
+SYSTEM_INFO = {
+    "os": platform.system().lower(),
+    "os_version": platform.release(),
+    "python_version": platform.python_version(),
+}
+
+
+def enable_telemetry() -> bool:
+    """Enable telemetry if available.
+
+    Returns:
+        bool: True if telemetry was successfully enabled, False otherwise
+    """
+    global TELEMETRY_AVAILABLE, record_event, increment_counter, get_telemetry_client, flush, is_telemetry_enabled, is_telemetry_globally_disabled
+
+    # Check if globally disabled using core function
+    if TELEMETRY_AVAILABLE and is_telemetry_globally_disabled():
+        logger.info("Telemetry is globally disabled via environment variable - cannot enable")
+        return False
+
+    # Already enabled
+    if TELEMETRY_AVAILABLE:
+        return True
+
+    # Try to import and enable
+    try:
+        from core.telemetry import (
+            record_event,
+            increment,
+            get_telemetry_client,
+            flush,
+            is_telemetry_globally_disabled,
+        )
+
+        # Check again after import
+        if is_telemetry_globally_disabled():
+            logger.info("Telemetry is globally disabled via environment variable - cannot enable")
+            return False
+
+        TELEMETRY_AVAILABLE = True
+        logger.info("Telemetry successfully enabled")
+        return True
+    except ImportError as e:
+        logger.warning(f"Could not enable telemetry: {e}")
+        return False
+
+
+def is_telemetry_enabled() -> bool:
+    """Check if telemetry is enabled.
+
+    Returns:
+        bool: True if telemetry is enabled, False otherwise
+    """
+    # Use the core function if available, otherwise use our local flag
+    if TELEMETRY_AVAILABLE:
+        from core.telemetry import is_telemetry_enabled as core_is_enabled
+
+        return core_is_enabled()
+    return False
+
+
+def record_agent_initialization() -> None:
+    """Record when an agent instance is initialized."""
+    if TELEMETRY_AVAILABLE and is_telemetry_enabled():
+        record_event("agent_initialized", SYSTEM_INFO)
+
+        # Set dimensions that will be attached to all events
+        set_dimension("os", SYSTEM_INFO["os"])
+        set_dimension("os_version", SYSTEM_INFO["os_version"])
+        set_dimension("python_version", SYSTEM_INFO["python_version"])
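A hedged sketch of the module-level helpers defined above; the event name and payload are illustrative, not events the package emits on its own:

from agent import telemetry

if telemetry.enable_telemetry() and telemetry.is_telemetry_enabled():
    telemetry.set_dimension("example_dimension", "demo")
    telemetry.record_event("example_event", dict(telemetry.SYSTEM_INFO))
    telemetry.flush()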
agent/ui/gradio/app.py:
@@ -72,26 +72,26 @@ def save_settings(settings: Dict[str, Any]):
         print(f"Warning: Could not save settings to {SETTINGS_FILE}: {e}")


-# Custom Screenshot Handler for Gradio chat
-class GradioChatScreenshotHandler:
-
+# # Custom Screenshot Handler for Gradio chat
+# class GradioChatScreenshotHandler:
+#     """Custom handler that adds screenshots to the Gradio chatbot."""

-
-
-
+#     def __init__(self, chatbot_history: List[gr.ChatMessage]):
+#         self.chatbot_history = chatbot_history
+#         print("GradioChatScreenshotHandler initialized")

-
-
-
+#     async def on_screenshot(self, screenshot_base64: str, action_type: str = "") -> None:
+#         """Add screenshot to chatbot when a screenshot is taken."""
+#         image_markdown = f""

-
-
-
-
-
-
-
-
+#         if self.chatbot_history is not None:
+#             self.chatbot_history.append(
+#                 gr.ChatMessage(
+#                     role="assistant",
+#                     content=image_markdown,
+#                     metadata={"title": f"🖥️ Screenshot - {action_type}", "status": "done"},
+#                 )
+#             )


 # Detect platform capabilities
@@ -236,7 +236,7 @@ def create_agent(
     return global_agent


-def
+def launch_ui():
     """Standalone function to launch the Gradio app."""
     from agent.ui.gradio.ui_components import create_gradio_ui
     print(f"Starting Gradio app for CUA Agent...")
@@ -245,4 +245,4 @@ def test_cua():


 if __name__ == "__main__":
-
+    launch_ui()
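A minimal way to start the restored Gradio UI from Python; the new agent/ui/__main__.py (+4 lines, not expanded in this diff) presumably wires the same entry point up for python -m agent.ui:

from agent.ui.gradio.app import launch_ui

launch_ui()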
agent/ui/gradio/ui_components.py:
@@ -14,9 +14,12 @@ from gradio.components.chatbot import MetadataDict

 from .app import (
     load_settings, save_settings, create_agent, get_model_string,
-    get_ollama_models,
+    get_ollama_models, global_agent, global_computer
 )

+# Global messages array to maintain conversation history
+global_messages = []
+

 def create_gradio_ui() -> gr.Blocks:
     """Create a Gradio UI for the Computer-Use Agent."""
@@ -571,15 +574,17 @@
             yield history
             return

-        #
-
-
+        # Add user message to global history
+        global global_messages
+        global_messages.append({"role": "user", "content": last_user_message})
+
         # Stream responses from the agent
-        async for result in global_agent.run(
-
-
-        pprint
-
+        async for result in global_agent.run(global_messages):
+            global_messages += result.get("output", [])
+            # print(f"DEBUG - Agent response ------- START")
+            # from pprint import pprint
+            # pprint(result)
+            # print(f"DEBUG - Agent response ------- END")

             # Process the result output
             for item in result.get("output", []):
@@ -619,6 +624,14 @@
                         content=f"📤 Function output:\n```\n{output}\n```",
                         metadata={"title": "Function Output"}
                     ))
+                elif item.get("type") == "computer_call_output":
+                    output = item.get("output", {}).get("image_url", "")
+                    image_markdown = f""
+                    history.append(gr.ChatMessage(
+                        role="assistant",
+                        content=image_markdown,
+                        metadata={"title": "🖥️ Computer Output"}
+                    ))

             yield history

@@ -661,7 +674,12 @@
         )

         # Clear button functionality
-
+        def clear_chat():
+            global global_messages
+            global_messages.clear()
+            return None
+
+        clear.click(clear_chat, None, chatbot_history, queue=False)

         # Connect cancel button
         cancel_button.click(
cua_agent-0.4.0b4/agent/__init__.py (removed; superseded by the new agent/__init__.py above):
@@ -1,19 +0,0 @@
-"""
-agent - Decorator-based Computer Use Agent with liteLLM integration
-"""
-
-from .decorators import agent_loop
-from .agent import ComputerAgent
-from .types import Messages, AgentResponse
-
-# Import loops to register them
-from . import loops
-
-__all__ = [
-    "agent_loop",
-    "ComputerAgent",
-    "Messages",
-    "AgentResponse"
-]
-
-__version__ = "0.4.0b3"
The remaining 18 files listed above with +0 -0 are unchanged between 0.4.0b4 and 0.4.1.