cua-agent 0.4.30__py3-none-any.whl → 0.4.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/integrations/hud/__init__.py +19 -0
- {cua_agent-0.4.30.dist-info → cua_agent-0.4.31.dist-info}/METADATA +1 -1
- {cua_agent-0.4.30.dist-info → cua_agent-0.4.31.dist-info}/RECORD +5 -5
- {cua_agent-0.4.30.dist-info → cua_agent-0.4.31.dist-info}/WHEEL +0 -0
- {cua_agent-0.4.30.dist-info → cua_agent-0.4.31.dist-info}/entry_points.txt +0 -0
|
@@ -11,6 +11,7 @@ Exports:
|
|
|
11
11
|
import time
|
|
12
12
|
from typing import Any, Optional
|
|
13
13
|
|
|
14
|
+
from agent.computers import is_agent_computer
|
|
14
15
|
from datasets import load_dataset, Dataset
|
|
15
16
|
from hud.datasets import Task, run_dataset
|
|
16
17
|
from hud import trace
|
|
@@ -55,6 +56,15 @@ async def run_single_task(
|
|
|
55
56
|
sample_task = dataset[task_id] # type: ignore[index]
|
|
56
57
|
task_prompt = sample_task.get("prompt", f"Task {sample_task.get('id', 0)}") # type: ignore[attr-defined]
|
|
57
58
|
|
|
59
|
+
# Filter any existing Computer tools
|
|
60
|
+
# The eval framework will add its own Computer tool per task
|
|
61
|
+
if tools:
|
|
62
|
+
tools = [
|
|
63
|
+
tool
|
|
64
|
+
for tool in tools
|
|
65
|
+
if not is_agent_computer(tool)
|
|
66
|
+
]
|
|
67
|
+
|
|
58
68
|
with trace(name=task_prompt):
|
|
59
69
|
task = Task(**sample_task) # type: ignore[arg-type]
|
|
60
70
|
|
|
@@ -118,6 +128,15 @@ async def run_full_dataset(
|
|
|
118
128
|
dataset_name = "custom"
|
|
119
129
|
job_name = job_name or f"Evaluation {time.strftime('%H:%M %Y-%m-%d')}"
|
|
120
130
|
|
|
131
|
+
# Filter any existing Computer tools
|
|
132
|
+
# The eval framework will add its own Computer tool per task
|
|
133
|
+
if tools:
|
|
134
|
+
tools = [
|
|
135
|
+
tool
|
|
136
|
+
for tool in tools
|
|
137
|
+
if not is_agent_computer(tool)
|
|
138
|
+
]
|
|
139
|
+
|
|
121
140
|
# Execute evaluation
|
|
122
141
|
return await run_dataset(
|
|
123
142
|
name=job_name,
|
|
@@ -25,7 +25,7 @@ agent/human_tool/__init__.py,sha256=3m5_g-Fo_0yX5vi7eg-A92oTqO0N3aY929Ajp78HKsE,
|
|
|
25
25
|
agent/human_tool/__main__.py,sha256=VsW2BAghlonOuqZbP_xuCsaec9bemA1I_ibnDcED9D4,1068
|
|
26
26
|
agent/human_tool/server.py,sha256=ceuL5kw_RjgAi8fueLU3nTjyzOLE25Shv1oTJnSHsoQ,7964
|
|
27
27
|
agent/human_tool/ui.py,sha256=wu9eZorhxCkyPTlBSZjYaVzutoHMlucAz8UGNpAT4bM,30644
|
|
28
|
-
agent/integrations/hud/__init__.py,sha256=
|
|
28
|
+
agent/integrations/hud/__init__.py,sha256=xir5BVAlG2cFc7rHSx_Ea_2b1kp2TtFuKJk07jny7qY,5969
|
|
29
29
|
agent/integrations/hud/agent.py,sha256=GBikd9MhjDNKMiMG8J7PE3OMSmvmC_JLZ1p5xr2cZoc,14006
|
|
30
30
|
agent/integrations/hud/proxy.py,sha256=8HUoh7uZ8Z3vkhPXK0dskgePGsP8oCqyYij0mE_E7X8,10902
|
|
31
31
|
agent/loops/__init__.py,sha256=Ef8aj07l3osibwDk-DTo80PrpL4_GdKRTP1ikl_b-BQ,328
|
|
@@ -47,7 +47,7 @@ agent/ui/__main__.py,sha256=vudWXYvGM0aNT5aZ94HPtGW8YXOZ4cLXepHyhUM_k1g,73
|
|
|
47
47
|
agent/ui/gradio/__init__.py,sha256=yv4Mrfo-Sj2U5sVn_UJHAuwYCezo-5O4ItR2C9jzNko,145
|
|
48
48
|
agent/ui/gradio/app.py,sha256=Ol97YEbwREZZQ9_PMjVHlfOcu9BGsawxgAGAm79hT80,9117
|
|
49
49
|
agent/ui/gradio/ui_components.py,sha256=dJUvKDmc1oSejtoR_gU_oWWYwxaOOQyPloSYRGMrUCQ,36068
|
|
50
|
-
cua_agent-0.4.
|
|
51
|
-
cua_agent-0.4.
|
|
52
|
-
cua_agent-0.4.
|
|
53
|
-
cua_agent-0.4.
|
|
50
|
+
cua_agent-0.4.31.dist-info/METADATA,sha256=HywszWj2eDguXdge9eANyyz57gVlY9YZr4yacHHkUhU,5610
|
|
51
|
+
cua_agent-0.4.31.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
|
|
52
|
+
cua_agent-0.4.31.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
53
|
+
cua_agent-0.4.31.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|