cua-agent: cua_agent-0.4.14-py3-none-any.whl → cua_agent-0.7.16-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (82)
  1. agent/__init__.py +4 -19
  2. agent/__main__.py +2 -1
  3. agent/adapters/__init__.py +6 -0
  4. agent/adapters/azure_ml_adapter.py +283 -0
  5. agent/adapters/cua_adapter.py +161 -0
  6. agent/adapters/huggingfacelocal_adapter.py +67 -125
  7. agent/adapters/human_adapter.py +116 -114
  8. agent/adapters/mlxvlm_adapter.py +370 -0
  9. agent/adapters/models/__init__.py +41 -0
  10. agent/adapters/models/generic.py +78 -0
  11. agent/adapters/models/internvl.py +290 -0
  12. agent/adapters/models/opencua.py +115 -0
  13. agent/adapters/models/qwen2_5_vl.py +78 -0
  14. agent/agent.py +431 -241
  15. agent/callbacks/__init__.py +10 -3
  16. agent/callbacks/base.py +45 -31
  17. agent/callbacks/budget_manager.py +22 -10
  18. agent/callbacks/image_retention.py +54 -98
  19. agent/callbacks/logging.py +55 -42
  20. agent/callbacks/operator_validator.py +140 -0
  21. agent/callbacks/otel.py +291 -0
  22. agent/callbacks/pii_anonymization.py +19 -16
  23. agent/callbacks/prompt_instructions.py +47 -0
  24. agent/callbacks/telemetry.py +106 -69
  25. agent/callbacks/trajectory_saver.py +178 -70
  26. agent/cli.py +269 -119
  27. agent/computers/__init__.py +14 -9
  28. agent/computers/base.py +32 -19
  29. agent/computers/cua.py +52 -25
  30. agent/computers/custom.py +78 -71
  31. agent/decorators.py +23 -14
  32. agent/human_tool/__init__.py +2 -7
  33. agent/human_tool/__main__.py +6 -2
  34. agent/human_tool/server.py +48 -37
  35. agent/human_tool/ui.py +359 -235
  36. agent/integrations/hud/__init__.py +164 -74
  37. agent/integrations/hud/agent.py +338 -342
  38. agent/integrations/hud/proxy.py +297 -0
  39. agent/loops/__init__.py +44 -14
  40. agent/loops/anthropic.py +590 -492
  41. agent/loops/base.py +19 -15
  42. agent/loops/composed_grounded.py +142 -144
  43. agent/loops/fara/__init__.py +8 -0
  44. agent/loops/fara/config.py +506 -0
  45. agent/loops/fara/helpers.py +357 -0
  46. agent/loops/fara/schema.py +143 -0
  47. agent/loops/gelato.py +183 -0
  48. agent/loops/gemini.py +935 -0
  49. agent/loops/generic_vlm.py +601 -0
  50. agent/loops/glm45v.py +140 -135
  51. agent/loops/gta1.py +48 -51
  52. agent/loops/holo.py +218 -0
  53. agent/loops/internvl.py +180 -0
  54. agent/loops/moondream3.py +493 -0
  55. agent/loops/omniparser.py +326 -226
  56. agent/loops/openai.py +63 -56
  57. agent/loops/opencua.py +134 -0
  58. agent/loops/uiins.py +175 -0
  59. agent/loops/uitars.py +262 -212
  60. agent/loops/uitars2.py +951 -0
  61. agent/playground/__init__.py +5 -0
  62. agent/playground/server.py +301 -0
  63. agent/proxy/examples.py +196 -0
  64. agent/proxy/handlers.py +255 -0
  65. agent/responses.py +486 -339
  66. agent/tools/__init__.py +24 -0
  67. agent/tools/base.py +253 -0
  68. agent/tools/browser_tool.py +423 -0
  69. agent/types.py +20 -5
  70. agent/ui/__init__.py +1 -1
  71. agent/ui/__main__.py +1 -1
  72. agent/ui/gradio/app.py +25 -22
  73. agent/ui/gradio/ui_components.py +314 -167
  74. cua_agent-0.7.16.dist-info/METADATA +85 -0
  75. cua_agent-0.7.16.dist-info/RECORD +79 -0
  76. {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/WHEEL +1 -1
  77. agent/integrations/hud/adapter.py +0 -121
  78. agent/integrations/hud/computer_handler.py +0 -187
  79. agent/telemetry.py +0 -142
  80. cua_agent-0.4.14.dist-info/METADATA +0 -436
  81. cua_agent-0.4.14.dist-info/RECORD +0 -50
  82. {cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/entry_points.txt +0 -0
cua_agent-0.7.16.dist-info/METADATA ADDED
@@ -0,0 +1,85 @@
1
+ Metadata-Version: 2.1
2
+ Name: cua-agent
3
+ Version: 0.7.16
4
+ Summary: Cua (Computer Use) Agent for AI-driven computer interaction
5
+ Author-Email: TryCua <gh@trycua.com>
6
+ Requires-Python: <3.14,>=3.12
7
+ Requires-Dist: httpx>=0.27.0
8
+ Requires-Dist: aiohttp>=3.9.3
9
+ Requires-Dist: asyncio
10
+ Requires-Dist: anyio>=4.4.1
11
+ Requires-Dist: typing-extensions>=4.12.2
12
+ Requires-Dist: pydantic>=2.6.4
13
+ Requires-Dist: rich>=13.7.1
14
+ Requires-Dist: python-dotenv>=1.0.1
15
+ Requires-Dist: cua-computer<0.6.0,>=0.5.0
16
+ Requires-Dist: cua-core<0.2.0,>=0.1.8
17
+ Requires-Dist: certifi>=2024.2.2
18
+ Requires-Dist: litellm>=1.74.12
19
+ Provides-Extra: openai
20
+ Provides-Extra: anthropic
21
+ Provides-Extra: qwen
22
+ Requires-Dist: qwen-vl-utils; extra == "qwen"
23
+ Requires-Dist: qwen-agent; extra == "qwen"
24
+ Requires-Dist: Pillow>=10.0.0; extra == "qwen"
25
+ Provides-Extra: omni
26
+ Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "omni"
27
+ Provides-Extra: uitars
28
+ Provides-Extra: uitars-mlx
29
+ Requires-Dist: mlx-vlm>=0.1.27; sys_platform == "darwin" and extra == "uitars-mlx"
30
+ Provides-Extra: uitars-hf
31
+ Requires-Dist: accelerate; extra == "uitars-hf"
32
+ Requires-Dist: torch; extra == "uitars-hf"
33
+ Requires-Dist: transformers>=4.54.0; extra == "uitars-hf"
34
+ Provides-Extra: glm45v-hf
35
+ Requires-Dist: accelerate; extra == "glm45v-hf"
36
+ Requires-Dist: torch; extra == "glm45v-hf"
37
+ Requires-Dist: transformers-v4.55.0-GLM-4.5V-preview; extra == "glm45v-hf"
38
+ Provides-Extra: opencua-hf
39
+ Requires-Dist: accelerate; extra == "opencua-hf"
40
+ Requires-Dist: torch; extra == "opencua-hf"
41
+ Requires-Dist: transformers>=4.53.0; extra == "opencua-hf"
42
+ Requires-Dist: tiktoken>=0.11.0; extra == "opencua-hf"
43
+ Requires-Dist: blobfile>=3.0.0; extra == "opencua-hf"
44
+ Provides-Extra: internvl-hf
45
+ Requires-Dist: accelerate; extra == "internvl-hf"
46
+ Requires-Dist: torch; extra == "internvl-hf"
47
+ Requires-Dist: transformers>=4.55.0; extra == "internvl-hf"
48
+ Requires-Dist: einops; extra == "internvl-hf"
49
+ Requires-Dist: timm; extra == "internvl-hf"
50
+ Provides-Extra: moondream3
51
+ Requires-Dist: accelerate; extra == "moondream3"
52
+ Requires-Dist: torch; extra == "moondream3"
53
+ Requires-Dist: transformers>=4.55.0; extra == "moondream3"
54
+ Provides-Extra: ui
55
+ Requires-Dist: gradio>=6.0.0; extra == "ui"
56
+ Requires-Dist: python-dotenv>=1.0.1; extra == "ui"
57
+ Provides-Extra: cli
58
+ Requires-Dist: yaspin>=3.1.0; extra == "cli"
59
+ Provides-Extra: hud
60
+ Requires-Dist: hud-python==0.4.52; extra == "hud"
61
+ Provides-Extra: gemini
62
+ Requires-Dist: google-genai>=1.41.0; extra == "gemini"
63
+ Provides-Extra: all
64
+ Requires-Dist: mlx-vlm>=0.1.27; sys_platform == "darwin" and extra == "all"
65
+ Requires-Dist: accelerate; extra == "all"
66
+ Requires-Dist: torch; extra == "all"
67
+ Requires-Dist: transformers>=4.55.0; extra == "all"
68
+ Requires-Dist: einops; extra == "all"
69
+ Requires-Dist: timm; extra == "all"
70
+ Requires-Dist: tiktoken>=0.11.0; extra == "all"
71
+ Requires-Dist: blobfile>=3.0.0; extra == "all"
72
+ Requires-Dist: gradio>=6.0.0; extra == "all"
73
+ Requires-Dist: python-dotenv>=1.0.1; extra == "all"
74
+ Requires-Dist: yaspin>=3.1.0; extra == "all"
75
+ Requires-Dist: google-genai>=1.41.0; extra == "all"
76
+ Requires-Dist: qwen-vl-utils; extra == "all"
77
+ Requires-Dist: qwen-agent; extra == "all"
78
+ Requires-Dist: Pillow>=10.0.0; extra == "all"
79
+ Description-Content-Type: text/markdown
80
+
81
+ # Cua Agent
82
+
83
+ Computer-Use framework with liteLLM integration for running agentic workflows on macOS, Windows, and Linux sandboxes.
84
+
85
+ **[Documentation](https://cua.ai/docs/cua/reference/agent-sdk)** - Installation, guides, and configuration.
cua_agent-0.7.16.dist-info/RECORD ADDED
@@ -0,0 +1,79 @@
1
+ agent/__init__.py,sha256=zaQexECXUaHLjkkkS166vXBLV6gMcKC2ssL3nCfJ2wA,1250
2
+ agent/__main__.py,sha256=hD-1r72Evv3eJ7v6RhuU2vpusqxx-thbdrK5_iZGD-I,539
3
+ agent/adapters/__init__.py,sha256=UFIRGjyjHZzIuPJP_6LxrPF0YLL_1uVFve7sB20NTQQ,422
4
+ agent/adapters/azure_ml_adapter.py,sha256=Nr3hc6jSkEJ7NKcd-qewBitxPm0ECvCZz21DmOBLIsM,10364
5
+ agent/adapters/cua_adapter.py,sha256=JYiKicHx0Al3ItYFX33ulPIkSbdfCkL_BWeePH9timY,5886
6
+ agent/adapters/huggingfacelocal_adapter.py,sha256=dL9fuDNC3MaA67DQye5UT-Sq-gmD9_2NIS5UTWLlSjU,6641
7
+ agent/adapters/human_adapter.py,sha256=SWUYgJZ_o8_5qw6uuXEahBPeHQ2fCLrTrM7U1PhQta8,12548
8
+ agent/adapters/mlxvlm_adapter.py,sha256=n8Amj8RPmdnUHVZUW-HfwsLEqakrkwMp6KnwQKTHyAE,14277
9
+ agent/adapters/models/__init__.py,sha256=aojUJzqd02Zo0u4VdTgOiAWAxyzwjTrn-FuCAsAoBEs,1474
10
+ agent/adapters/models/generic.py,sha256=qwhJXc8v9s44x1r0RnlTCjAIl862pwx33OEhyfwc72Y,2782
11
+ agent/adapters/models/internvl.py,sha256=8V5l-A7sR43oePcCLdWL9p9BWS65tKW0jtCzS_Hc-Ls,11766
12
+ agent/adapters/models/opencua.py,sha256=KziBcGQl4fJ4ad-sdQeHqFP_Ez4hzJWxcMou-cBLAOI,4083
13
+ agent/adapters/models/qwen2_5_vl.py,sha256=QzZawBxA_yLmqcG2rTtrQtWkKubyI9Oik5Lv2FUyQlw,2823
14
+ agent/agent.py,sha256=4Db48Vk9zV5Y0pXP2bURsnqjKrloS7Fb0zF32NwBAgU,35575
15
+ agent/callbacks/__init__.py,sha256=0LCm6Su3tszYBqaXCroCkIveAkFE2UjBOms-Ku8NN4s,819
16
+ agent/callbacks/base.py,sha256=onbJlS6HzgptBYfvPIDe8-PWK71Zr3mFaGVUyupNwCo,4622
17
+ agent/callbacks/budget_manager.py,sha256=RQ0P4s-w6xvtLQsIxgZ5n-ocfcYnqljSlGftmG1cVk8,1899
18
+ agent/callbacks/image_retention.py,sha256=qWe_XLyisvZVYn-_WHyIgHSpcxdqdS0tJKI4ZvVyEXI,3578
19
+ agent/callbacks/logging.py,sha256=MNbWH3DxSkI7RZJ-BAwr6lCClXVDNM6864wNA2TAQ6g,10855
20
+ agent/callbacks/operator_validator.py,sha256=MrQZC5ySjHtjGryVrY8T0bUL9rOqSqTA7rMNqkqmXvU,6484
21
+ agent/callbacks/otel.py,sha256=4svx0T-vuDKT1qMQxP4SicINT6xpUwvoFy8GST9lLRg,8793
22
+ agent/callbacks/pii_anonymization.py,sha256=OSnb309Klc5GIQcrYuZvw-ddJJ0FNJ3U0WiJeT3Q8BA,3179
23
+ agent/callbacks/prompt_instructions.py,sha256=RUqsJhiNiXqaOM_P2AfyBinWUDdgDku46BExLMUJHn4,1517
24
+ agent/callbacks/telemetry.py,sha256=sVLQ2COK0TAQ2RJtdnlzMyQqGov566O2gWNDwxW27qk,8371
25
+ agent/callbacks/trajectory_saver.py,sha256=ASd5wmYSt2izEbjOtVobEn_7wbGcVapiMd51qpK8HaU,16011
26
+ agent/cli.py,sha256=B4wtY7YfcJbgrv5ocztj1pkPYqWjLhBRvVGLSPHntAM,17894
27
+ agent/computers/__init__.py,sha256=Q69OOH4m8GXY_O0BrEfwPkdiW7Muc97-N8DXi3maKOM,1475
28
+ agent/computers/base.py,sha256=Ud62zbSfgMuJ2Y6JrHVH25GG208rqKJBr4v1EXFfbKI,2310
29
+ agent/computers/cua.py,sha256=0QdX3aC4btiUWVZ9iAdxz9BXUL1viFWGYGZVE1CZ_kM,5621
30
+ agent/computers/custom.py,sha256=r010ew-tO0mq3sjvEPome2ELTA5tPCtEgInDyhICaak,7970
31
+ agent/decorators.py,sha256=KLSLczVt6AIh8IPp5YUIqJhNMpcbYUu-irCpc6uGKfI,1875
32
+ agent/human_tool/__init__.py,sha256=2lp9aZLdId4iooY6sdMw4TwVmDdAvsKyZFJla99BpA0,748
33
+ agent/human_tool/__main__.py,sha256=P4H50miHpkqRax6sfRG9PSRct2g82RLwfmshFvqpSLs,1069
34
+ agent/human_tool/server.py,sha256=YeTsVDwRFBn6x7lGCywNe3H94hA6Fsp8SFfHslhxkac,7933
35
+ agent/human_tool/ui.py,sha256=d_nhtJWH40tOwDP84t9EqnOiEpzDu88F2MvZR6kQu2M,30751
36
+ agent/integrations/hud/__init__.py,sha256=fVJXPhTdu3-2-8h1qC4kTCtsphgajUO-rnuDJbMnvbw,5854
37
+ agent/integrations/hud/agent.py,sha256=vfuU0t1vcwZhpxnuTNXs8-zQQ3p1RxJq53cI3PmGGqw,14544
38
+ agent/integrations/hud/proxy.py,sha256=Kj9grnLbuaCS-2y2TXVuRBQwqifzh-UX0Q916V9PWyY,11718
39
+ agent/loops/__init__.py,sha256=xLSxiw0zIpeyI60s-OLpPfANGs5DtOcRoFDCdmN-c0o,634
40
+ agent/loops/anthropic.py,sha256=asu9G0pssv2yrOsOOOhn51K6X11deF-HzFHQ-_qgE_A,73097
41
+ agent/loops/base.py,sha256=E61rNE8WttIXZ1-AP6k2cZeSp8dzvWkXzUOeU8SLwN8,2677
42
+ agent/loops/composed_grounded.py,sha256=Cc5w9gU-5D0MP-Wjb4XLcjuNIN9EeRKXNyMtLwRoq8I,12395
43
+ agent/loops/fara/__init__.py,sha256=kaN1VsFZOf0dWUPPX4M2qbTl3JqzN3O3jlq5ByOrkW8,182
44
+ agent/loops/fara/config.py,sha256=EO0WGxF_e51s16o7PP7gBBNqhpOeotB_1njbXNC_fmY,21478
45
+ agent/loops/fara/helpers.py,sha256=j5fWcVNdXM0vhtdDf9IjzlqCSgAh2XpKRhhn7C6b9MY,13657
46
+ agent/loops/fara/schema.py,sha256=rVZyF3kXecmg-uve9ekeeROk_Yo4tJId2NowyeNakOk,4221
47
+ agent/loops/gelato.py,sha256=72ctdzpaZRHqs_bsNquq-E5NisITzusOMJSB5pyxUZI,5784
48
+ agent/loops/gemini.py,sha256=U_yaexk1N60sB9Se5qAalcz1hr5JPt9kM_o8XsCtpLE,36564
49
+ agent/loops/generic_vlm.py,sha256=YnK96VqHnGaRndIves5riAEh3bY3lKP5ZlO90N68Vs8,24784
50
+ agent/loops/glm45v.py,sha256=HkLJMYMYbQDUBxD5upBMwY7jrJZh44UUaPKLjaYMUsI,34585
51
+ agent/loops/gta1.py,sha256=Q6OPf54_9Jvy0L4af9H8omyIfDTdrQVz7bmW_vKH-Gc,5597
52
+ agent/loops/holo.py,sha256=0FQJifXNrTaNIHaREb8R14byHOmzGvJfe_gUC5p9fP0,7503
53
+ agent/loops/internvl.py,sha256=x9CCwYvANEWrWgO0ThE0trUKng28g5306L3pBT4CEFI,6561
54
+ agent/loops/model_types.csv,sha256=GmFn4x80yoUpQZuQ-GXtJkPVlOLYWZ5u_5A73HRyeNE,112
55
+ agent/loops/moondream3.py,sha256=Dr7rL-yqXD3TR-2YT6xQ588WMVTB_uobdUF-oLtQi_Y,18557
56
+ agent/loops/omniparser.py,sha256=6LPPpYxdoQ9GvfoXg8DJRq52JhWJZDAKwxNviwRaxdk,19516
57
+ agent/loops/openai.py,sha256=XGzAs-de1qCNTWEUOZrHrK7CwmY7fpav-Q4k2fXbBcc,8469
58
+ agent/loops/opencua.py,sha256=XpOLQpwpd7zeCWg2BMmJt0QPglW8we4azC_ehqrhYys,4421
59
+ agent/loops/uiins.py,sha256=iDgOu0m_kfsHy4aN4ACaAqHI479Km1An5gFEC2oKuTw,6058
60
+ agent/loops/uitars.py,sha256=maXgyi2_Er8HN1VzPpVklSNkqF1yDWeIv27VGGRaEwE,32224
61
+ agent/loops/uitars2.py,sha256=405TyqOqMZfR61X_2ei8FEnadAHcbuxQYsJpGZVOMpM,37120
62
+ agent/playground/__init__.py,sha256=k-BpN4kailwxga_mlssokoDhNPut7hliex2aoCQ-S-0,110
63
+ agent/playground/server.py,sha256=-DTw6Bt8CHQjFWbIbTUZgD7ifdWWgyoj3x7N1oSC9Bk,11212
64
+ agent/proxy/examples.py,sha256=G-KfjSZMz7yvf9VYgCBt_XNDNN3aMbCjYHS-mbxttrA,6062
65
+ agent/proxy/handlers.py,sha256=w-mKONPzyRNJo5lPVIBKmpZgwMKW5uqGzYeRxlE2WTI,9930
66
+ agent/responses.py,sha256=DnGG7y_yos09Mk7mVFdUlmdw4JMeIndtAbYpLzvZKgo,35317
67
+ agent/tools/__init__.py,sha256=PrW8OUNrK1AKDgShJ1ZbKYz_JjFyj6s_CLvCCNztlzU,430
68
+ agent/tools/base.py,sha256=tF-Hsoyv57b6NGVHuVCB4VSe02HBL2BwRsJKm1So4FE,7510
69
+ agent/tools/browser_tool.py,sha256=rK4EY3q8prFLA8WJwUntPWGBwXYjJE21EOmncCWRy_U,18625
70
+ agent/types.py,sha256=qK8yjPIPAfXykXok9nS8dvmbFYFUUpxdd3W3JUMCFsI,1061
71
+ agent/ui/__init__.py,sha256=vHINeH6zAcseCZSFe7pzcsaIdVTVoATm5YhEJtZdeMM,126
72
+ agent/ui/__main__.py,sha256=Ee9KF16h4fWlb6J48OBqc7cQEbzSUZgNe0L7GlKsdpg,74
73
+ agent/ui/gradio/__init__.py,sha256=yv4Mrfo-Sj2U5sVn_UJHAuwYCezo-5O4ItR2C9jzNko,145
74
+ agent/ui/gradio/app.py,sha256=OCErZD_6vh8cdYGZFT-IiFQQpkoEiNwPDoEkPge-Ngg,8901
75
+ agent/ui/gradio/ui_components.py,sha256=B9CHC7sIMMEzsz5gdA16hMf8VVQ_-NO4lY49AKylciI,38852
76
+ cua_agent-0.7.16.dist-info/METADATA,sha256=Zxnt0OSOQEKBXFBwgui8IPjJI5TENvHQGOXupixp8cw,3433
77
+ cua_agent-0.7.16.dist-info/WHEEL,sha256=tsUv_t7BDeJeRHaSrczbGeuK-TtDpGsWi_JfpzD255I,90
78
+ cua_agent-0.7.16.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
79
+ cua_agent-0.7.16.dist-info/RECORD,,
{cua_agent-0.4.14.dist-info → cua_agent-0.7.16.dist-info}/WHEEL CHANGED
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: pdm-backend (2.4.5)
2
+ Generator: pdm-backend (2.4.6)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
agent/integrations/hud/adapter.py DELETED
@@ -1,121 +0,0 @@
1
- """HUD Adapter for ComputerAgent integration."""
2
-
3
- from __future__ import annotations
4
-
5
- from typing import Any, ClassVar
6
-
7
- from hud.adapters.common import CLA, Adapter
8
- from hud.adapters.common.types import (
9
- CLAButton,
10
- CLAKey,
11
- ClickAction,
12
- CustomAction,
13
- DragAction,
14
- MoveAction,
15
- Point,
16
- PressAction,
17
- ResponseAction,
18
- ScreenshotFetch,
19
- ScrollAction,
20
- TypeAction,
21
- WaitAction,
22
- )
23
-
24
-
25
- class ComputerAgentAdapter(Adapter):
26
- """Adapter for ComputerAgent to work with HUD."""
27
-
28
- KEY_MAP: ClassVar[dict[str, CLAKey]] = {
29
- "return": "enter",
30
- "arrowup": "up",
31
- "arrowdown": "down",
32
- "arrowleft": "left",
33
- "arrowright": "right",
34
- "cmd": "ctrl",
35
- "super": "win",
36
- "meta": "win",
37
- }
38
-
39
- BUTTON_MAP: ClassVar[dict[str, CLAButton]] = {
40
- "wheel": "middle",
41
- "middle": "middle",
42
- }
43
-
44
- def __init__(self) -> None:
45
- super().__init__()
46
- # ComputerAgent default dimensions (can be overridden)
47
- self.agent_width = 1024
48
- self.agent_height = 768
49
-
50
- def _map_key(self, key: str) -> CLAKey:
51
- """Map a key to its standardized form."""
52
- return self.KEY_MAP.get(key.lower(), key.lower()) # type: ignore
53
-
54
- def convert(self, data: Any) -> CLA:
55
- """Convert a ComputerAgent action to a HUD action."""
56
- try:
57
- action_type = data.get("type")
58
-
59
- if action_type == "click":
60
- x, y = data.get("x", 0), data.get("y", 0)
61
- button = data.get("button", "left")
62
- button = self.BUTTON_MAP.get(button, button)
63
- if button is None:
64
- button = "left"
65
- converted_action = ClickAction(point=Point(x=x, y=y), button=button)
66
-
67
- elif action_type == "double_click":
68
- x, y = data.get("x", 0), data.get("y", 0)
69
- converted_action = ClickAction(point=Point(x=x, y=y), button="left", pattern=[100])
70
-
71
- elif action_type == "scroll":
72
- x, y = int(data.get("x", 0)), int(data.get("y", 0))
73
- scroll_x = int(data.get("scroll_x", 0))
74
- scroll_y = int(data.get("scroll_y", 0))
75
- converted_action = ScrollAction(
76
- point=Point(x=x, y=y), scroll=Point(x=scroll_x, y=scroll_y)
77
- )
78
-
79
- elif action_type == "type":
80
- text = data.get("text", "")
81
- converted_action = TypeAction(text=text, enter_after=False)
82
-
83
- elif action_type == "wait":
84
- ms = data.get("ms", 1000)
85
- converted_action = WaitAction(time=ms)
86
-
87
- elif action_type == "move":
88
- x, y = data.get("x", 0), data.get("y", 0)
89
- converted_action = MoveAction(point=Point(x=x, y=y))
90
-
91
- elif action_type == "keypress":
92
- keys = data.get("keys", [])
93
- if isinstance(keys, str):
94
- keys = [keys]
95
- converted_action = PressAction(keys=[self._map_key(k) for k in keys])
96
-
97
- elif action_type == "drag":
98
- path = data.get("path", [])
99
- points = [Point(x=p.get("x", 0), y=p.get("y", 0)) for p in path]
100
- converted_action = DragAction(path=points)
101
-
102
- elif action_type == "screenshot":
103
- converted_action = ScreenshotFetch()
104
-
105
- elif action_type == "response":
106
- converted_action = ResponseAction(text=data.get("text", ""))
107
-
108
- elif action_type == "custom":
109
- converted_action = CustomAction(action=data.get("action", ""))
110
-
111
- else:
112
- raise ValueError(f"Unsupported action type: {action_type}")
113
-
114
- # Add reasoning and logs if available
115
- converted_action.reasoning = data.get("reasoning", "")
116
- converted_action.logs = data.get("logs", "")
117
-
118
- return converted_action
119
-
120
- except Exception as e:
121
- raise ValueError(f"Invalid action: {data}. Error: {e!s}") from e
agent/integrations/hud/computer_handler.py DELETED
@@ -1,187 +0,0 @@
1
- """HUD Computer Handler for ComputerAgent integration."""
2
-
3
- import base64
4
- from io import BytesIO
5
- from typing import Literal, Optional, Any, Dict, Callable
6
- from PIL import Image
7
-
8
- from agent.computers import AsyncComputerHandler
9
-
10
-
11
- class HUDComputerHandler(AsyncComputerHandler):
12
- """Computer handler that interfaces with HUD environment."""
13
-
14
- def __init__(
15
- self,
16
- environment: Literal["windows", "mac", "linux", "browser"] = "linux",
17
- dimensions: tuple[int, int] = (1024, 768),
18
- screenshot_callback: Optional[Callable] = None,
19
- action_callback: Optional[Callable] = None,
20
- ):
21
- """
22
- Initialize HUD computer handler.
23
-
24
- Args:
25
- environment: The environment type for HUD
26
- dimensions: Screen dimensions as (width, height)
27
- screenshot_callback: Optional callback to get screenshots from HUD environment
28
- action_callback: Optional callback to execute actions in HUD environment
29
- """
30
- super().__init__()
31
- self._environment = environment
32
- self._dimensions = dimensions
33
- self._screenshot_callback = screenshot_callback
34
- self._action_callback = action_callback
35
-
36
- # Store the last screenshot for reuse
37
- self._last_screenshot: Optional[str] = None
38
-
39
- def set_screenshot_callback(self, callback: Callable) -> None:
40
- """Set the screenshot callback."""
41
- self._screenshot_callback = callback
42
-
43
- def set_action_callback(self, callback: Callable) -> None:
44
- """Set the action callback."""
45
- self._action_callback = callback
46
-
47
- def update_screenshot(self, screenshot: str) -> None:
48
- """Update the stored screenshot (base64 string)."""
49
- self._last_screenshot = screenshot
50
-
51
- async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
52
- """Get the current environment type."""
53
- return self._environment # type: ignore
54
-
55
- async def get_dimensions(self) -> tuple[int, int]:
56
- """Get screen dimensions as (width, height)."""
57
- return self._dimensions
58
-
59
- async def screenshot(self) -> str:
60
- """Take a screenshot and return as base64 string."""
61
- if self._screenshot_callback:
62
- screenshot = await self._screenshot_callback()
63
- if isinstance(screenshot, str):
64
- self._last_screenshot = screenshot
65
- return screenshot
66
- elif isinstance(screenshot, Image.Image):
67
- # Convert PIL Image to base64
68
- buffer = BytesIO()
69
- screenshot.save(buffer, format="PNG")
70
- screenshot_b64 = base64.b64encode(buffer.getvalue()).decode()
71
- self._last_screenshot = screenshot_b64
72
- return screenshot_b64
73
- elif isinstance(screenshot, bytes):
74
- screenshot_b64 = base64.b64encode(screenshot).decode()
75
- self._last_screenshot = screenshot_b64
76
- return screenshot_b64
77
-
78
- # Return last screenshot if available, otherwise create a blank one
79
- if self._last_screenshot:
80
- return self._last_screenshot
81
-
82
- # Create a blank screenshot as fallback
83
- blank_image = Image.new('RGB', self._dimensions, color='white')
84
- buffer = BytesIO()
85
- blank_image.save(buffer, format="PNG")
86
- screenshot_b64 = base64.b64encode(buffer.getvalue()).decode()
87
- self._last_screenshot = screenshot_b64
88
- return screenshot_b64
89
-
90
- async def click(self, x: int, y: int, button: str = "left") -> None:
91
- """Click at coordinates with specified button."""
92
- if self._action_callback:
93
- await self._action_callback({
94
- "type": "click",
95
- "x": x,
96
- "y": y,
97
- "button": button
98
- })
99
-
100
- async def double_click(self, x: int, y: int) -> None:
101
- """Double click at coordinates."""
102
- if self._action_callback:
103
- await self._action_callback({
104
- "type": "double_click",
105
- "x": x,
106
- "y": y
107
- })
108
-
109
- async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
110
- """Scroll at coordinates with specified scroll amounts."""
111
- if self._action_callback:
112
- await self._action_callback({
113
- "type": "scroll",
114
- "x": x,
115
- "y": y,
116
- "scroll_x": scroll_x,
117
- "scroll_y": scroll_y
118
- })
119
-
120
- async def type(self, text: str) -> None:
121
- """Type text."""
122
- if self._action_callback:
123
- await self._action_callback({
124
- "type": "type",
125
- "text": text
126
- })
127
-
128
- async def wait(self, ms: int = 1000) -> None:
129
- """Wait for specified milliseconds."""
130
- if self._action_callback:
131
- await self._action_callback({
132
- "type": "wait",
133
- "ms": ms
134
- })
135
-
136
- async def move(self, x: int, y: int) -> None:
137
- """Move cursor to coordinates."""
138
- if self._action_callback:
139
- await self._action_callback({
140
- "type": "move",
141
- "x": x,
142
- "y": y
143
- })
144
-
145
- async def keypress(self, keys: list[str] | str) -> None:
146
- """Press key combination."""
147
- if isinstance(keys, str):
148
- keys = [keys]
149
- if self._action_callback:
150
- await self._action_callback({
151
- "type": "keypress",
152
- "keys": keys
153
- })
154
-
155
- async def drag(self, path: list[dict[str, int]]) -> None:
156
- """Drag along a path of points."""
157
- if self._action_callback:
158
- await self._action_callback({
159
- "type": "drag",
160
- "path": path
161
- })
162
-
163
- async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
164
- """Left mouse down at coordinates."""
165
- if self._action_callback:
166
- await self._action_callback({
167
- "type": "left_mouse_down",
168
- "x": x,
169
- "y": y
170
- })
171
-
172
- async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
173
- """Left mouse up at coordinates."""
174
- if self._action_callback:
175
- await self._action_callback({
176
- "type": "left_mouse_up",
177
- "x": x,
178
- "y": y
179
- })
180
-
181
- async def get_current_url(self) -> str:
182
- """Get the current URL."""
183
- if self._action_callback:
184
- return await self._action_callback({
185
- "type": "get_current_url"
186
- })
187
- return ""
agent/telemetry.py DELETED
@@ -1,142 +0,0 @@
1
- """Agent telemetry for tracking anonymous usage and feature usage."""
2
-
3
- import logging
4
- import os
5
- import platform
6
- import sys
7
- from typing import Dict, Any, Callable
8
-
9
- # Import the core telemetry module
10
- TELEMETRY_AVAILABLE = False
11
-
12
-
13
- # Local fallbacks in case core telemetry isn't available
14
- def _noop(*args: Any, **kwargs: Any) -> None:
15
- """No-op function for when telemetry is not available."""
16
- pass
17
-
18
-
19
- # Define default functions with unique names to avoid shadowing
20
- _default_record_event = _noop
21
- _default_increment_counter = _noop
22
- _default_set_dimension = _noop
23
- _default_get_telemetry_client = lambda: None
24
- _default_flush = _noop
25
- _default_is_telemetry_enabled = lambda: False
26
- _default_is_telemetry_globally_disabled = lambda: True
27
-
28
- # Set the actual functions to the defaults initially
29
- record_event = _default_record_event
30
- increment_counter = _default_increment_counter
31
- set_dimension = _default_set_dimension
32
- get_telemetry_client = _default_get_telemetry_client
33
- flush = _default_flush
34
- is_telemetry_enabled = _default_is_telemetry_enabled
35
- is_telemetry_globally_disabled = _default_is_telemetry_globally_disabled
36
-
37
- logger = logging.getLogger("agent.telemetry")
38
-
39
- try:
40
- # Import from core telemetry
41
- from core.telemetry import (
42
- record_event as core_record_event,
43
- increment as core_increment,
44
- get_telemetry_client as core_get_telemetry_client,
45
- flush as core_flush,
46
- is_telemetry_enabled as core_is_telemetry_enabled,
47
- is_telemetry_globally_disabled as core_is_telemetry_globally_disabled,
48
- )
49
-
50
- # Override the default functions with actual implementations
51
- record_event = core_record_event
52
- get_telemetry_client = core_get_telemetry_client
53
- flush = core_flush
54
- is_telemetry_enabled = core_is_telemetry_enabled
55
- is_telemetry_globally_disabled = core_is_telemetry_globally_disabled
56
-
57
- def increment_counter(counter_name: str, value: int = 1) -> None:
58
- """Wrapper for increment to maintain backward compatibility."""
59
- if is_telemetry_enabled():
60
- core_increment(counter_name, value)
61
-
62
- def set_dimension(name: str, value: Any) -> None:
63
- """Set a dimension that will be attached to all events."""
64
- logger.debug(f"Setting dimension {name}={value}")
65
-
66
- TELEMETRY_AVAILABLE = True
67
- logger.info("Successfully imported telemetry")
68
- except ImportError as e:
69
- logger.warning(f"Could not import telemetry: {e}")
70
- logger.debug("Telemetry not available, using no-op functions")
71
-
72
- # Get system info once to use in telemetry
73
- SYSTEM_INFO = {
74
- "os": platform.system().lower(),
75
- "os_version": platform.release(),
76
- "python_version": platform.python_version(),
77
- }
78
-
79
-
80
- def enable_telemetry() -> bool:
81
- """Enable telemetry if available.
82
-
83
- Returns:
84
- bool: True if telemetry was successfully enabled, False otherwise
85
- """
86
- global TELEMETRY_AVAILABLE, record_event, increment_counter, get_telemetry_client, flush, is_telemetry_enabled, is_telemetry_globally_disabled
87
-
88
- # Check if globally disabled using core function
89
- if TELEMETRY_AVAILABLE and is_telemetry_globally_disabled():
90
- logger.info("Telemetry is globally disabled via environment variable - cannot enable")
91
- return False
92
-
93
- # Already enabled
94
- if TELEMETRY_AVAILABLE:
95
- return True
96
-
97
- # Try to import and enable
98
- try:
99
- from core.telemetry import (
100
- record_event,
101
- increment,
102
- get_telemetry_client,
103
- flush,
104
- is_telemetry_globally_disabled,
105
- )
106
-
107
- # Check again after import
108
- if is_telemetry_globally_disabled():
109
- logger.info("Telemetry is globally disabled via environment variable - cannot enable")
110
- return False
111
-
112
- TELEMETRY_AVAILABLE = True
113
- logger.info("Telemetry successfully enabled")
114
- return True
115
- except ImportError as e:
116
- logger.warning(f"Could not enable telemetry: {e}")
117
- return False
118
-
119
-
120
- def is_telemetry_enabled() -> bool:
121
- """Check if telemetry is enabled.
122
-
123
- Returns:
124
- bool: True if telemetry is enabled, False otherwise
125
- """
126
- # Use the core function if available, otherwise use our local flag
127
- if TELEMETRY_AVAILABLE:
128
- from core.telemetry import is_telemetry_enabled as core_is_enabled
129
-
130
- return core_is_enabled()
131
- return False
132
-
133
-
134
- def record_agent_initialization() -> None:
135
- """Record when an agent instance is initialized."""
136
- if TELEMETRY_AVAILABLE and is_telemetry_enabled():
137
- record_event("agent_initialized", SYSTEM_INFO)
138
-
139
- # Set dimensions that will be attached to all events
140
- set_dimension("os", SYSTEM_INFO["os"])
141
- set_dimension("os_version", SYSTEM_INFO["os_version"])
142
- set_dimension("python_version", SYSTEM_INFO["python_version"])