lm-deluge 0.0.67__py3-none-any.whl → 0.0.88__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lm-deluge might be problematic. Click here for more details.
- lm_deluge/__init__.py +25 -2
- lm_deluge/api_requests/anthropic.py +92 -17
- lm_deluge/api_requests/base.py +47 -11
- lm_deluge/api_requests/bedrock.py +7 -4
- lm_deluge/api_requests/chat_reasoning.py +4 -0
- lm_deluge/api_requests/gemini.py +138 -18
- lm_deluge/api_requests/openai.py +114 -21
- lm_deluge/client.py +282 -49
- lm_deluge/config.py +15 -3
- lm_deluge/mock_openai.py +643 -0
- lm_deluge/models/__init__.py +12 -1
- lm_deluge/models/anthropic.py +17 -2
- lm_deluge/models/arcee.py +16 -0
- lm_deluge/models/deepseek.py +36 -4
- lm_deluge/models/google.py +29 -0
- lm_deluge/models/grok.py +24 -0
- lm_deluge/models/kimi.py +36 -0
- lm_deluge/models/minimax.py +10 -0
- lm_deluge/models/openai.py +100 -0
- lm_deluge/models/openrouter.py +86 -8
- lm_deluge/models/together.py +11 -0
- lm_deluge/models/zai.py +1 -0
- lm_deluge/pipelines/gepa/__init__.py +95 -0
- lm_deluge/pipelines/gepa/core.py +354 -0
- lm_deluge/pipelines/gepa/docs/samples.py +696 -0
- lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
- lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
- lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
- lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
- lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
- lm_deluge/pipelines/gepa/optimizer.py +435 -0
- lm_deluge/pipelines/gepa/proposer.py +235 -0
- lm_deluge/pipelines/gepa/util.py +165 -0
- lm_deluge/{llm_tools → pipelines}/score.py +2 -2
- lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
- lm_deluge/prompt.py +224 -40
- lm_deluge/request_context.py +7 -2
- lm_deluge/tool/__init__.py +1118 -0
- lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
- lm_deluge/tool/builtin/gemini.py +59 -0
- lm_deluge/tool/builtin/openai.py +74 -0
- lm_deluge/tool/cua/__init__.py +173 -0
- lm_deluge/tool/cua/actions.py +148 -0
- lm_deluge/tool/cua/base.py +27 -0
- lm_deluge/tool/cua/batch.py +215 -0
- lm_deluge/tool/cua/converters.py +466 -0
- lm_deluge/tool/cua/kernel.py +702 -0
- lm_deluge/tool/cua/trycua.py +989 -0
- lm_deluge/tool/prefab/__init__.py +45 -0
- lm_deluge/tool/prefab/batch_tool.py +156 -0
- lm_deluge/tool/prefab/docs.py +1119 -0
- lm_deluge/tool/prefab/email.py +294 -0
- lm_deluge/tool/prefab/filesystem.py +1711 -0
- lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
- lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
- lm_deluge/tool/prefab/memory.py +458 -0
- lm_deluge/tool/prefab/otc/__init__.py +165 -0
- lm_deluge/tool/prefab/otc/executor.py +281 -0
- lm_deluge/tool/prefab/otc/parse.py +188 -0
- lm_deluge/tool/prefab/random.py +212 -0
- lm_deluge/tool/prefab/rlm/__init__.py +296 -0
- lm_deluge/tool/prefab/rlm/executor.py +349 -0
- lm_deluge/tool/prefab/rlm/parse.py +144 -0
- lm_deluge/tool/prefab/sandbox.py +1621 -0
- lm_deluge/tool/prefab/sheets.py +385 -0
- lm_deluge/tool/prefab/subagents.py +233 -0
- lm_deluge/tool/prefab/todos.py +342 -0
- lm_deluge/tool/prefab/tool_search.py +169 -0
- lm_deluge/tool/prefab/web_search.py +199 -0
- lm_deluge/tracker.py +16 -13
- lm_deluge/util/schema.py +412 -0
- lm_deluge/warnings.py +8 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/METADATA +22 -9
- lm_deluge-0.0.88.dist-info/RECORD +117 -0
- lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
- lm_deluge/built_in_tools/openai.py +0 -28
- lm_deluge/presets/cerebras.py +0 -17
- lm_deluge/presets/meta.py +0 -13
- lm_deluge/tool.py +0 -849
- lm_deluge-0.0.67.dist-info/RECORD +0 -72
- lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
- /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
- /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/bash.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/computer_use.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
- /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/WHEEL +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/licenses/LICENSE +0 -0
- {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.88.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,702 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Kernel (onkernel.com) implementation of ComputerExecutor.
|
|
3
|
+
|
|
4
|
+
This module provides a ComputerExecutor that connects to Kernel's browser-as-a-service
|
|
5
|
+
platform to execute computer use actions in a sandboxed cloud browser environment.
|
|
6
|
+
|
|
7
|
+
Requires: pip install kernel
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
from .actions import (
|
|
14
|
+
Click,
|
|
15
|
+
CUAction,
|
|
16
|
+
DoubleClick,
|
|
17
|
+
Drag,
|
|
18
|
+
GoBack,
|
|
19
|
+
GoForward,
|
|
20
|
+
Keypress,
|
|
21
|
+
Move,
|
|
22
|
+
Navigate,
|
|
23
|
+
Scroll,
|
|
24
|
+
Search,
|
|
25
|
+
TripleClick,
|
|
26
|
+
Type,
|
|
27
|
+
Wait,
|
|
28
|
+
)
|
|
29
|
+
from .base import ComputerExecutor, CUActionResult, Screenshot as ScreenshotResult
|
|
30
|
+
|
|
31
|
+
# Lazy import kernel SDK to avoid import errors if not installed
|
|
32
|
+
_kernel_client = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _get_kernel_client():
    """Return the module-wide Kernel client, building it on first use.

    The ``kernel`` SDK is imported here rather than at module import time so
    that this module can be imported without the package installed.

    Raises:
        ImportError: If the ``kernel`` package cannot be imported.
    """
    global _kernel_client

    # Fast path: a client was already built by an earlier call.
    if _kernel_client is not None:
        return _kernel_client

    try:
        from kernel import Kernel
    except ImportError:
        raise ImportError(
            "The 'kernel' package is required for KernelExecutor. "
            "Install it with: pip install kernel"
        )

    _kernel_client = Kernel()
    return _kernel_client
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class KernelBrowser:
    """
    Manages a Kernel browser session lifecycle.

    This is a synchronous context manager: entering it creates the session
    through the shared Kernel client and leaving it deletes the session.

    Usage:
        with KernelBrowser() as browser:
            executor = KernelExecutor(browser.session_id)
            result = executor.execute(Screenshot(kind="screenshot"))
    """

    def __init__(
        self,
        *,
        headless: bool = True,
        viewport_width: int = 1024,
        viewport_height: int = 768,
        timeout_seconds: int = 300,
        persistence_id: str | None = None,
    ):
        """
        Initialize a Kernel browser session configuration.

        No session is created here; call :meth:`create` or use the instance
        as a context manager.

        Args:
            headless: Whether to run in headless mode (default True)
            viewport_width: Browser viewport width in pixels
            viewport_height: Browser viewport height in pixels
            timeout_seconds: Auto-terminate after this many seconds of inactivity
            persistence_id: Optional ID for session persistence (reuse cookies, etc.)
        """
        self.headless = headless
        self.viewport_width = viewport_width
        self.viewport_height = viewport_height
        self.timeout_seconds = timeout_seconds
        self.persistence_id = persistence_id
        # Populated by create(), cleared again by delete().
        self.session_id: str | None = None
        self._client = None

    def create(self) -> "KernelBrowser":
        """Create the browser session synchronously.

        Returns:
            This instance, with ``session_id`` set from the created browser.

        Raises:
            ImportError: If the ``kernel`` package is not installed.
        """
        self._client = _get_kernel_client()

        create_params = {
            "headless": self.headless,
            "viewport": {
                "width": self.viewport_width,
                "height": self.viewport_height,
            },
            "timeout_seconds": self.timeout_seconds,
        }

        # Persistence is only requested when an ID was supplied.
        if self.persistence_id:
            create_params["persistence"] = {"id": self.persistence_id}

        browser = self._client.browsers.create(**create_params)
        self.session_id = browser.session_id
        return self

    def delete(self) -> None:
        """Delete the browser session (best effort).

        Does nothing when no session has been created. A failure to delete is
        logged at debug level instead of raised, matching
        ``AsyncKernelBrowser.delete``: the session may already be gone (for
        example after its inactivity timeout), and raising from ``__exit__``
        would hide an exception coming out of the ``with`` body.
        ``session_id`` is cleared whether or not the call succeeded.
        """
        if not (self._client and self.session_id):
            return

        try:
            self._client.browsers.delete_by_id(self.session_id)
        except Exception:
            # Local import, in keeping with this module's function-scope imports.
            import logging

            logging.getLogger(__name__).debug(
                "Could not delete Kernel browser session %s",
                self.session_id,
                exc_info=True,
            )
        finally:
            self.session_id = None

    def __enter__(self) -> "KernelBrowser":
        return self.create()

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.delete()
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class AsyncKernelBrowser:
    """
    Async version of KernelBrowser for use with asyncio.

    Each call to :meth:`create` builds its own ``AsyncKernel`` client; the
    session is deleted again when the ``async with`` block exits.

    Usage:
        async with AsyncKernelBrowser() as browser:
            executor = AsyncKernelExecutor(browser.session_id)
            result = await executor.execute(Screenshot(kind="screenshot"))
    """

    def __init__(
        self,
        *,
        headless: bool = True,
        viewport_width: int = 1024,
        viewport_height: int = 768,
        timeout_seconds: int = 300,
        persistence_id: str | None = None,
    ):
        """
        Initialize a Kernel browser session configuration.

        No session is created here; await :meth:`create` or use the instance
        with ``async with``.

        Args:
            headless: Whether to run in headless mode (default True)
            viewport_width: Browser viewport width in pixels
            viewport_height: Browser viewport height in pixels
            timeout_seconds: Session timeout in seconds, forwarded to Kernel
            persistence_id: Optional ID forwarded as the persistence ID
        """
        self.headless = headless
        self.viewport_width = viewport_width
        self.viewport_height = viewport_height
        self.timeout_seconds = timeout_seconds
        self.persistence_id = persistence_id
        # Populated by create(), cleared again by delete().
        self.session_id: str | None = None
        self._client = None

    async def create(self) -> "AsyncKernelBrowser":
        """Create the browser session asynchronously.

        Returns:
            This instance, with ``session_id`` set from the created browser.

        Raises:
            ImportError: If the ``kernel`` package is not installed.
        """
        try:
            from kernel import AsyncKernel
        except ImportError as err:
            # Chain the original failure so the real import problem stays
            # visible in the traceback.
            raise ImportError(
                "The 'kernel' package is required for AsyncKernelBrowser. "
                "Install it with: pip install kernel"
            ) from err

        self._client = AsyncKernel()

        create_params = {
            "headless": self.headless,
            "viewport": {
                "width": self.viewport_width,
                "height": self.viewport_height,
            },
            "timeout_seconds": self.timeout_seconds,
        }

        # Persistence is only requested when an ID was supplied.
        if self.persistence_id:
            create_params["persistence"] = {"id": self.persistence_id}

        browser = await self._client.browsers.create(**create_params)
        self.session_id = browser.session_id
        return self

    async def delete(self) -> None:
        """Delete the browser session (best effort).

        Does nothing when no session has been created. Deletion errors are
        not raised because the session may have already been deleted
        (timeout, etc.); they are logged at debug level so a failed cleanup
        can still be diagnosed. ``session_id`` is cleared either way.
        """
        if not (self._client and self.session_id):
            return

        try:
            await self._client.browsers.delete_by_id(self.session_id)
        except Exception:
            # Local import, in keeping with this module's function-scope imports.
            import logging

            logging.getLogger(__name__).debug(
                "Could not delete Kernel browser session %s",
                self.session_id,
                exc_info=True,
            )
        finally:
            self.session_id = None

    async def __aenter__(self) -> "AsyncKernelBrowser":
        return await self.create()

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        await self.delete()
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class KernelExecutor(ComputerExecutor):
    """
    Execute computer use actions on a Kernel browser session.

    This executor maps CUAction types to Kernel's computer control API,
    enabling vision-based LLM loops to control a remote browser.

    Example:
        with KernelBrowser() as browser:
            executor = KernelExecutor(browser.session_id)

            # Take a screenshot
            result = executor.execute(Screenshot(kind="screenshot"))
            print(f"Got {len(result['screenshot']['content'])} bytes")

            # Click at coordinates
            executor.execute(Click(kind="click", x=100, y=200, button="left"))

            # Type text
            executor.execute(Type(kind="type", text="Hello, world!"))
    """

    # Action kinds handled by the method named "_<kind>", which receives the
    # action as its only argument. "screenshot" is dispatched separately
    # because its handler takes no action.
    _ACTION_KINDS = frozenset(
        {
            "click",
            "double_click",
            "triple_click",
            "move",
            "scroll",
            "type",
            "keypress",
            "drag",
            "wait",
            "navigate",
            "go_back",
            "go_forward",
            "search",
        }
    )

    def __init__(self, session_id: str):
        """
        Initialize the executor with an active Kernel browser session.

        Args:
            session_id: The session ID from KernelBrowser.create()
        """
        self.session_id = session_id
        self._client = _get_kernel_client()

    def execute(self, action: CUAction) -> CUActionResult:
        """
        Execute a computer use action on the Kernel browser.

        Args:
            action: The action to execute (Click, Type, Screenshot, etc.)

        Returns:
            CUActionResult with screenshot (if applicable) and metadata

        Raises:
            ValueError: If ``action["kind"]`` is not a supported kind, or a
                click action lacks coordinates.
        """
        kind = action["kind"]

        if kind == "screenshot":
            return self._screenshot()
        if kind in self._ACTION_KINDS:
            return getattr(self, f"_{kind}")(action)  # type: ignore
        raise ValueError(f"Unsupported action kind: {kind}")

    @staticmethod
    def _require_point(action, label: str) -> tuple[int, int]:
        """Return the action's (x, y), raising ValueError if either is missing."""
        x = action.get("x")
        y = action.get("y")
        if x is None or y is None:
            raise ValueError(f"{label} action requires x and y coordinates")
        return x, y

    def _capture(self) -> ScreenshotResult:
        """Capture the browser screen and wrap the bytes as a PNG screenshot."""
        response = self._client.browsers.computer.capture_screenshot(self.session_id)
        # Response is a BinaryAPIResponse, read the content bytes
        return ScreenshotResult(media_type="image/png", content=response.read())

    def _open_url(self, url: str) -> ScreenshotResult:
        """Load *url* through the address bar and return a screenshot afterwards."""
        import time

        computer = self._client.browsers.computer
        # Ctrl+L to focus address bar, type URL, press Enter
        computer.press_key(self.session_id, keys=["ctrl+l"])
        time.sleep(0.2)
        computer.type_text(self.session_id, text=url)
        time.sleep(0.1)
        computer.press_key(self.session_id, keys=["Return"])
        time.sleep(1.5)  # Wait for page load
        return self._capture()

    def _press_then_capture(self, keys: list[str]) -> ScreenshotResult:
        """Press a history shortcut, pause briefly, and return a screenshot."""
        import time

        self._client.browsers.computer.press_key(self.session_id, keys=keys)
        time.sleep(0.5)  # Wait for page load
        return self._capture()

    def _screenshot(self) -> CUActionResult:
        """Capture a screenshot of the browser."""
        return CUActionResult(screenshot=self._capture(), data={})

    def _click(self, action: Click) -> CUActionResult:
        """Execute a single click with the action's button (default "left")."""
        x, y = self._require_point(action, "Click")
        self._client.browsers.computer.click_mouse(
            self.session_id,
            x=x,
            y=y,
            button=action.get("button", "left"),
            num_clicks=1,
        )
        return CUActionResult(screenshot=None, data={"action": "click"})

    def _double_click(self, action: DoubleClick) -> CUActionResult:
        """Execute a left-button double click."""
        x, y = self._require_point(action, "Double click")
        self._client.browsers.computer.click_mouse(
            self.session_id,
            x=x,
            y=y,
            button="left",
            num_clicks=2,
        )
        return CUActionResult(screenshot=None, data={"action": "double_click"})

    def _triple_click(self, action: TripleClick) -> CUActionResult:
        """Execute a left-button triple click."""
        x, y = self._require_point(action, "Triple click")
        self._client.browsers.computer.click_mouse(
            self.session_id,
            x=x,
            y=y,
            button="left",
            num_clicks=3,
        )
        return CUActionResult(screenshot=None, data={"action": "triple_click"})

    def _move(self, action: Move) -> CUActionResult:
        """Move the mouse cursor."""
        self._client.browsers.computer.move_mouse(
            self.session_id,
            x=action["x"],
            y=action["y"],
        )
        return CUActionResult(screenshot=None, data={"action": "move"})

    def _scroll(self, action: Scroll) -> CUActionResult:
        """Execute a scroll action; a missing or None x/y is sent as 0."""
        self._client.browsers.computer.scroll(
            self.session_id,
            x=action.get("x") or 0,
            y=action.get("y") or 0,
            delta_x=action["dx"],
            delta_y=action["dy"],
        )
        return CUActionResult(screenshot=None, data={"action": "scroll"})

    def _type(self, action: Type) -> CUActionResult:
        """Type text."""
        self._client.browsers.computer.type_text(
            self.session_id,
            text=action["text"],
        )
        return CUActionResult(screenshot=None, data={"action": "type"})

    def _keypress(self, action: Keypress) -> CUActionResult:
        """Press key(s)."""
        # Kernel expects keys as a list of key combinations
        # e.g., ["Ctrl+a", "Enter"]
        self._client.browsers.computer.press_key(
            self.session_id,
            keys=action["keys"],
        )
        return CUActionResult(screenshot=None, data={"action": "keypress"})

    def _drag(self, action: Drag) -> CUActionResult:
        """Execute a left-button drag along the action's path."""
        # Build the path including start position
        path = []
        if action.get("start_x") is not None and action.get("start_y") is not None:
            path.append([action["start_x"], action["start_y"]])
        path.extend(action["path"])

        self._client.browsers.computer.drag_mouse(
            self.session_id,
            path=path,
            button="left",
        )
        return CUActionResult(screenshot=None, data={"action": "drag"})

    def _wait(self, action: Wait) -> CUActionResult:
        """Block for ``action["ms"]`` milliseconds."""
        import time

        time.sleep(action["ms"] / 1000.0)
        return CUActionResult(
            screenshot=None, data={"action": "wait", "ms": action["ms"]}
        )

    def _navigate(self, action: Navigate) -> CUActionResult:
        """Navigate to a URL using keyboard shortcuts."""
        url = action["url"]
        return CUActionResult(
            screenshot=self._open_url(url),
            data={"action": "navigate", "url": url},
        )

    def _go_back(self, action: GoBack) -> CUActionResult:
        """Go back in browser history using keyboard shortcut."""
        return CUActionResult(
            screenshot=self._press_then_capture(["Alt+Left"]),
            data={"action": "go_back"},
        )

    def _go_forward(self, action: GoForward) -> CUActionResult:
        """Go forward in browser history using keyboard shortcut."""
        return CUActionResult(
            screenshot=self._press_then_capture(["Alt+Right"]),
            data={"action": "go_forward"},
        )

    def _search(self, action: Search) -> CUActionResult:
        """Navigate to Google search using keyboard shortcuts."""
        from urllib.parse import quote

        query = action["query"]
        search_url = f"https://www.google.com/search?q={quote(query)}"
        return CUActionResult(
            screenshot=self._open_url(search_url),
            data={"action": "search", "query": query},
        )
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
class AsyncKernelExecutor:
    """
    Async version of KernelExecutor for use with asyncio.

    The ``AsyncKernel`` client is created lazily, the first time an action
    actually needs to talk to Kernel.

    Example:
        async with AsyncKernelBrowser() as browser:
            executor = AsyncKernelExecutor(browser.session_id)
            result = await executor.execute(Screenshot(kind="screenshot"))
    """

    # Action kinds handled by the coroutine method named "_<kind>", which
    # receives the action as its only argument. "screenshot" is dispatched
    # separately because its handler takes no action.
    _ACTION_KINDS = frozenset(
        {
            "click",
            "double_click",
            "triple_click",
            "move",
            "scroll",
            "type",
            "keypress",
            "drag",
            "wait",
            "navigate",
            "go_back",
            "go_forward",
            "search",
        }
    )

    def __init__(self, session_id: str):
        """
        Initialize the executor with an active Kernel browser session.

        Args:
            session_id: The session ID from AsyncKernelBrowser.create()
        """
        self.session_id = session_id
        self._client = None

    def _get_client(self):
        """Lazy load the async client.

        Raises:
            ImportError: If the ``kernel`` package is not installed.
        """
        if self._client is None:
            try:
                from kernel import AsyncKernel
            except ImportError as err:
                # Chain the original failure so the real import problem
                # stays visible in the traceback.
                raise ImportError(
                    "The 'kernel' package is required for AsyncKernelExecutor. "
                    "Install it with: pip install kernel"
                ) from err
            self._client = AsyncKernel()
        return self._client

    async def execute(self, action: CUAction) -> CUActionResult:
        """
        Execute a computer use action on the Kernel browser asynchronously.

        Args:
            action: The action to execute (Click, Type, Screenshot, etc.)

        Returns:
            CUActionResult with screenshot (if applicable) and metadata

        Raises:
            ValueError: If ``action["kind"]`` is not a supported kind, or a
                click action lacks coordinates.
        """
        kind = action["kind"]

        if kind == "screenshot":
            return await self._screenshot()
        if kind in self._ACTION_KINDS:
            return await getattr(self, f"_{kind}")(action)  # type: ignore
        raise ValueError(f"Unsupported action kind: {kind}")

    @staticmethod
    def _require_point(action, label: str) -> tuple[int, int]:
        """Return the action's (x, y), raising ValueError if either is missing."""
        x = action.get("x")
        y = action.get("y")
        if x is None or y is None:
            raise ValueError(f"{label} action requires x and y coordinates")
        return x, y

    async def _capture(self) -> ScreenshotResult:
        """Capture the browser screen and wrap the bytes as a PNG screenshot."""
        client = self._get_client()
        response = await client.browsers.computer.capture_screenshot(self.session_id)
        # AsyncBinaryAPIResponse requires await on .read()
        content = await response.read()
        return ScreenshotResult(media_type="image/png", content=content)

    async def _open_url(self, url: str) -> ScreenshotResult:
        """Load *url* through the address bar and return a screenshot afterwards."""
        import asyncio

        computer = self._get_client().browsers.computer
        # Ctrl+L to focus address bar, type URL, press Enter
        await computer.press_key(self.session_id, keys=["ctrl+l"])
        await asyncio.sleep(0.2)
        await computer.type_text(self.session_id, text=url)
        await asyncio.sleep(0.1)
        await computer.press_key(self.session_id, keys=["Return"])
        await asyncio.sleep(1.5)  # Wait for page load
        return await self._capture()

    async def _press_then_capture(self, keys: list[str]) -> ScreenshotResult:
        """Press a history shortcut, pause briefly, and return a screenshot."""
        import asyncio

        client = self._get_client()
        await client.browsers.computer.press_key(self.session_id, keys=keys)
        await asyncio.sleep(0.5)  # Wait for page load
        return await self._capture()

    async def _screenshot(self) -> CUActionResult:
        """Capture a screenshot of the browser."""
        return CUActionResult(screenshot=await self._capture(), data={})

    async def _click(self, action: Click) -> CUActionResult:
        """Execute a single click with the action's button (default "left")."""
        # Validate before creating the client so bad input fails fast, with
        # the same error KernelExecutor._click raises.
        x, y = self._require_point(action, "Click")
        client = self._get_client()
        await client.browsers.computer.click_mouse(
            self.session_id,
            x=x,
            y=y,
            button=action.get("button", "left"),
            num_clicks=1,
        )
        return CUActionResult(screenshot=None, data={"action": "click"})

    async def _double_click(self, action: DoubleClick) -> CUActionResult:
        """Execute a left-button double click."""
        x, y = self._require_point(action, "Double click")
        client = self._get_client()
        await client.browsers.computer.click_mouse(
            self.session_id,
            x=x,
            y=y,
            button="left",
            num_clicks=2,
        )
        return CUActionResult(screenshot=None, data={"action": "double_click"})

    async def _triple_click(self, action: TripleClick) -> CUActionResult:
        """Execute a left-button triple click."""
        x, y = self._require_point(action, "Triple click")
        client = self._get_client()
        await client.browsers.computer.click_mouse(
            self.session_id,
            x=x,
            y=y,
            button="left",
            num_clicks=3,
        )
        return CUActionResult(screenshot=None, data={"action": "triple_click"})

    async def _move(self, action: Move) -> CUActionResult:
        """Move the mouse cursor."""
        client = self._get_client()
        await client.browsers.computer.move_mouse(
            self.session_id,
            x=action["x"],
            y=action["y"],
        )
        return CUActionResult(screenshot=None, data={"action": "move"})

    async def _scroll(self, action: Scroll) -> CUActionResult:
        """Execute a scroll action; a missing or None x/y is sent as 0."""
        client = self._get_client()
        await client.browsers.computer.scroll(
            self.session_id,
            x=action.get("x") or 0,
            y=action.get("y") or 0,
            delta_x=action["dx"],
            delta_y=action["dy"],
        )
        return CUActionResult(screenshot=None, data={"action": "scroll"})

    async def _type(self, action: Type) -> CUActionResult:
        """Type text."""
        client = self._get_client()
        await client.browsers.computer.type_text(
            self.session_id,
            text=action["text"],
        )
        return CUActionResult(screenshot=None, data={"action": "type"})

    async def _keypress(self, action: Keypress) -> CUActionResult:
        """Press key(s)."""
        client = self._get_client()
        await client.browsers.computer.press_key(
            self.session_id,
            keys=action["keys"],
        )
        return CUActionResult(screenshot=None, data={"action": "keypress"})

    async def _drag(self, action: Drag) -> CUActionResult:
        """Execute a left-button drag along the action's path."""
        client = self._get_client()
        # Prepend the start position when both of its coordinates are given.
        path = []
        if action.get("start_x") is not None and action.get("start_y") is not None:
            path.append([action["start_x"], action["start_y"]])
        path.extend(action["path"])

        await client.browsers.computer.drag_mouse(
            self.session_id,
            path=path,
            button="left",
        )
        return CUActionResult(screenshot=None, data={"action": "drag"})

    async def _wait(self, action: Wait) -> CUActionResult:
        """Sleep for ``action["ms"]`` milliseconds without blocking the loop."""
        import asyncio

        await asyncio.sleep(action["ms"] / 1000.0)
        return CUActionResult(
            screenshot=None, data={"action": "wait", "ms": action["ms"]}
        )

    async def _navigate(self, action: Navigate) -> CUActionResult:
        """Navigate to a URL using keyboard shortcuts."""
        url = action["url"]
        return CUActionResult(
            screenshot=await self._open_url(url),
            data={"action": "navigate", "url": url},
        )

    async def _go_back(self, action: GoBack) -> CUActionResult:
        """Go back in browser history using keyboard shortcut."""
        return CUActionResult(
            screenshot=await self._press_then_capture(["Alt+Left"]),
            data={"action": "go_back"},
        )

    async def _go_forward(self, action: GoForward) -> CUActionResult:
        """Go forward in browser history using keyboard shortcut."""
        return CUActionResult(
            screenshot=await self._press_then_capture(["Alt+Right"]),
            data={"action": "go_forward"},
        )

    async def _search(self, action: Search) -> CUActionResult:
        """Navigate to Google search using keyboard shortcuts."""
        from urllib.parse import quote

        query = action["query"]
        search_url = f"https://www.google.com/search?q={quote(query)}"
        return CUActionResult(
            screenshot=await self._open_url(search_url),
            data={"action": "search", "query": query},
        )
|