nebula-ai-plugin-system 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/package.json +49 -0
- package/src/browser.ts +713 -0
- package/src/code-execute.ts +151 -0
- package/src/cwd-state.ts +33 -0
- package/src/delegate.ts +88 -0
- package/src/fs.ts +0 -0
- package/src/index.ts +160 -0
- package/src/session-search.ts +103 -0
- package/src/shell-cd.ts +73 -0
- package/src/shell-process.ts +255 -0
- package/src/shell.ts +104 -0
- package/src/skills-manage.ts +126 -0
- package/src/skills.ts +107 -0
- package/src/todo.ts +91 -0
- package/src/vision.ts +242 -0
- package/src/web-fetch.ts +310 -0
package/src/browser.ts
ADDED
|
@@ -0,0 +1,713 @@
|
|
|
1
|
+
import { type ChildProcess, spawn, spawnSync } from 'node:child_process'
|
|
2
|
+
import {
|
|
3
|
+
closeSync,
|
|
4
|
+
existsSync,
|
|
5
|
+
mkdirSync,
|
|
6
|
+
openSync,
|
|
7
|
+
readFileSync,
|
|
8
|
+
readdirSync,
|
|
9
|
+
rmSync,
|
|
10
|
+
statSync,
|
|
11
|
+
} from 'node:fs'
|
|
12
|
+
import { tmpdir } from 'node:os'
|
|
13
|
+
import { delimiter, join } from 'node:path'
|
|
14
|
+
import { type ToolDef, type VisionInferFn, coerceBool, coerceInt, redactEnv } from 'nebula-ai-core'
|
|
15
|
+
import { z } from 'zod'
|
|
16
|
+
import { sniffMimeFromBytes } from './vision'
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Phase 9.4 + Task #74 browser tools. Wraps the `agent-browser` CLI with
|
|
20
|
+
* hermes-grade resilience: PATH-walker for unlinked Homebrew node@N installs,
|
|
21
|
+
* per-session AGENT_BROWSER_SOCKET_DIR (sidesteps macOS 104-byte AF_UNIX
|
|
22
|
+
* limit), stdout/stderr to temp files (avoids daemon-fd pipe deadlock),
|
|
23
|
+
* optional `NEBULA_BROWSER_CDP_URL` override for connecting to a user-supplied
|
|
24
|
+
* CDP endpoint, and on-exit cleanup of the spawned daemon.
|
|
25
|
+
*
|
|
26
|
+
* Defaults to local headless Chromium via `agent-browser --session`. Set
|
|
27
|
+
* `NEBULA_BROWSER_CDP_URL` to opt into CDP override (e.g. qutebrowser proxy,
|
|
28
|
+
* Browserbase websocket).
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
/**
 * Per-tool dependencies shared by every browser.* factory in this file.
 * All fields are optional; each falls back to the default noted below.
 */
interface BrowserDeps {
  /** Explicit path to the agent-browser binary. When unset the path is resolved lazily on each call. */
  binPath?: string
  /** Directory the spawned process runs in. Falls back to `process.cwd()`. */
  cwd?: string
  /** Per-command timeout in milliseconds. Falls back to 60000. */
  timeoutMs?: number
}
|
|
39
|
+
|
|
40
|
+
/**
 * Outcome of one agent-browser invocation.
 * `data` carries the captured output when the process actually ran;
 * `error` carries a human-readable reason when it failed or timed out.
 */
interface RunResult {
  ok: boolean
  data?: { stdout: string; stderr?: string; exit_code: number | null }
  error?: string
}
|
|
45
|
+
|
|
46
|
+
/** Timeout applied to each agent-browser command when the caller supplies none (ms). */
const DEFAULT_TIMEOUT_MS = 60_000

/** Conventional binary directories probed when the inherited PATH does not contain the tool. */
const SANE_PATH_DIRS = [
  '/opt/homebrew/bin',
  '/opt/homebrew/sbin',
  '/usr/local/bin',
  '/usr/local/sbin',
  '/usr/bin',
  '/usr/sbin',
  '/bin',
  '/sbin',
]

// Module-level state, created lazily and shared by every tool in this process.
/** Session name handed to `agent-browser --session`; null until first use. */
let cachedSessionName: string | null = null
/** Directory holding the daemon socket and per-call output files; null until first use. */
let cachedSocketDir: string | null = null
/** True once the exit/signal cleanup handlers have been installed. */
let cleanupRegistered = false
|
|
61
|
+
|
|
62
|
+
/**
 * List the `bin` directories of versioned Homebrew Node installs
 * (`<homebrewOpt>/node@N/bin`) that exist on disk.
 *
 * Only `node@…` entries are considered. A bare `node` prefix test would also
 * pick up unrelated formulae whose names merely start with "node"
 * (e.g. `nodenv`, `node_exporter`) and put their bin dirs on the search path.
 *
 * @param homebrewOpt Homebrew `opt` directory to scan. Defaults to the Apple
 *   Silicon location; overridable so the scan can be exercised against a
 *   temporary directory.
 * @returns Existing `bin` directories, or an empty array when the root is
 *   missing or cannot be read.
 */
function discoverHomebrewNodeDirs(homebrewOpt = '/opt/homebrew/opt'): string[] {
  if (!existsSync(homebrewOpt)) return []
  try {
    return readdirSync(homebrewOpt)
      .filter(name => name.startsWith('node@'))
      .map(name => join(homebrewOpt, name, 'bin'))
      .filter(dir => existsSync(dir))
  } catch {
    // Unreadable directory: treat as "nothing discovered" rather than failing the lookup.
    return []
  }
}
|
|
74
|
+
|
|
75
|
+
/**
 * Return the first `<dir>/<name>` that is a regular file, or null when no
 * directory in `dirs` contains one.
 *
 * `statSync` follows symlinks, so a dangling symlink (e.g. a Homebrew link
 * whose target was moved by `brew upgrade`) is reported as missing instead of
 * being returned as a usable path.
 *
 * `throwIfNoEntry: false` only suppresses "no such entry". Other failures
 * (permission denied on a directory, symlink loops, ...) still throw, so each
 * probe is wrapped: one broken search-path entry must not abort the whole
 * lookup — it is skipped, the way a shell skips it.
 *
 * @param name File name to look for.
 * @param dirs Directories to probe, in priority order.
 */
function whichIn(name: string, dirs: string[]): string | null {
  for (const dir of dirs) {
    const candidate = join(dir, name)
    try {
      if (statSync(candidate, { throwIfNoEntry: false })?.isFile()) return candidate
    } catch {
      // Unusable directory entry; keep scanning the remaining ones.
    }
  }
  return null
}
|
|
87
|
+
|
|
88
|
+
/**
 * Locate the `agent-browser` CLI on disk.
 *
 * Probe order (first regular file wins):
 *   1. `override`, returned as-is without touching the filesystem.
 *   2. For each workspace root — `cwdOverride` (or the process cwd), the
 *      process cwd itself, and `<root>/nebula` — the `node_modules/.bin` shim
 *      and then the package's own `bin/agent-browser.js`.
 *   3. The same two locations under Bun's global install tree
 *      (`$HOME/.bun/install/global`), which is probed explicitly rather than
 *      relying on it being on `$PATH`.
 *   4. Every directory on `$PATH`.
 *   5. Homebrew node dirs plus the well-known system bin directories.
 *
 * The result is deliberately not cached: the lookup is a handful of stat
 * calls, and a cached path can go stale when the install is upgraded from
 * another shell.
 *
 * @param override Explicit binary path; short-circuits the search.
 * @param cwdOverride Root to search instead of the process cwd, for callers
 *   whose workspace root differs from where the process was started.
 * @returns The resolved path, or null when nothing was found.
 */
function findAgentBrowser(override?: string, cwdOverride?: string): string | null {
  if (override) return override

  const isRegularFile = (p: string): boolean =>
    statSync(p, { throwIfNoEntry: false })?.isFile() === true

  // Locations of the CLI relative to an install root: shim first, package entry second.
  const relativeProbes: string[][] = [
    ['node_modules', '.bin', 'agent-browser'],
    ['node_modules', 'agent-browser', 'bin', 'agent-browser.js'],
  ]

  // Workspace roots first (de-duplicated), then Bun's global install root.
  const base = cwdOverride ?? process.cwd()
  const installRoots = [...new Set([base, process.cwd(), join(base, 'nebula')])]
  const home = process.env.HOME
  if (home) installRoots.push(join(home, '.bun', 'install', 'global'))

  for (const root of installRoots) {
    for (const segments of relativeProbes) {
      const candidate = join(root, ...segments)
      if (isRegularFile(candidate)) return candidate
    }
  }

  const pathDirs = (process.env.PATH ?? '').split(delimiter).filter(Boolean)
  const onPath = whichIn('agent-browser', pathDirs)
  if (onPath) return onPath

  // Last resort: directories that are commonly missing from a daemon's PATH.
  const fallbackDirs = [...discoverHomebrewNodeDirs(), ...SANE_PATH_DIRS].filter(d => existsSync(d))
  return whichIn('agent-browser', fallbackDirs)
}
|
|
158
|
+
|
|
159
|
+
/**
 * Report whether the `agent-browser` binary can be resolved on this machine.
 * Intended as the gate for registering the browser.* tools, so installs
 * without the dependency do not fail on the first browser.* call.
 *
 * @param cwdOverride Root to search instead of `process.cwd()`. Needed when
 *   the process was started outside the workspace root (e.g. from the home
 *   directory while `node_modules` lives one level deeper).
 * @returns true when a binary path was found.
 */
export function isBrowserAvailable(cwdOverride?: string): boolean {
  const resolved = findAgentBrowser(undefined, cwdOverride)
  return resolved !== null
}
|
|
176
|
+
|
|
177
|
+
/**
 * Resolve the `agent-browser` binary and hand back its path.
 *
 * Counterpart of `isBrowserAvailable` for callers that want the path itself:
 * resolve once at registration time, then pass the result as `binPath` to the
 * tool factories so individual calls skip the search (which could miss again
 * when the process cwd is not the workspace root).
 *
 * @param cwdOverride Root to search instead of `process.cwd()`.
 * @returns The resolved path, or null when the binary was not found.
 */
export function findAgentBrowserOrNull(cwdOverride?: string): string | null {
  const resolved = findAgentBrowser(undefined, cwdOverride)
  return resolved
}
|
|
186
|
+
|
|
187
|
+
/**
 * Base directory for the per-session socket directory.
 * On macOS this is always `/tmp` rather than `os.tmpdir()`, keeping the
 * resulting socket path short; elsewhere `os.tmpdir()` is used.
 */
function socketSafeTmpdir(): string {
  return process.platform === 'darwin' ? '/tmp' : tmpdir()
}
|
|
191
|
+
|
|
192
|
+
/**
 * Produce `bytes` random bytes from the Web Crypto generator, rendered as a
 * lowercase hex string of length `2 * bytes`.
 */
function randomHex(bytes: number): string {
  const raw = crypto.getRandomValues(new Uint8Array(bytes))
  let hex = ''
  for (let i = 0; i < raw.length; i++) {
    // Two hex digits per byte, zero-padded so the length is predictable.
    hex += raw[i]!.toString(16).padStart(2, '0')
  }
  return hex
}
|
|
197
|
+
|
|
198
|
+
/**
 * Session name passed to `agent-browser --session`.
 * Generated on first use as `a_` followed by 10 random hex characters, then
 * reused for the rest of the process (until the test hook resets it).
 */
function getSessionName(): string {
  const existing = cachedSessionName
  if (existing) return existing
  const fresh = `a_${randomHex(5)}`
  cachedSessionName = fresh
  return fresh
}
|
|
203
|
+
|
|
204
|
+
/**
 * Directory used for the daemon socket and the per-call output files.
 * Created on first use (owner-only permissions) under `socketSafeTmpdir()`,
 * named after the session, and cached afterwards. Creating it also installs
 * the process-exit cleanup handlers.
 */
function getSocketDir(): string {
  const known = cachedSocketDir
  if (known) return known

  const fresh = join(socketSafeTmpdir(), `agent-browser-${getSessionName()}`)
  mkdirSync(fresh, { recursive: true, mode: 0o700 })
  cachedSocketDir = fresh
  registerCleanup()
  return fresh
}
|
|
212
|
+
|
|
213
|
+
/**
 * Install process-level handlers (once per process) that shut the browser
 * session down and delete the socket directory when the process ends.
 *
 * Handlers are attached to `exit`, `SIGINT` and `SIGTERM`. The signal handlers
 * run the cleanup and then exit with the conventional 128+signal code
 * (130 / 143).
 *
 * The cleanup itself runs at most once: a signal handler calls it and then
 * `process.exit`, which fires the `exit` listener as well. Without the guard
 * the `close` command would be spawned a second time, costing up to another
 * 5 seconds on a frozen daemon.
 */
function registerCleanup(): void {
  if (cleanupRegistered) return
  cleanupRegistered = true

  let cleanedUp = false
  const cleanup = (): void => {
    if (cleanedUp) return
    cleanedUp = true

    const sess = cachedSessionName
    const socketDir = cachedSocketDir

    // Step 1: ask the daemon to close the session. Skipped when a CDP
    // endpoint is configured, since no `--session` daemon is used then.
    try {
      const bin = findAgentBrowser()
      if (bin && sess && !process.env.NEBULA_BROWSER_CDP_URL) {
        // spawnSync so the daemon actually receives `close` before we exit;
        // an async/detached child would be dropped when the parent exits.
        // The 5s cap prevents hanging on a frozen daemon.
        spawnSync(bin, ['--session', sess, 'close'], {
          stdio: 'ignore',
          env: socketDir ? { ...process.env, AGENT_BROWSER_SOCKET_DIR: socketDir } : process.env,
          timeout: 5000,
        })
      }
    } catch {
      // Best effort: shutdown must not be blocked by a failed close.
    }

    // Step 2: remove the socket directory, even if step 1 failed.
    if (socketDir) {
      try {
        rmSync(socketDir, { recursive: true, force: true })
      } catch {
        // Best effort: nothing useful can be done about a failed delete at exit.
      }
    }
  }

  process.on('exit', cleanup)
  process.on('SIGINT', () => {
    cleanup()
    process.exit(130)
  })
  process.on('SIGTERM', () => {
    cleanup()
    process.exit(143)
  })
}
|
|
250
|
+
|
|
251
|
+
/**
 * Build the environment for a spawned agent-browser process.
 *
 * Starts from the current environment after it has been passed through
 * `redactEnv`, extends `PATH` with any existing Homebrew-node / well-known
 * bin directories that are not already on it, and sets
 * `AGENT_BROWSER_SOCKET_DIR` to `socketDir`.
 *
 * NOTE(review): missing directories are added with `unshift`, so they end up
 * ahead of the inherited PATH entries and in reverse candidate order —
 * confirm this priority is intended.
 *
 * @param socketDir Directory exported to the child as `AGENT_BROWSER_SOCKET_DIR`.
 */
function buildBrowserEnv(socketDir: string): NodeJS.ProcessEnv {
  const { env: baseEnv } = redactEnv(process.env as Record<string, string>)
  const pathEntries = (baseEnv.PATH ?? '').split(delimiter).filter(Boolean)

  for (const dir of [...discoverHomebrewNodeDirs(), ...SANE_PATH_DIRS]) {
    const alreadyListed = pathEntries.includes(dir)
    if (!alreadyListed && existsSync(dir)) pathEntries.unshift(dir)
  }

  const childEnv: NodeJS.ProcessEnv = {
    ...baseEnv,
    PATH: pathEntries.join(delimiter),
    AGENT_BROWSER_SOCKET_DIR: socketDir,
  }
  return childEnv
}
|
|
264
|
+
|
|
265
|
+
/**
 * Read a file as UTF-8 text, returning an empty string when it cannot be
 * read for any reason (missing, unreadable, is a directory, ...).
 */
function readFileSafe(path: string): string {
  let text = ''
  try {
    text = readFileSync(path, 'utf8')
  } catch {
    // Deliberately ignored: callers treat "no output" and "no file" alike.
  }
  return text
}
|
|
272
|
+
|
|
273
|
+
/**
 * Delete a single file, never throwing. A missing path is not an error
 * (`force: true`), and any other failure is swallowed because callers use
 * this purely for temp-file housekeeping.
 */
function rmSafe(path: string): void {
  const options = { force: true }
  try {
    rmSync(path, options)
  } catch {
    // Housekeeping only; a leftover temp file is not worth failing the call.
  }
}
|
|
278
|
+
|
|
279
|
+
/** Optional behaviour switches for `runAgentBrowser`. */
interface RunOpts {
  /**
   * Milliseconds to wait (via `agent-browser wait <ms>`) once the primary
   * command has succeeded, giving navigations and JS-driven form submits time
   * to settle before the next snapshot. 0 or unset skips the wait.
   */
  settleAfterMs?: number
}
|
|
287
|
+
|
|
288
|
+
/**
 * Run one agent-browser command and, when requested, follow it with a
 * settle wait.
 *
 * The primary command's result is what gets reported. The settle wait is
 * best-effort: it only runs after a successful primary command, it is capped
 * at 10 seconds, and its failure never turns a successful action into a
 * failed one. A failed wait is surfaced as a bracketed note appended to
 * `data.stderr` so the caller can tell the page may not have settled.
 *
 * @param command agent-browser sub-command (e.g. `open`, `click`).
 * @param extraArgs Arguments passed after the sub-command.
 * @param deps Binary path / cwd / timeout overrides.
 * @param opts `settleAfterMs` > 0 enables the post-command wait.
 * @returns The primary command's result (possibly with a settle note in stderr).
 */
async function runAgentBrowser(
  command: string,
  extraArgs: string[],
  deps: BrowserDeps,
  opts: RunOpts = {},
): Promise<RunResult> {
  const result = await runAgentBrowserOnce(command, extraArgs, deps)
  const requested = opts.settleAfterMs
  // Skip the wait on failure, when unset/0/NaN, and for nonsensical negative values.
  if (!result.ok || !requested || requested < 0) return result

  const settleMs = Math.min(requested, 10_000)
  let failure: string | null = null
  try {
    const settle = await runAgentBrowserOnce('wait', [String(settleMs)], deps)
    if (!settle.ok) {
      failure = settle.error ?? `exit code ${settle.data?.exit_code ?? 'unknown'}`
    }
  } catch (err) {
    // A throwing wait must not discard the result of the action that succeeded.
    failure = err instanceof Error ? err.message : String(err)
  }
  if (failure === null) return result

  const note = `[settle wait ${settleMs}ms failed: ${failure}]`
  const data = result.data ?? { stdout: '', exit_code: null }
  return {
    ...result,
    data: { ...data, stderr: data.stderr ? `${data.stderr}\n${note}` : note },
  }
}
|
|
303
|
+
|
|
304
|
+
/**
 * Spawn a single agent-browser command and collect its output.
 *
 * The child's stdout/stderr are redirected to temp files inside the socket
 * directory instead of pipes; the files are read back (tail-truncated to
 * 100 000 / 50 000 characters) and deleted once the process closes. The
 * command targets either the CDP endpoint in `NEBULA_BROWSER_CDP_URL` or this
 * process's named session.
 *
 * Failures are reported through the returned `RunResult`, not by throwing:
 * missing binary, socket-dir or temp-file creation errors, spawn errors,
 * timeouts (the child is SIGKILLed) and non-zero exit codes.
 *
 * @param command agent-browser sub-command.
 * @param extraArgs Arguments passed after the sub-command.
 * @param deps Binary path / cwd / timeout overrides.
 */
async function runAgentBrowserOnce(
  command: string,
  extraArgs: string[],
  deps: BrowserDeps,
): Promise<RunResult> {
  const bin = findAgentBrowser(deps.binPath)
  if (!bin) {
    return {
      ok: false,
      error:
        'agent-browser CLI not found in node_modules/.bin or PATH. Re-run `nebula upgrade` to repair, or `bun install` in the workspace root if running from source.',
    }
  }

  const notExecutable =
    'agent-browser binary not executable at resolved path. Re-run `nebula upgrade` (sandbox) or `bun install` (host) to repair the workspace install.'
  const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err))
  const closeQuietly = (fd: number): void => {
    if (fd < 0) return
    try {
      closeSync(fd)
    } catch {
      // Already closed or invalid; nothing to recover.
    }
  }

  const cdpOverride = process.env.NEBULA_BROWSER_CDP_URL
  const backendArgs = cdpOverride ? ['--cdp', cdpOverride] : ['--session', getSessionName()]
  // `bin` is passed to spawn() as a single argv0, so a path containing spaces is safe.
  const fullArgs = [...backendArgs, command, ...extraArgs]

  let socketDir: string
  try {
    socketDir = getSocketDir()
  } catch (err) {
    return { ok: false, error: `failed to create browser socket dir: ${describeError(err)}` }
  }

  // Timestamp alone collides when the same command is issued twice within a
  // millisecond (concurrent tool calls); the random suffix keeps files distinct.
  const sanitizedCmd = command.replace(/[^a-z0-9_-]/gi, '_')
  const stamp = `${Date.now()}_${randomHex(4)}`
  const stdoutPath = join(socketDir, `_stdout_${sanitizedCmd}_${stamp}`)
  const stderrPath = join(socketDir, `_stderr_${sanitizedCmd}_${stamp}`)

  const env = buildBrowserEnv(socketDir)

  let stdoutFd = -1
  let stderrFd = -1
  try {
    stdoutFd = openSync(stdoutPath, 'w', 0o600)
    stderrFd = openSync(stderrPath, 'w', 0o600)
  } catch (err) {
    // The first open may have succeeded before the second failed: release it.
    closeQuietly(stdoutFd)
    closeQuietly(stderrFd)
    rmSafe(stdoutPath)
    rmSafe(stderrPath)
    return { ok: false, error: `failed to open browser temp files: ${describeError(err)}` }
  }

  const timeoutMs = deps.timeoutMs ?? DEFAULT_TIMEOUT_MS
  return await new Promise<RunResult>(resolve => {
    let proc: ChildProcess
    try {
      proc = spawn(bin, fullArgs, {
        cwd: deps.cwd ?? process.cwd(),
        env,
        stdio: ['ignore', stdoutFd, stderrFd],
      })
    } catch (err) {
      closeQuietly(stdoutFd)
      closeQuietly(stderrFd)
      rmSafe(stdoutPath)
      rmSafe(stderrPath)
      const code = (err as NodeJS.ErrnoException | null)?.code
      resolve({ ok: false, error: code === 'ENOENT' ? notExecutable : describeError(err) })
      return
    }
    // The child holds its own copies of the descriptors; ours are no longer needed.
    closeQuietly(stdoutFd)
    closeQuietly(stderrFd)

    // `error` may be followed by `close`; only the first outcome is reported.
    let settled = false
    const finish = (outcome: RunResult): void => {
      if (settled) return
      settled = true
      resolve(outcome)
    }

    let timedOut = false
    const timer = setTimeout(() => {
      timedOut = true
      try {
        proc.kill('SIGKILL')
      } catch {
        // Process already gone.
      }
    }, timeoutMs)

    proc.on('error', err => {
      clearTimeout(timer)
      rmSafe(stdoutPath)
      rmSafe(stderrPath)
      const code = (err as NodeJS.ErrnoException).code
      finish({ ok: false, error: code === 'ENOENT' ? notExecutable : err.message })
    })

    proc.on('close', code => {
      clearTimeout(timer)
      if (settled) {
        rmSafe(stdoutPath)
        rmSafe(stderrPath)
        return
      }
      // Keep only the tail of very large outputs.
      const stdout = readFileSafe(stdoutPath).slice(-100_000)
      const stderr = readFileSafe(stderrPath).slice(-50_000)
      rmSafe(stdoutPath)
      rmSafe(stderrPath)
      if (timedOut) {
        finish({
          ok: false,
          error: `agent-browser ${command} timed out after ${timeoutMs}ms`,
          data: { stdout, stderr, exit_code: code },
        })
        return
      }
      finish({
        // A null exit code (killed by signal) counts as failure.
        ok: (code ?? 1) === 0,
        data: { stdout, stderr, exit_code: code },
      })
    })
  })
}
|
|
424
|
+
|
|
425
|
+
/** Arguments accepted by `browser.navigate`. */
const NavigateSchema = z.object({
  url: z.string().min(1).describe('Absolute URL to navigate to (e.g. https://...).'),
})

/**
 * Build the `browser.navigate` tool: runs `agent-browser open <url>` and then
 * waits 1500ms for the page to settle.
 */
export function makeBrowserNavigate(deps: BrowserDeps): ToolDef<z.infer<typeof NavigateSchema>> {
  const tool: ToolDef<z.infer<typeof NavigateSchema>> = {
    name: 'browser.navigate',
    description:
      'Open a URL in the agent-browser tab. Returns the new page metadata. Auto-waits 1500ms after navigation so the next browser.snapshot reflects the new page.',
    shouldDefer: true,
    searchHint: 'browser navigate open url page',
    schema: NavigateSchema,
    handler: async ({ url }) => {
      const settle: RunOpts = { settleAfterMs: 1500 }
      return runAgentBrowser('open', [url], deps, settle)
    },
  }
  return tool
}
|
|
440
|
+
|
|
441
|
+
/** Arguments accepted by `browser.snapshot`. Both flags are optional. */
const SnapshotSchema = z.object({
  with_image: coerceBool
    .optional()
    .describe('When true, also captures a screenshot saved alongside the accessibility tree.'),
  cap: coerceBool
    .optional()
    .describe('Cap the snapshot output for compactness. Defaults to true (-c flag).'),
})

/**
 * Build the `browser.snapshot` tool: runs `agent-browser snapshot` with `-i`
 * and `-c`, each of which is passed unless the matching argument is
 * explicitly false.
 *
 * NOTE(review): `-i` is therefore on by default, whereas the `with_image`
 * field description reads as opt-in ("When true, ...") — confirm which is
 * intended, and what `-i` means to the CLI.
 */
export function makeBrowserSnapshot(deps: BrowserDeps): ToolDef<z.infer<typeof SnapshotSchema>> {
  const tool: ToolDef<z.infer<typeof SnapshotSchema>> = {
    name: 'browser.snapshot',
    description:
      'Capture the page accessibility tree with element refs (@e1, @e2, ...). Use refs returned here for click/type/scroll actions. Set with_image=true to also write a screenshot.',
    shouldDefer: true,
    searchHint: 'browser snapshot accessibility tree refs page state',
    schema: SnapshotSchema,
    handler: async ({ with_image, cap }) => {
      // Each flag is dropped only on an explicit `false`; undefined keeps it.
      const cliFlags = [
        ...(with_image === false ? [] : ['-i']),
        ...(cap === false ? [] : ['-c']),
      ]
      return runAgentBrowser('snapshot', cliFlags, deps)
    },
  }
  return tool
}
|
|
466
|
+
|
|
467
|
+
/** Arguments accepted by `browser.click`. */
const ClickSchema = z.object({
  selector: z
    .string()
    .min(1)
    .describe(
      "Snapshot ref (e.g. '@e5') from the most recent browser.snapshot — preferred — OR a plain CSS selector ('button.primary', '#submit'). NOT a Playwright-style pseudo-class: ':has-text()', ':has()', ':contains()' are NOT supported and will fail.",
    ),
})

/**
 * Build the `browser.click` tool: runs `agent-browser click <selector>` and
 * then waits 1200ms so any triggered navigation or state change can settle.
 */
export function makeBrowserClick(deps: BrowserDeps): ToolDef<z.infer<typeof ClickSchema>> {
  const tool: ToolDef<z.infer<typeof ClickSchema>> = {
    name: 'browser.click',
    description:
      "Click an element. Arg name is `selector` (snapshot @ref like '@e5' or plain CSS like 'button.primary'). Auto-waits 1200ms post-click so any triggered navigation/state change settles before the next snapshot. To click a link by visible text, take a fresh `browser.snapshot` first and pass the @eN ref of the matching node — Playwright pseudo-classes (:has-text, :contains) are not supported.",
    shouldDefer: true,
    searchHint: 'browser click element selector ref',
    schema: ClickSchema,
    handler: async ({ selector }) => {
      const settle: RunOpts = { settleAfterMs: 1200 }
      return runAgentBrowser('click', [selector], deps, settle)
    },
  }
  return tool
}
|
|
487
|
+
|
|
488
|
+
/** Arguments accepted by `browser.type`. */
const TypeSchema = z.object({
  selector: z.string().min(1),
  text: z.string().describe('Text to type into the element.'),
})

/**
 * Build the `browser.type` tool: runs `agent-browser type <selector> <text>`
 * and then waits 600ms so debounced input handlers can settle.
 */
export function makeBrowserType(deps: BrowserDeps): ToolDef<z.infer<typeof TypeSchema>> {
  const tool: ToolDef<z.infer<typeof TypeSchema>> = {
    name: 'browser.type',
    description:
      'Type text into an element by selector or snapshot ref. Auto-waits 600ms post-type so debounced input handlers settle before the next snapshot.',
    shouldDefer: true,
    searchHint: 'browser type input text fill',
    schema: TypeSchema,
    handler: async ({ selector, text }) => {
      const settle: RunOpts = { settleAfterMs: 600 }
      return runAgentBrowser('type', [selector, text], deps, settle)
    },
  }
  return tool
}
|
|
505
|
+
|
|
506
|
+
/** Arguments accepted by `browser.scroll`. Every field is optional. */
const ScrollSchema = z.object({
  direction: z
    .enum(['up', 'down', 'left', 'right'])
    .optional()
    .describe(
      "Scroll direction. Defaults to 'down' when omitted. Pass 'up'/'left'/'right' when needed.",
    ),
  pixels: coerceInt
    .refine(n => n > 0, 'pixels must be > 0')
    .optional()
    .describe('Optional scroll distance in pixels. Default 800.'),
  // `amount` is accepted as a second spelling of `pixels`. Callers have been
  // observed sending `amount=N`; if the key were unknown the schema would
  // strip it and the scroll would silently use the default distance instead
  // of the requested one. The handler merges the two, preferring `pixels`.
  amount: coerceInt
    .refine(n => n > 0, 'amount must be > 0')
    .optional()
    .describe('Alias for `pixels`. Prefer `pixels`; `amount` accepted for compatibility.'),
})

/**
 * Build the `browser.scroll` tool: runs `agent-browser scroll <direction>`
 * with the distance appended only when the caller supplied one
 * (`pixels`, falling back to `amount`).
 */
export function makeBrowserScroll(deps: BrowserDeps): ToolDef<z.infer<typeof ScrollSchema>> {
  const tool: ToolDef<z.infer<typeof ScrollSchema>> = {
    name: 'browser.scroll',
    description:
      "Scroll the page. Both args are optional: `direction` defaults to 'down' (override with 'up'/'left'/'right'); `pixels` defaults to 800. For 'scroll down N pixels' pass pixels=N. The schema also accepts `amount` as an alias for `pixels` — use either; pixels is preferred.",
    shouldDefer: true,
    searchHint: 'browser scroll page up down',
    schema: ScrollSchema,
    handler: async ({ direction, pixels, amount }) => {
      const heading: string = direction ?? 'down'
      const distance = pixels ?? amount
      // No distance given: leave it off so the CLI applies its own default.
      const cliArgs = distance ? [heading, String(distance)] : [heading]
      return runAgentBrowser('scroll', cliArgs, deps)
    },
  }
  return tool
}
|
|
547
|
+
|
|
548
|
+
/** `browser.back` takes no arguments. */
const BackSchema = z.object({})

/**
 * Build the `browser.back` tool: runs `agent-browser back` and then waits
 * 1500ms for the previous page to render.
 */
export function makeBrowserBack(deps: BrowserDeps): ToolDef<z.infer<typeof BackSchema>> {
  const tool: ToolDef<z.infer<typeof BackSchema>> = {
    name: 'browser.back',
    description:
      'Navigate the browser history back one step. Auto-waits 1500ms for the previous page to render before the next snapshot.',
    shouldDefer: true,
    searchHint: 'browser back history previous page',
    schema: BackSchema,
    handler: async () => {
      const settle: RunOpts = { settleAfterMs: 1500 }
      return runAgentBrowser('back', [], deps, settle)
    },
  }
  return tool
}
|
|
561
|
+
|
|
562
|
+
/** Arguments accepted by `browser.press`. */
const PressSchema = z.object({
  key: z.string().min(1).describe("Key to press, e.g. 'Enter', 'Tab', 'Escape', 'Control+a'."),
})

/**
 * Build the `browser.press` tool: runs `agent-browser press <key>` and then
 * waits 1500ms, long enough for an Enter-triggered form submit to navigate.
 */
export function makeBrowserPress(deps: BrowserDeps): ToolDef<z.infer<typeof PressSchema>> {
  const tool: ToolDef<z.infer<typeof PressSchema>> = {
    name: 'browser.press',
    description:
      'Send a single key press (Enter, Tab, Escape, Ctrl+A, etc.). Auto-waits 1500ms post-press so a form submit triggered by Enter has time to navigate before the next snapshot.',
    shouldDefer: true,
    searchHint: 'browser press key keyboard',
    schema: PressSchema,
    handler: async ({ key }) => {
      const settle: RunOpts = { settleAfterMs: 1500 }
      return runAgentBrowser('press', [key], deps, settle)
    },
  }
  return tool
}
|
|
577
|
+
|
|
578
|
+
/**
 * Arguments accepted by `browser.get_images`.
 * `limit` is validated to the range 1..200; the handler applies 50 when it is omitted.
 */
const GetImagesSchema = z.object({
  selector: z.string().optional().describe('Optional CSS selector to scope image extraction.'),
  limit: coerceInt
    .refine(n => n > 0 && n <= 200, 'limit must be 1..200')
    .optional()
    .describe('Cap on returned URLs. Default 50.'),
})
|
|
585
|
+
|
|
586
|
+
/**
 * Build the in-page script that collects image URLs.
 *
 * The selector is embedded with `JSON.stringify`, which yields a complete,
 * correctly escaped JS string literal. Escaping only single quotes is not
 * enough: a backslash in the selector (e.g. `img[data-x='\']`) would end the
 * literal early and let the rest of the selector run as code in the page.
 *
 * @param selector CSS selector passed to `document.querySelectorAll`.
 * @param limit Maximum number of matched elements to inspect.
 * @returns A JS expression evaluating to a JSON array string of non-empty `src` values.
 */
function buildImageSrcScript(selector: string, limit: number): string {
  const selectorLiteral = JSON.stringify(selector)
  return `JSON.stringify(Array.from(document.querySelectorAll(${selectorLiteral})).slice(0, ${limit}).map(i => i.src || i.getAttribute('src') || '').filter(Boolean))`
}

/**
 * Build the `browser.get_images` tool: evaluates a script in the page that
 * returns the `src` of up to `limit` (default 50) elements matching
 * `selector` (default `img`). A blank selector falls back to `img`, since an
 * empty selector makes `querySelectorAll` throw.
 */
export function makeBrowserGetImages(deps: BrowserDeps): ToolDef<z.infer<typeof GetImagesSchema>> {
  return {
    name: 'browser.get_images',
    description:
      'Extract image URLs from the current page. Optionally scoped to a CSS selector. Returns up to `limit` (default 50) src URLs as a JSON array string.',
    shouldDefer: true,
    searchHint: 'browser images src extract list',
    schema: GetImagesSchema,
    handler: async args => {
      const selector = args.selector?.trim() || 'img'
      const limit = args.limit ?? 50
      // agent-browser `get attr` only returns the first match; eval gets all.
      return runAgentBrowser('eval', [buildImageSrcScript(selector, limit)], deps)
    },
  }
}
|
|
603
|
+
|
|
604
|
+
/** Arguments accepted by `browser.vision`. */
const VisionSchema = z.object({
  prompt: z
    .string()
    .min(1)
    .describe('What you want the vision model to answer/describe about the screenshot.'),
})

/**
 * Build the `browser.vision` tool: takes a screenshot of the current page,
 * sends it with the caller's prompt to `deps.visionInfer`, and returns the
 * model's reply (content, model, usage, finish reason).
 *
 * Returns an error result — without touching the browser — when no vision
 * function is configured. The screenshot is written to a uniquely named file
 * in the OS temp dir and removed on every path, including a failed capture.
 *
 * @param deps Browser deps plus `visionInfer`, the inference function (or null when unconfigured).
 */
export function makeBrowserVision(
  deps: BrowserDeps & { visionInfer: VisionInferFn | null },
): ToolDef<z.infer<typeof VisionSchema>> {
  // Rejections are not guaranteed to be Error instances; never assume `.message`.
  const describeError = (e: unknown): string => (e instanceof Error ? e.message : String(e))

  return {
    name: 'browser.vision',
    description:
      "Capture the current page as a screenshot and send it to the configured vision model with a prompt. Returns the model's reply. Routes to the configured vision provider on Mantle Compute (qwen3-vl-30b on mainnet by default).",
    shouldDefer: true,
    searchHint: 'browser vision screenshot describe ocr image',
    schema: VisionSchema,
    handler: async args => {
      if (!deps.visionInfer) {
        return {
          ok: false,
          error:
            'vision provider not configured. Set `vision.provider` in ~/.nebula/config.ts to a Mantle Compute multimodal provider.',
        }
      }
      // Timestamp + pid alone can collide for two calls in the same millisecond.
      const path = join(
        tmpdir(),
        `nebula-vision-${Date.now()}-${process.pid}-${randomHex(4)}.png`,
      )
      const shot = await runAgentBrowser('screenshot', [path], deps)
      if (!shot.ok) {
        // A failed or timed-out capture may still have left a partial file behind.
        rmSafe(path)
        return shot
      }
      let bytes: Uint8Array
      try {
        bytes = new Uint8Array(readFileSync(path))
      } catch (e) {
        return { ok: false, error: `screenshot read failed: ${describeError(e)}` }
      } finally {
        rmSafe(path)
      }
      const mediaType = sniffMimeFromBytes(bytes, 'png') ?? 'image/png'
      try {
        const result = await deps.visionInfer({
          images: [{ bytes, mediaType }],
          prompt: args.prompt,
          maxOutputTokens: 1024,
        })
        return {
          ok: true,
          data: {
            content: result.content,
            model: result.model ?? null,
            usage: result.usage,
            finishReason: result.finishReason,
          },
        }
      } catch (e) {
        // Keep provider error text short in the tool result.
        return { ok: false, error: `vision call failed: ${describeError(e).slice(0, 240)}` }
      }
    },
  }
}
|
|
662
|
+
|
|
663
|
+
/** Arguments accepted by `browser.console`. */
const ConsoleSchema = z.object({
  clear: coerceBool.optional().describe('When true, clears console after reading.'),
})

/**
 * Build the `browser.console` tool: runs `agent-browser console`, adding
 * `--clear` when the caller asked for the buffer to be cleared.
 */
export function makeBrowserConsole(deps: BrowserDeps): ToolDef<z.infer<typeof ConsoleSchema>> {
  const tool: ToolDef<z.infer<typeof ConsoleSchema>> = {
    name: 'browser.console',
    description: 'Read accumulated console output (logs, warnings, errors) from the page.',
    shouldDefer: true,
    searchHint: 'browser console logs warnings errors',
    schema: ConsoleSchema,
    handler: async ({ clear }) => {
      const cliFlags = clear ? ['--clear'] : []
      return runAgentBrowser('console', cliFlags, deps)
    },
  }
  return tool
}
|
|
681
|
+
|
|
682
|
+
/**
 * Every browser tool factory that can be built from plain `BrowserDeps`.
 * `makeBrowserVision` is not listed: it additionally requires a
 * `visionInfer` dependency and has to be constructed separately.
 */
export const ALL_BROWSER_TOOL_FACTORIES = [
  // Navigation and page state
  makeBrowserNavigate,
  makeBrowserSnapshot,
  // Interaction
  makeBrowserClick,
  makeBrowserType,
  makeBrowserScroll,
  makeBrowserBack,
  makeBrowserPress,
  // Extraction and diagnostics
  makeBrowserGetImages,
  makeBrowserConsole,
]
|
|
693
|
+
|
|
694
|
+
/**
 * Hooks for the regression suite only. `reset()` clears the module-level
 * session/socket/cleanup state (deleting the socket directory if one was
 * created) so a test can stub PATH or the platform without leaking state into
 * the next test. The remaining entries expose otherwise-private helpers.
 */
export const __test = {
  reset(): void {
    const staleDir = cachedSocketDir
    cachedSessionName = null
    cachedSocketDir = null
    cleanupRegistered = false
    if (staleDir) {
      try {
        rmSync(staleDir, { recursive: true, force: true })
      } catch {
        // A leftover temp dir must not fail the test run.
      }
    }
  },
  findAgentBrowser,
  isBrowserAvailable,
  socketSafeTmpdir,
  getSessionName,
  getSocketDir,
  buildBrowserEnv,
}
|