@porcupine/kuskus 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +23 -1
- package/.env.example +13 -1
- package/README.md +431 -0
- package/SKILL.md +48 -0
- package/bin/cli.js +47 -19
- package/package.json +4 -2
- package/src/agent/executor.js +36 -14
- package/src/agent/index.js +4 -2
- package/src/agent/planner.js +29 -39
- package/src/agent/providers.js +124 -0
- package/src/agent/tools.js +17 -0
- package/src/cdp/client.js +94 -49
- package/src/cdp/domains/page.js +6 -6
- package/src/cdp/domains/runtime.js +19 -1
- package/src/cdp/session.js +89 -57
- package/src/mcp/handlers.js +4 -2
- package/src/utils/browser.js +159 -14
- package/src/utils/chromium.js +239 -0
- package/src/utils/dom-to-text.js +11 -46
- package/tests/agent/providers.test.js +33 -0
- package/src/utils/install.js +0 -138
|
@@ -3,7 +3,29 @@
|
|
|
3
3
|
"allow": [
|
|
4
4
|
"Bash(npm install 2>&1)",
|
|
5
5
|
"Bash(node -e \"import\\('@modelcontextprotocol/sdk/server/stdio.js'\\).then\\(m => console.log\\(Object.keys\\(m\\)\\)\\).catch\\(e => console.error\\(e.message\\)\\)\" && node -e \"import\\('zod'\\).then\\(m => console.log\\('zod ok'\\)\\).catch\\(e => console.error\\(e.message\\)\\)\")",
|
|
6
|
-
"Bash(npx vitest run --reporter=verbose 2>&1)"
|
|
6
|
+
"Bash(npx vitest run --reporter=verbose 2>&1)",
|
|
7
|
+
"WebFetch(domain:opencode.ai)",
|
|
8
|
+
"Read(//Users/anak10thn/.config/opencode/**)",
|
|
9
|
+
"Read(//Users/anak10thn/**)",
|
|
10
|
+
"Bash(node bin/cli.js install 2>&1)",
|
|
11
|
+
"Bash(~/.local/bin/lightpanda --version 2>&1 || file ~/.local/bin/lightpanda)",
|
|
12
|
+
"Bash(echo \"ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY\" | head -c 50)",
|
|
13
|
+
"Bash(pkill lightpanda 2>/dev/null; sleep 0.3\nnpx vitest run --reporter=verbose 2>&1)",
|
|
14
|
+
"Bash(node -e \"\nimport\\('dotenv/config'\\);\nimport\\('./src/utils/browser.js'\\).then\\(async \\({ ensureBrowser }\\) => {\n const proc = await ensureBrowser\\({ port: 9222, host: '127.0.0.1', log: console.log }\\);\n\n const { SessionManager } = await import\\('./src/cdp/session.js'\\);\n const { createPageDomain } = await import\\('./src/cdp/domains/page.js'\\);\n const { createRuntimeDomain } = await import\\('./src/cdp/domains/runtime.js'\\);\n\n const sm = new SessionManager\\({ host: '127.0.0.1', port: 9222 }\\);\n await sm.connect\\(\\);\n\n const sc = await sm.getActiveSession\\(\\);\n const page = createPageDomain\\(sc\\);\n const rt = createRuntimeDomain\\(sc\\);\n\n console.log\\('Navigating to example.com...'\\);\n await page.navigate\\('https://example.com'\\);\n\n const title = await rt.evaluate\\('document.title'\\);\n const url = await page.getURL\\(\\);\n console.log\\('URL :', url\\);\n console.log\\('Title :', title\\);\n\n const screenshot = await page.screenshot\\(\\);\n console.log\\('Screenshot size:', screenshot.length, 'chars \\(base64\\)'\\);\n\n await sm.close\\(\\);\n proc?.kill\\(\\);\n process.exit\\(0\\);\n}\\).catch\\(e => { console.error\\('ERROR:', e.message\\); process.exit\\(1\\); }\\);\n\" 2>&1)",
|
|
15
|
+
"Bash(pkill lightpanda 2>/dev/null; sleep 0.3\necho \"Masukkan ANTHROPIC_API_KEY untuk test \\(atau set di .env\\):\"\nls .env 2>/dev/null && source .env 2>/dev/null\nprintenv ANTHROPIC_API_KEY | cut -c1-20 || echo \"\\(tidak ada\\)\")",
|
|
16
|
+
"Bash(pkill lightpanda 2>/dev/null; sleep 0.3\nnode -e \"\nimport\\('./src/utils/browser.js'\\).then\\(async \\({ ensureBrowser }\\) => {\n const proc = await ensureBrowser\\({ port: 9222, host: '127.0.0.1', log: console.log }\\);\n\n const { SessionManager } = await import\\('./src/cdp/session.js'\\);\n const { createPageDomain } = await import\\('./src/cdp/domains/page.js'\\);\n const { createDOMDomain } = await import\\('./src/cdp/domains/dom.js'\\);\n const { createInputDomain } = await import\\('./src/cdp/domains/input.js'\\);\n const { createRuntimeDomain } = await import\\('./src/cdp/domains/runtime.js'\\);\n const { htmlToReadableText } = await import\\('./src/utils/dom-to-text.js'\\);\n\n const sm = new SessionManager\\({ host: '127.0.0.1', port: 9222 }\\);\n await sm.connect\\(\\);\n const sc = await sm.getActiveSession\\(\\);\n\n const page = createPageDomain\\(sc\\);\n const dom = createDOMDomain\\(sc\\);\n const rt = createRuntimeDomain\\(sc\\);\n const inp = createInputDomain\\(sc\\);\n\n // 1. Navigate\n console.log\\('\\\\n[1] Navigate to https://example.com'\\);\n await page.navigate\\('https://example.com'\\);\n console.log\\(' URL:', await page.getURL\\(\\)\\);\n console.log\\(' Title:', await page.getTitle\\(\\)\\);\n\n // 2. DOM query\n console.log\\('\\\\n[2] querySelector h1'\\);\n const h1Id = await dom.querySelector\\('h1'\\);\n const h1HTML = await dom.getOuterHTML\\(h1Id\\);\n console.log\\(' h1:', h1HTML\\);\n\n // 3. Page content\n console.log\\('\\\\n[3] Page content \\(truncated\\)'\\);\n const html = await rt.evaluate\\('document.documentElement.outerHTML'\\);\n const text = htmlToReadableText\\(html, { maxLength: 300 }\\);\n console.log\\(' ', text.split\\('\\\\n'\\).slice\\(0,5\\).join\\('\\\\n '\\)\\);\n\n // 4. Navigate to google and search\n console.log\\('\\\\n[4] Navigate to google.com'\\);\n await page.navigate\\('https://google.com'\\);\n console.log\\(' URL:', await page.getURL\\(\\)\\);\n\n // 5. 
Screenshot \\(may return null on Lightpanda\\)\n console.log\\('\\\\n[5] Screenshot'\\);\n const ss = await page.screenshot\\(\\);\n console.log\\(' Result:', ss ? ss.length + ' chars base64' : 'null \\(unsupported by Lightpanda — OK\\)'\\);\n\n await sm.close\\(\\);\n proc?.kill\\(\\);\n console.log\\('\\\\nAll checks passed.'\\);\n process.exit\\(0\\);\n}\\).catch\\(e => { console.error\\('FAIL:', e.message, e.stack\\); process.exit\\(1\\); }\\);\n\" 2>&1)",
|
|
17
|
+
"Bash(pkill lightpanda 2>/dev/null; sleep 0.3\nnode -e \"\nimport\\('./src/utils/browser.js'\\).then\\(async \\({ ensureBrowser }\\) => {\n const proc = await ensureBrowser\\({ port: 9222, host: '127.0.0.1', log: console.log }\\);\n\n const { SessionManager } = await import\\('./src/cdp/session.js'\\);\n const { createPageDomain } = await import\\('./src/cdp/domains/page.js'\\);\n const { createDOMDomain } = await import\\('./src/cdp/domains/dom.js'\\);\n const { createInputDomain } = await import\\('./src/cdp/domains/input.js'\\);\n const { createRuntimeDomain } = await import\\('./src/cdp/domains/runtime.js'\\);\n const { htmlToReadableText } = await import\\('./src/utils/dom-to-text.js'\\);\n\n const sm = new SessionManager\\({ host: '127.0.0.1', port: 9222 }\\);\n await sm.connect\\(\\);\n const sc = await sm.getActiveSession\\(\\);\n\n const page = createPageDomain\\(sc\\);\n const dom = createDOMDomain\\(sc\\);\n const rt = createRuntimeDomain\\(sc\\);\n const inp = createInputDomain\\(sc\\);\n\n // 1. Navigate\n console.log\\('\\\\n[1] Navigate https://example.com'\\);\n await page.navigate\\('https://example.com'\\);\n console.log\\(' URL:', await page.getURL\\(\\)\\);\n console.log\\(' Title:', await page.getTitle\\(\\)\\);\n\n // 2. DOM query\n console.log\\('\\\\n[2] querySelector h1'\\);\n const h1Id = await dom.querySelector\\('h1'\\);\n const h1HTML = await dom.getOuterHTML\\(h1Id\\);\n console.log\\(' h1:', h1HTML.trim\\(\\)\\);\n\n // 3. Page content\n console.log\\('\\\\n[3] Page content'\\);\n const html = await rt.evaluate\\('document.documentElement.outerHTML'\\);\n const text = htmlToReadableText\\(html, { maxLength: 400 }\\);\n console.log\\(' ', text.replace\\(/\\\\n/g, '\\\\n '\\)\\);\n\n // 4. Screenshot\n console.log\\('\\\\n[4] Screenshot'\\);\n const ss = await page.screenshot\\(\\);\n console.log\\(' Result:', ss ? ss.length + ' chars base64' : 'null \\(unsupported — OK\\)'\\);\n\n // 5. 
list tabs\n console.log\\('\\\\n[5] List tabs'\\);\n const tabs = await sm.listTargets\\(\\);\n console.log\\(' Tabs:', JSON.stringify\\(tabs\\)\\);\n\n await sm.close\\(\\);\n proc?.kill\\(\\);\n console.log\\('\\\\nAll checks passed.'\\);\n process.exit\\(0\\);\n}\\).catch\\(e => { console.error\\('FAIL:', e.message\\); process.exit\\(1\\); }\\);\n\" 2>&1)",
|
|
18
|
+
"Bash(pkill lightpanda 2>/dev/null; sleep 0.3\nnode -e \"\nimport\\('./src/utils/browser.js'\\).then\\(async \\({ ensureBrowser }\\) => {\n const proc = await ensureBrowser\\({ port: 9222, host: '127.0.0.1', log: console.log }\\);\n\n const { SessionManager } = await import\\('./src/cdp/session.js'\\);\n const { createPageDomain } = await import\\('./src/cdp/domains/page.js'\\);\n const { createDOMDomain } = await import\\('./src/cdp/domains/dom.js'\\);\n const { createInputDomain } = await import\\('./src/cdp/domains/input.js'\\);\n const { createRuntimeDomain } = await import\\('./src/cdp/domains/runtime.js'\\);\n const { htmlToReadableText } = await import\\('./src/utils/dom-to-text.js'\\);\n\n const sm = new SessionManager\\({ host: '127.0.0.1', port: 9222 }\\);\n await sm.connect\\(\\);\n const sc = await sm.getActiveSession\\(\\);\n\n const page = createPageDomain\\(sc\\);\n const dom = createDOMDomain\\(sc\\);\n const rt = createRuntimeDomain\\(sc\\);\n const inp = createInputDomain\\(sc\\);\n\n console.log\\('\\\\n[1] Navigate https://example.com'\\);\n await page.navigate\\('https://example.com'\\);\n console.log\\(' URL:', await page.getURL\\(\\)\\);\n console.log\\(' Title:', await page.getTitle\\(\\)\\);\n\n console.log\\('\\\\n[2] querySelector h1'\\);\n const h1Id = await dom.querySelector\\('h1'\\);\n console.log\\(' h1:', \\(await dom.getOuterHTML\\(h1Id\\)\\).trim\\(\\)\\);\n\n console.log\\('\\\\n[3] Page content'\\);\n const html = await rt.evaluate\\('document.documentElement.outerHTML'\\);\n const text = htmlToReadableText\\(html, { maxLength: 300 }\\);\n console.log\\(' ', text.replace\\(/\\\\n/g, '\\\\n '\\)\\);\n\n console.log\\('\\\\n[4] Screenshot'\\);\n const ss = await page.screenshot\\(\\);\n console.log\\(' Result:', ss ? 
ss.length + ' chars base64' : 'null \\(unsupported — OK\\)'\\);\n\n console.log\\('\\\\n[5] List tabs'\\);\n const tabs = await sm.listTargets\\(\\);\n console.log\\(' Tabs:', JSON.stringify\\(tabs, null, 2\\)\\);\n\n console.log\\('\\\\n[6] New tab + navigate'\\);\n const sc2 = await sm.createTarget\\('https://httpbin.org/get'\\);\n const page2 = createPageDomain\\(sc2\\);\n await new Promise\\(r => setTimeout\\(r, 1500\\)\\);\n console.log\\(' Tab2 URL:', await page2.getURL\\(\\)\\);\n\n console.log\\('\\\\n[7] list tabs after new tab'\\);\n const tabs2 = await sm.listTargets\\(\\);\n console.log\\(' Tabs:', tabs2.map\\(t => t.url\\)\\);\n\n await sm.close\\(\\);\n proc?.kill\\(\\);\n console.log\\('\\\\nAll checks passed.'\\);\n process.exit\\(0\\);\n}\\).catch\\(e => { console.error\\('FAIL:', e.message\\); process.exit\\(1\\); }\\);\n\" 2>&1)",
|
|
19
|
+
"Bash(pkill lightpanda 2>/dev/null; sleep 0.3\nnode -e \"\nimport\\('./src/utils/browser.js'\\).then\\(async \\({ ensureBrowser }\\) => {\n const proc = await ensureBrowser\\({ port: 9222, host: '127.0.0.1', log: console.log }\\);\n\n const { SessionManager } = await import\\('./src/cdp/session.js'\\);\n const { createPageDomain } = await import\\('./src/cdp/domains/page.js'\\);\n const { createDOMDomain } = await import\\('./src/cdp/domains/dom.js'\\);\n const { createInputDomain } = await import\\('./src/cdp/domains/input.js'\\);\n const { createRuntimeDomain } = await import\\('./src/cdp/domains/runtime.js'\\);\n const { htmlToReadableText } = await import\\('./src/utils/dom-to-text.js'\\);\n\n const sm = new SessionManager\\({ host: '127.0.0.1', port: 9222 }\\);\n await sm.connect\\(\\);\n console.log\\('Capabilities:', sm.capabilities\\);\n\n const sc = await sm.getActiveSession\\(\\);\n const page = createPageDomain\\(sc, sm.capabilities\\);\n const dom = createDOMDomain\\(sc\\);\n const rt = createRuntimeDomain\\(sc\\);\n const inp = createInputDomain\\(sc\\);\n\n console.log\\('\\\\n[1] Navigate https://example.com'\\);\n await page.navigate\\('https://example.com'\\);\n console.log\\(' URL:', await page.getURL\\(\\)\\);\n console.log\\(' Title:', await page.getTitle\\(\\)\\);\n\n console.log\\('\\\\n[2] querySelector h1'\\);\n const h1Id = await dom.querySelector\\('h1'\\);\n console.log\\(' h1:', \\(await dom.getOuterHTML\\(h1Id\\)\\).trim\\(\\)\\);\n\n console.log\\('\\\\n[3] Page content'\\);\n const html = await rt.evaluate\\('document.documentElement.outerHTML'\\);\n const text = htmlToReadableText\\(html, { maxLength: 300 }\\);\n console.log\\(' ', text.replace\\(/\\\\n/g, '\\\\n '\\)\\);\n\n console.log\\('\\\\n[4] Screenshot'\\);\n const ss = await page.screenshot\\(\\);\n console.log\\(' Result:', ss ? 
ss.length + ' chars base64' : 'null \\(skipped — Lightpanda\\)'\\);\n\n console.log\\('\\\\n[5] List tabs'\\);\n const tabs = await sm.listTargets\\(\\);\n console.log\\(' Tabs:', JSON.stringify\\(tabs.map\\(t => t.url\\)\\)\\);\n\n console.log\\('\\\\n[6] New tab'\\);\n const sc2 = await sm.createTarget\\('https://httpbin.org/json'\\);\n const page2 = createPageDomain\\(sc2, sm.capabilities\\);\n await new Promise\\(r => setTimeout\\(r, 1500\\)\\);\n console.log\\(' Tab2 URL:', await page2.getURL\\(\\)\\);\n\n console.log\\('\\\\n[7] List tabs after new tab'\\);\n const tabs2 = await sm.listTargets\\(\\);\n console.log\\(' Tabs:', tabs2.map\\(t => t.url\\)\\);\n\n console.log\\('\\\\n[8] JS eval'\\);\n const rt2 = createRuntimeDomain\\(sc2\\);\n const json = await rt2.evaluate\\('JSON.stringify\\(document.title\\)'\\);\n console.log\\(' title:', json\\);\n\n await sm.close\\(\\);\n proc?.kill\\(\\);\n console.log\\('\\\\nAll checks passed.'\\);\n process.exit\\(0\\);\n}\\).catch\\(e => { console.error\\('FAIL:', e.message, '\\\\n', e.stack\\); process.exit\\(1\\); }\\);\n\" 2>&1)",
|
|
20
|
+
"Bash(pkill lightpanda 2>/dev/null; sleep 0.3\nnode -e \"\nimport\\('./src/utils/browser.js'\\).then\\(async \\({ ensureBrowser }\\) => {\n const proc = await ensureBrowser\\({ port: 9222, host: '127.0.0.1', log: console.log }\\);\n const { SessionManager } = await import\\('./src/cdp/session.js'\\);\n const { createPageDomain } = await import\\('./src/cdp/domains/page.js'\\);\n const { createDOMDomain } = await import\\('./src/cdp/domains/dom.js'\\);\n const { createInputDomain } = await import\\('./src/cdp/domains/input.js'\\);\n const { createRuntimeDomain } = await import\\('./src/cdp/domains/runtime.js'\\);\n const { htmlToReadableText } = await import\\('./src/utils/dom-to-text.js'\\);\n\n const sm = new SessionManager\\({ host: '127.0.0.1', port: 9222 }\\);\n await sm.connect\\(\\);\n console.log\\('Capabilities:', sm.capabilities\\);\n\n const sc = await sm.getActiveSession\\(\\);\n const page = createPageDomain\\(sc, sm.capabilities\\);\n const dom = createDOMDomain\\(sc\\);\n const rt = createRuntimeDomain\\(sc\\);\n const inp = createInputDomain\\(sc\\);\n\n console.log\\('\\\\n[1] Navigate example.com'\\);\n await page.navigate\\('https://example.com'\\);\n console.log\\(' URL:', await page.getURL\\(\\)\\);\n console.log\\(' Title:', await page.getTitle\\(\\)\\);\n\n console.log\\('\\\\n[2] querySelector h1'\\);\n const h1Id = await dom.querySelector\\('h1'\\);\n console.log\\(' h1:', \\(await dom.getOuterHTML\\(h1Id\\)\\).trim\\(\\)\\);\n\n console.log\\('\\\\n[3] Page content'\\);\n const html = await rt.evaluate\\('document.documentElement.outerHTML'\\);\n console.log\\(' ', htmlToReadableText\\(html, { maxLength: 250 }\\).replace\\(/\\\\n/g, '\\\\n '\\)\\);\n\n console.log\\('\\\\n[4] Screenshot \\(expect null on Lightpanda\\)'\\);\n console.log\\(' Result:', await page.screenshot\\(\\) ?? 
'null \\(skipped\\)'\\);\n\n console.log\\('\\\\n[5] List tabs'\\);\n console.log\\(' Tabs:', JSON.stringify\\(\\(await sm.listTargets\\(\\)\\).map\\(t => t.url\\)\\)\\);\n\n console.log\\('\\\\n[6] createTarget \\(single-target fallback\\)'\\);\n const sc2 = await sm.createTarget\\('https://httpbin.org/json'\\);\n const page2 = createPageDomain\\(sc2, sm.capabilities\\);\n await new Promise\\(r => setTimeout\\(r, 1500\\)\\);\n console.log\\(' URL after createTarget:', await page2.getURL\\(\\)\\);\n\n console.log\\('\\\\n[7] click + type'\\);\n await page.navigate\\('https://example.com'\\);\n await new Promise\\(r => setTimeout\\(r, 1000\\)\\);\n const linkId = await dom.querySelector\\('a'\\);\n if \\(linkId\\) {\n await dom.scrollIntoView\\(linkId\\);\n const { x, y } = await dom.getCenter\\(linkId\\);\n console.log\\(' link center:', x, y\\);\n await inp.click\\(x, y\\);\n await new Promise\\(r => setTimeout\\(r, 1000\\)\\);\n console.log\\(' URL after click:', await page.getURL\\(\\)\\);\n }\n\n await sm.close\\(\\);\n proc?.kill\\(\\);\n console.log\\('\\\\nAll checks passed.'\\);\n process.exit\\(0\\);\n}\\).catch\\(e => { console.error\\('FAIL:', e.message\\); process.exit\\(1\\); }\\);\n\" 2>&1)",
|
|
21
|
+
"Bash(pkill lightpanda 2>/dev/null; sleep 0.3\nnode -e \"\nimport\\('./src/utils/browser.js'\\).then\\(async \\({ ensureBrowser }\\) => {\n const proc = await ensureBrowser\\({ port: 9222, host: '127.0.0.1', log: console.log }\\);\n const { SessionManager } = await import\\('./src/cdp/session.js'\\);\n const { createPageDomain } = await import\\('./src/cdp/domains/page.js'\\);\n const { createDOMDomain } = await import\\('./src/cdp/domains/dom.js'\\);\n const { createInputDomain } = await import\\('./src/cdp/domains/input.js'\\);\n const { createRuntimeDomain } = await import\\('./src/cdp/domains/runtime.js'\\);\n const { htmlToReadableText } = await import\\('./src/utils/dom-to-text.js'\\);\n\n const sm = new SessionManager\\({ host: '127.0.0.1', port: 9222 }\\);\n await sm.connect\\(\\);\n console.log\\('Capabilities:', sm.capabilities\\);\n\n const sc = await sm.getActiveSession\\(\\);\n const page = createPageDomain\\(sc, sm.capabilities\\);\n const dom = createDOMDomain\\(sc\\);\n const rt = createRuntimeDomain\\(sc\\);\n const inp = createInputDomain\\(sc\\);\n\n console.log\\('\\\\n[1] Navigate example.com'\\);\n await page.navigate\\('https://example.com'\\);\n console.log\\(' URL:', await page.getURL\\(\\)\\);\n console.log\\(' Title:', await page.getTitle\\(\\)\\);\n\n console.log\\('\\\\n[2] querySelector h1'\\);\n const h1Id = await dom.querySelector\\('h1'\\);\n console.log\\(' h1:', \\(await dom.getOuterHTML\\(h1Id\\)\\).trim\\(\\)\\);\n\n console.log\\('\\\\n[3] Page content'\\);\n const html = await rt.evaluate\\('document.documentElement.outerHTML'\\);\n console.log\\(' ', htmlToReadableText\\(html, { maxLength: 200 }\\).replace\\(/\\\\n/g, '\\\\n '\\)\\);\n\n console.log\\('\\\\n[4] Screenshot \\(expect null\\)'\\);\n console.log\\(' Result:', await page.screenshot\\(\\) ?? 
'null \\(skipped — Lightpanda\\)'\\);\n\n console.log\\('\\\\n[5] List tabs'\\);\n console.log\\(' Tabs:', JSON.stringify\\(\\(await sm.listTargets\\(\\)\\).map\\(t => t.url\\)\\)\\);\n\n console.log\\('\\\\n[6] createTarget fallback — navigate to httpbin'\\);\n await sm.createTarget\\('https://httpbin.org/json'\\);\n await new Promise\\(r => setTimeout\\(r, 1500\\)\\);\n console.log\\(' URL:', await page.getURL\\(\\)\\);\n\n console.log\\('\\\\n[7] click link on example.com'\\);\n await page.navigate\\('https://example.com'\\);\n await new Promise\\(r => setTimeout\\(r, 800\\)\\);\n const linkId = await dom.querySelector\\('a'\\);\n if \\(linkId\\) {\n const { x, y } = await dom.getCenter\\(linkId\\);\n console.log\\(' click at', x, y\\);\n await inp.click\\(x, y\\);\n await new Promise\\(r => setTimeout\\(r, 1000\\)\\);\n console.log\\(' URL after click:', await page.getURL\\(\\)\\);\n } else {\n console.log\\(' no link found'\\);\n }\n\n console.log\\('\\\\n[8] evaluate JS'\\);\n const links = await rt.evaluate\\('JSON.stringify\\(Array.from\\(document.querySelectorAll\\(\\\\\"a\\\\\"\\)\\).map\\(a=>a.href\\)\\)'\\);\n console.log\\(' links:', links\\);\n\n await sm.close\\(\\);\n proc?.kill\\(\\);\n console.log\\('\\\\nAll checks passed.'\\);\n process.exit\\(0\\);\n}\\).catch\\(e => { console.error\\('FAIL:', e.message, '\\\\n', e.stack?.split\\('\\\\n'\\)[1]\\); process.exit\\(1\\); }\\);\n\" 2>&1)",
|
|
22
|
+
"Bash(pkill lightpanda 2>/dev/null; sleep 0.3\nnode -e \"\nimport\\('./src/utils/browser.js'\\).then\\(async \\({ ensureBrowser }\\) => {\n const proc = await ensureBrowser\\({ port: 9222, host: '127.0.0.1', log: console.log }\\);\n const { SessionManager } = await import\\('./src/cdp/session.js'\\);\n const { createPageDomain } = await import\\('./src/cdp/domains/page.js'\\);\n const { createDOMDomain } = await import\\('./src/cdp/domains/dom.js'\\);\n const { createInputDomain } = await import\\('./src/cdp/domains/input.js'\\);\n const { createRuntimeDomain } = await import\\('./src/cdp/domains/runtime.js'\\);\n const { htmlToReadableText } = await import\\('./src/utils/dom-to-text.js'\\);\n\n const sm = new SessionManager\\({ host: '127.0.0.1', port: 9222 }\\);\n await sm.connect\\(\\);\n\n const sc = await sm.getActiveSession\\(\\);\n const page = createPageDomain\\(sc, sm.capabilities\\);\n const dom = createDOMDomain\\(sc\\);\n const rt = createRuntimeDomain\\(sc\\);\n const inp = createInputDomain\\(sc\\);\n\n console.log\\('[1] navigate example.com'\\);\n await page.navigate\\('https://example.com'\\);\n console.log\\(' URL:', await page.getURL\\(\\), '| Title:', await page.getTitle\\(\\)\\);\n\n console.log\\('[2] h1 text'\\);\n const h1 = await dom.querySelector\\('h1'\\);\n console.log\\(' ', \\(await dom.getOuterHTML\\(h1\\)\\).trim\\(\\)\\);\n\n console.log\\('[3] page content'\\);\n const text = htmlToReadableText\\(await rt.evaluate\\('document.documentElement.outerHTML'\\), { maxLength: 200 }\\);\n console.log\\(' ', text.replace\\(/\\\\n/g, '\\\\n '\\)\\);\n\n console.log\\('[4] screenshot:', await page.screenshot\\(\\) ?? 
'null \\(Lightpanda — OK\\)'\\);\n\n console.log\\('[5] tabs:', \\(await sm.listTargets\\(\\)\\).map\\(t => t.url\\)\\);\n\n console.log\\('[6] createTarget \\(single-target fallback\\)'\\);\n await sm.createTarget\\('https://httpbin.org/json'\\);\n await new Promise\\(r => setTimeout\\(r, 1500\\)\\);\n console.log\\(' URL:', await page.getURL\\(\\)\\);\n\n console.log\\('[7] click link'\\);\n await page.navigate\\('https://example.com'\\);\n await new Promise\\(r => setTimeout\\(r, 1000\\)\\);\n const link = await dom.querySelector\\('a'\\);\n if \\(link\\) {\n const { x, y } = await dom.getCenter\\(link\\);\n await inp.click\\(x, y\\);\n await new Promise\\(r => setTimeout\\(r, 1500\\)\\);\n console.log\\(' URL after click:', await page.getURL\\(\\)\\);\n }\n\n console.log\\('[8] type into input \\(httpbin form\\)'\\);\n await page.navigate\\('https://httpbin.org/forms/post'\\);\n await new Promise\\(r => setTimeout\\(r, 1500\\)\\);\n const custname = await dom.querySelector\\('input[name=custname]'\\);\n if \\(custname\\) {\n await dom.scrollIntoView\\(custname\\);\n const { x, y } = await dom.getCenter\\(custname\\);\n await inp.click\\(x, y\\);\n await inp.type\\('Kuskus Bot'\\);\n const val = await rt.evaluate\\('document.querySelector\\(\\\\\"input[name=custname]\\\\\"\\).value'\\);\n console.log\\(' input value:', val\\);\n } else {\n console.log\\(' input not found'\\);\n }\n\n await sm.close\\(\\);\n proc?.kill\\(\\);\n console.log\\('\\\\nAll checks passed.'\\);\n process.exit\\(0\\);\n}\\).catch\\(e => { console.error\\('FAIL:', e.message, '\\\\n', e.stack?.split\\('\\\\n'\\)[1]\\); process.exit\\(1\\); }\\);\n\" 2>&1)",
|
|
23
|
+
"Bash(pkill lightpanda 2>/dev/null; sleep 0.3\nnode -e \"\nimport\\('./src/utils/browser.js'\\).then\\(async \\({ ensureBrowser }\\) => {\n const proc = await ensureBrowser\\({ port: 9222, host: '127.0.0.1', log: \\(m\\) => process.stdout.write\\(m+'\\\\n'\\) }\\);\n const { SessionManager } = await import\\('./src/cdp/session.js'\\);\n const { createPageDomain } = await import\\('./src/cdp/domains/page.js'\\);\n const { createDOMDomain } = await import\\('./src/cdp/domains/dom.js'\\);\n const { createInputDomain } = await import\\('./src/cdp/domains/input.js'\\);\n const { createRuntimeDomain } = await import\\('./src/cdp/domains/runtime.js'\\);\n const { htmlToReadableText } = await import\\('./src/utils/dom-to-text.js'\\);\n\n const sm = new SessionManager\\({ host: '127.0.0.1', port: 9222 }\\);\n await sm.connect\\(\\);\n\n const sc = await sm.getActiveSession\\(\\);\n const page = createPageDomain\\(sc, sm.capabilities\\);\n const dom = createDOMDomain\\(sc\\);\n const rt = createRuntimeDomain\\(sc\\);\n const inp = createInputDomain\\(sc\\);\n\n process.stdout.write\\('[1] navigate example.com\\\\n'\\);\n await page.navigate\\('https://example.com'\\);\n process.stdout.write\\(' URL: ' + await page.getURL\\(\\) + '\\\\n'\\);\n process.stdout.write\\(' Title: ' + await page.getTitle\\(\\) + '\\\\n'\\);\n\n process.stdout.write\\('[2] h1: ' + \\(await dom.getOuterHTML\\(await dom.querySelector\\('h1'\\)\\)\\).trim\\(\\) + '\\\\n'\\);\n\n process.stdout.write\\('[3] screenshot: ' + \\(await page.screenshot\\(\\) ?? 
'null \\(Lightpanda OK\\)'\\) + '\\\\n'\\);\n\n process.stdout.write\\('[4] tabs: ' + JSON.stringify\\(\\(await sm.listTargets\\(\\)\\).map\\(t=>t.url\\)\\) + '\\\\n'\\);\n\n process.stdout.write\\('[5] createTarget fallback\\\\n'\\);\n await sm.createTarget\\('https://httpbin.org/json'\\);\n await new Promise\\(r=>setTimeout\\(r,1500\\)\\);\n process.stdout.write\\(' URL: ' + await page.getURL\\(\\) + '\\\\n'\\);\n\n process.stdout.write\\('[6] click link\\\\n'\\);\n await page.navigate\\('https://example.com'\\);\n await new Promise\\(r=>setTimeout\\(r,1000\\)\\);\n const link = await dom.querySelector\\('a'\\);\n if \\(link\\) {\n const {x,y} = await dom.getCenter\\(link\\);\n await inp.click\\(x, y\\);\n await new Promise\\(r=>setTimeout\\(r,2000\\)\\);\n process.stdout.write\\(' URL after click: ' + await page.getURL\\(\\) + '\\\\n'\\);\n }\n\n process.stdout.write\\('[7] type into form\\\\n'\\);\n await page.navigate\\('https://httpbin.org/forms/post'\\);\n await new Promise\\(r=>setTimeout\\(r,2000\\)\\);\n const input = await dom.querySelector\\('input[name=custname]'\\);\n if \\(input\\) {\n await dom.scrollIntoView\\(input\\);\n const {x,y} = await dom.getCenter\\(input\\);\n await inp.click\\(x,y\\);\n await inp.type\\('Kuskus Bot'\\);\n const val = await rt.evaluate\\('document.querySelector\\(\\\\\"input[name=custname]\\\\\"\\).value'\\);\n process.stdout.write\\(' typed value: ' + val + '\\\\n'\\);\n }\n\n await sm.close\\(\\);\n proc?.kill\\(\\);\n process.stdout.write\\('\\\\nAll checks passed.\\\\n'\\);\n process.exit\\(0\\);\n}\\).catch\\(e => { process.stderr.write\\('FAIL: '+e.message+'\\\\n'\\); process.exit\\(1\\); }\\);\n\" 2>&1 | grep -v '^\\\\[32m')",
|
|
24
|
+
"Bash(pkill lightpanda 2>/dev/null; sleep 0.3\nnode -e \"\nimport\\('./src/utils/browser.js'\\).then\\(async \\({ ensureBrowser }\\) => {\n const proc = await ensureBrowser\\({ port: 9222, host: '127.0.0.1', log: m => process.stdout.write\\(m+'\\\\n'\\) }\\);\n const { SessionManager } = await import\\('./src/cdp/session.js'\\);\n const { createPageDomain } = await import\\('./src/cdp/domains/page.js'\\);\n const { createDOMDomain } = await import\\('./src/cdp/domains/dom.js'\\);\n const { createInputDomain } = await import\\('./src/cdp/domains/input.js'\\);\n const { createRuntimeDomain } = await import\\('./src/cdp/domains/runtime.js'\\);\n const { htmlToReadableText } = await import\\('./src/utils/dom-to-text.js'\\);\n\n const sm = new SessionManager\\({ host: '127.0.0.1', port: 9222 }\\);\n await sm.connect\\(\\);\n\n const sc = await sm.getActiveSession\\(\\);\n const page = createPageDomain\\(sc, sm.capabilities\\);\n const dom = createDOMDomain\\(sc\\);\n const rt = createRuntimeDomain\\(sc\\);\n const inp = createInputDomain\\(sc\\);\n\n console.log\\('[1] navigate + title'\\);\n await page.navigate\\('https://example.com'\\);\n console.log\\(' ✓', await page.getURL\\(\\), '|', await page.getTitle\\(\\)\\);\n\n console.log\\('[2] DOM query'\\);\n const h1 = await dom.querySelector\\('h1'\\);\n console.log\\(' ✓', \\(await dom.getOuterHTML\\(h1\\)\\).trim\\(\\)\\);\n\n console.log\\('[3] page content'\\);\n const text = htmlToReadableText\\(await rt.evaluate\\('document.documentElement.outerHTML'\\), {maxLength:200}\\);\n console.log\\(' ✓', text.split\\('\\\\n'\\)[0]\\);\n\n console.log\\('[4] screenshot capability'\\);\n console.log\\(' ✓ screenshot:', await page.screenshot\\(\\) ?? 
'null \\(Lightpanda — skipped safely\\)'\\);\n\n console.log\\('[5] list tabs'\\);\n console.log\\(' ✓', \\(await sm.listTargets\\(\\)\\).map\\(t=>t.url\\)\\);\n\n console.log\\('[6] form fill + type'\\);\n await page.navigate\\('https://httpbin.org/forms/post'\\);\n await new Promise\\(r=>setTimeout\\(r,2000\\)\\);\n const custname = await dom.querySelector\\('input[name=custname]'\\);\n if \\(custname\\) {\n await dom.scrollIntoView\\(custname\\);\n const {x,y} = await dom.getCenter\\(custname\\);\n await inp.click\\(x,y\\);\n await inp.type\\('Kuskus Bot'\\);\n const val = await rt.evaluate\\('document.querySelector\\(\\\\\"input[name=custname]\\\\\"\\).value'\\);\n console.log\\(' ✓ typed:', val\\);\n } else { console.log\\(' ✗ input not found'\\); }\n\n console.log\\('[7] select dropdown'\\);\n const sel = await dom.querySelector\\('select[name=size]'\\);\n if \\(sel\\) {\n await rt.evaluate\\('const el=document.querySelector\\(\\\\\"select[name=size]\\\\\"\\);el.value=\\\\\"large\\\\\";el.dispatchEvent\\(new Event\\(\\\\\"change\\\\\",{bubbles:true}\\)\\)'\\);\n const val = await rt.evaluate\\('document.querySelector\\(\\\\\"select[name=size]\\\\\"\\).value'\\);\n console.log\\(' ✓ select value:', val\\);\n }\n\n console.log\\('[8] evaluate JS'\\);\n const links = await rt.evaluate\\('Array.from\\(document.querySelectorAll\\(\\\\\"a\\\\\"\\)\\).map\\(a=>a.textContent.trim\\(\\)\\).filter\\(Boolean\\)'\\);\n console.log\\(' ✓ links on page:', links\\);\n\n await sm.close\\(\\);\n proc?.kill\\(\\);\n console.log\\('\\\\n✓ All core checks passed.'\\);\n process.exit\\(0\\);\n}\\).catch\\(e => { console.error\\('✗ FAIL:', e.message\\); process.exit\\(1\\); }\\);\n\" 2>&1 | grep -v '^\\\\[3')",
|
|
25
|
+
"Bash(pkill lightpanda 2>/dev/null; sleep 0.3\nnode -e \"\nimport\\('./src/utils/browser.js'\\).then\\(async \\({ ensureBrowser }\\) => {\n const proc = await ensureBrowser\\({ port: 9222, host: '127.0.0.1', log: m => process.stdout.write\\(m+'\\\\n'\\) }\\);\n const { SessionManager } = await import\\('./src/cdp/session.js'\\);\n const { createPageDomain } = await import\\('./src/cdp/domains/page.js'\\);\n const { createDOMDomain } = await import\\('./src/cdp/domains/dom.js'\\);\n const { createRuntimeDomain } = await import\\('./src/cdp/domains/runtime.js'\\);\n const { Executor } = await import\\('./src/agent/executor.js'\\);\n\n const sm = new SessionManager\\({ host: '127.0.0.1', port: 9222 }\\);\n await sm.connect\\(\\);\n const exec = new Executor\\(sm\\);\n\n console.log\\('[1] navigate'\\);\n const r1 = await exec.execute\\('navigate', { url: 'https://example.com' }\\);\n console.log\\(' ', r1\\);\n\n console.log\\('[2] get_url'\\);\n const r2 = await exec.execute\\('get_url', {}\\);\n console.log\\(' ', r2\\);\n\n console.log\\('[3] get_page_content'\\);\n const r3 = await exec.execute\\('get_page_content', {}\\);\n console.log\\(' ', r3.slice\\(0, 150\\)\\);\n\n console.log\\('[4] screenshot'\\);\n const r4 = await exec.execute\\('screenshot', {}\\);\n console.log\\(' ', r4?.type === 'screenshot' ? 
'screenshot captured' : r4\\);\n\n console.log\\('[5] evaluate_js'\\);\n const r5 = await exec.execute\\('evaluate_js', { script: 'document.title' }\\);\n console.log\\(' ', r5\\);\n\n console.log\\('[6] navigate to form'\\);\n await exec.execute\\('navigate', { url: 'https://httpbin.org/forms/post' }\\);\n await new Promise\\(r => setTimeout\\(r, 2000\\)\\);\n\n console.log\\('[7] type_text'\\);\n const r7 = await exec.execute\\('type_text', { selector: 'input[name=custname]', text: 'Kuskus Bot' }\\);\n console.log\\(' ', r7\\);\n\n console.log\\('[8] select_option'\\);\n const r8 = await exec.execute\\('select_option', { selector: 'select[name=size]', value: 'large' }\\);\n console.log\\(' ', r8\\);\n\n console.log\\('[9] evaluate — verify values'\\);\n const r9 = await exec.execute\\('evaluate_js', {\n script: 'JSON.stringify\\({ name: document.querySelector\\(\\\\\"input[name=custname]\\\\\"\\).value, size: document.querySelector\\(\\\\\"select[name=size]\\\\\"\\).value }\\)'\n }\\);\n console.log\\(' ', r9\\);\n\n await sm.close\\(\\);\n proc?.kill\\(\\);\n console.log\\('\\\\nAll executor checks passed!'\\);\n process.exit\\(0\\);\n}\\).catch\\(e => { console.error\\('FAIL:', e.message\\); process.exit\\(1\\); }\\);\n\" 2>&1 | grep -v 'INFO\\\\|wsUrl')",
|
|
26
|
+
"Bash(pkill -x lightpanda 2>/dev/null; sleep 1 && node /tmp/kuskus-test.mjs 2>&1 | grep -v \"INFO\\\\|wsUrl\")",
|
|
27
|
+
"Bash(pkill -x lightpanda 2>/dev/null; npx vitest run --reporter=verbose 2>&1)",
|
|
28
|
+
"Bash(npm install openai 2>&1 | tail -3)"
|
|
7
29
|
]
|
|
8
30
|
}
|
|
9
31
|
}
|
package/.env.example
CHANGED
|
@@ -1,8 +1,19 @@
|
|
|
1
1
|
# ── CLI only ──────────────────────────────────────────────────────────────────
|
|
2
2
|
# Required only for `kuskus run/repl/script` commands.
|
|
3
3
|
# The MCP server does NOT use an API key — the host model drives the agent.
|
|
4
|
+
|
|
5
|
+
# Provider: anthropic | openai
|
|
6
|
+
# Leave unset to auto-detect from model name (claude-* → anthropic, gpt-* → openai)
|
|
7
|
+
# AGENT_PROVIDER=anthropic
|
|
8
|
+
|
|
9
|
+
# API keys — only the key for your chosen provider is needed
|
|
4
10
|
ANTHROPIC_API_KEY=sk-ant-...
|
|
11
|
+
# OPENAI_API_KEY=sk-...
|
|
12
|
+
|
|
13
|
+
# Model name — provider is auto-detected if AGENT_PROVIDER is not set
|
|
5
14
|
AGENT_MODEL=claude-sonnet-4-6
|
|
15
|
+
# AGENT_MODEL=gpt-4o
|
|
16
|
+
|
|
6
17
|
AGENT_MAX_STEPS=20
|
|
7
18
|
AGENT_MAX_TOKENS=4096
|
|
8
19
|
AGENT_INCLUDE_SCREENSHOT=true
|
|
@@ -11,7 +22,8 @@ AGENT_SCREENSHOT_QUALITY=80
|
|
|
11
22
|
# ── Browser (CLI + MCP) ────────────────────────────────────────────────────────
|
|
12
23
|
CDP_URL=ws://localhost:9222
|
|
13
24
|
CDP_LAUNCH_BROWSER=false
|
|
14
|
-
|
|
25
|
+
# Override auto-detect with a specific Chrome/Chromium binary
|
|
26
|
+
# CDP_BROWSER_PATH=/Applications/Google Chrome.app/Contents/MacOS/Google Chrome
|
|
15
27
|
CDP_BROWSER_PORT=9222
|
|
16
28
|
|
|
17
29
|
# ── Logging ───────────────────────────────────────────────────────────────────
|
package/README.md
ADDED
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="assets/logo.png" width="160" alt="Kuskus" />
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<h1 align="center">Kuskus</h1>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
AI browser agent via Chrome DevTools Protocol — CLI + MCP Server
|
|
9
|
+
</p>
|
|
10
|
+
|
|
11
|
+
<p align="center">
|
|
12
|
+
<a href="https://www.npmjs.com/package/@porcupine/kuskus"><img src="https://img.shields.io/npm/v/@porcupine/kuskus?color=a78bfa&label=npm" alt="npm" /></a>
|
|
13
|
+
<img src="https://img.shields.io/badge/node-%3E%3D20-brightgreen" alt="node" />
|
|
14
|
+
<img src="https://img.shields.io/badge/browser-Chromium-blue" alt="Chromium" />
|
|
15
|
+
<img src="https://img.shields.io/badge/protocol-CDP-blue" alt="CDP" />
|
|
16
|
+
</p>
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
Kuskus controls a browser directly over the [Chrome DevTools Protocol](https://chromedevtools.github.io/devtools-protocol/) by auto-detecting an installed Chrome/Chromium build (or downloading one on demand).
|
|
21
|
+
|
|
22
|
+
Ships as two artifacts:
|
|
23
|
+
|
|
24
|
+
| | CLI | MCP Server |
|
|
25
|
+
|---|---|---|
|
|
26
|
+
| **Usage** | `kuskus run "task..."` | Claude Desktop, Cursor, OpenCode, etc. |
|
|
27
|
+
| **LLM** | Claude or OpenAI (via `ANTHROPIC_API_KEY` / `OPENAI_API_KEY`) | Host model — no key needed |
|
|
28
|
+
| **Role** | Full agent loop | Expose browser tools to any AI |
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## Requirements
|
|
33
|
+
|
|
34
|
+
- Node.js >= 20
|
|
35
|
+
- Chrome or Chromium (auto-detected; falls back to downloading a Chromium build into `~/.local`)
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## CLI
|
|
40
|
+
|
|
41
|
+
### Install
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
npm install -g @porcupine/kuskus
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Or use directly with npx (no install needed):
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
npx @porcupine/kuskus run "your task here"
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Setup
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
cp .env.example .env
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Set the API key for your chosen provider:
|
|
60
|
+
|
|
61
|
+
```env
|
|
62
|
+
# Anthropic (Claude) — default
|
|
63
|
+
ANTHROPIC_API_KEY=sk-ant-...
|
|
64
|
+
|
|
65
|
+
# OpenAI
|
|
66
|
+
OPENAI_API_KEY=sk-...
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Provider is **auto-detected from the model name** — no need to set it explicitly:
|
|
70
|
+
|
|
71
|
+
| Model prefix | Provider |
|
|
72
|
+
|---|---|
|
|
73
|
+
| `claude-*` | Anthropic |
|
|
74
|
+
| `gpt-*`, `o1*`, `o3*`, `o4*`, `chatgpt-*` | OpenAI |
|
|
75
|
+
|
|
76
|
+
### Commands
|
|
77
|
+
|
|
78
|
+
#### `run` — one-shot task
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
kuskus run "go to news.ycombinator.com and summarize the top 5 posts"
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Options:
|
|
85
|
+
|
|
86
|
+
```
|
|
87
|
+
--cdp-url <url> CDP WebSocket URL (default: ws://localhost:9222)
|
|
88
|
+
--provider <name> LLM provider: anthropic or openai (auto-detected if not set)
|
|
89
|
+
--model <model> Model name (default: claude-sonnet-4-6)
|
|
90
|
+
--max-steps <n> Max agent steps (default: 20)
|
|
91
|
+
--screenshots <dir> Save step screenshots to directory
|
|
92
|
+
--launch Auto-launch Chrome/Chromium before running
|
|
93
|
+
--no-headless Launch Chrome/Chromium with a visible window
|
|
94
|
+
--force-launch Shut down an existing debugging browser before launching
|
|
95
|
+
--user-data-dir <p> Reuse a Chrome profile directory (default ~/.local/chrome-profile when visible)
|
|
96
|
+
--output <format> Output format: text or json (default: text)
|
|
97
|
+
--debug Log raw CDP messages
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
#### `repl` — interactive session
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
kuskus repl --launch
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Special commands inside REPL:
|
|
107
|
+
|
|
108
|
+
```
|
|
109
|
+
!screenshot Capture and save the current viewport
|
|
110
|
+
!tabs List open browser tabs
|
|
111
|
+
!history Show action history
|
|
112
|
+
!clear Reset agent memory
|
|
113
|
+
!exit Quit
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
#### `script` — batch tasks from JSON
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
kuskus script ./tasks.json --output json
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
`tasks.json` format:
|
|
123
|
+
|
|
124
|
+
```json
|
|
125
|
+
[
|
|
126
|
+
"go to github.com/lightpanda-io/browser and read the description",
|
|
127
|
+
"search google for nodejs best practices 2025 and list the top 3 links"
|
|
128
|
+
]
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
#### `install` — manually install Chromium
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
kuskus install
|
|
135
|
+
# or force re-download
|
|
136
|
+
kuskus install --force
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
#### `mcp` — start MCP server
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
kuskus mcp
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
> Chromium is downloaded and launched automatically. No API key required.
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## MCP Server
|
|
150
|
+
|
|
151
|
+
The MCP server exposes browser control tools to any AI host — Claude Desktop, Cursor, OpenCode, or any MCP-compatible client. The host model drives the reasoning; Kuskus only executes browser actions.
|
|
152
|
+
|
|
153
|
+
### Claude Desktop
|
|
154
|
+
|
|
155
|
+
Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
|
|
156
|
+
|
|
157
|
+
```json
|
|
158
|
+
{
|
|
159
|
+
"mcpServers": {
|
|
160
|
+
"kuskus": {
|
|
161
|
+
"command": "npx",
|
|
162
|
+
"args": ["-y", "@porcupine/kuskus", "mcp"],
|
|
163
|
+
"env": {
|
|
164
|
+
"CDP_URL": "ws://localhost:9222"
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### OpenCode
|
|
172
|
+
|
|
173
|
+
Add to `~/.config/opencode/opencode.json`:
|
|
174
|
+
|
|
175
|
+
```json
|
|
176
|
+
{
|
|
177
|
+
"mcp": {
|
|
178
|
+
"kuskus": {
|
|
179
|
+
"type": "local",
|
|
180
|
+
"command": ["npx", "-y", "@porcupine/kuskus", "mcp"],
|
|
181
|
+
"enabled": true,
|
|
182
|
+
"environment": {
|
|
183
|
+
"CDP_URL": "ws://localhost:9222"
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### Cursor / other MCP clients
|
|
191
|
+
|
|
192
|
+
```json
|
|
193
|
+
{
|
|
194
|
+
"mcpServers": {
|
|
195
|
+
"kuskus": {
|
|
196
|
+
"command": "npx",
|
|
197
|
+
"args": ["-y", "@porcupine/kuskus", "mcp"]
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### Available MCP Tools
|
|
204
|
+
|
|
205
|
+
#### Navigation
|
|
206
|
+
| Tool | Description |
|
|
207
|
+
|------|-------------|
|
|
208
|
+
| `browser_navigate` | Navigate to a URL |
|
|
209
|
+
| `browser_go_back` | Go back in history |
|
|
210
|
+
| `browser_go_forward` | Go forward in history |
|
|
211
|
+
| `browser_get_url` | Get current URL |
|
|
212
|
+
|
|
213
|
+
#### Observation
|
|
214
|
+
| Tool | Description |
|
|
215
|
+
|------|-------------|
|
|
216
|
+
| `browser_screenshot` | Capture viewport as PNG |
|
|
217
|
+
| `browser_get_content` | Get page text content |
|
|
218
|
+
| `browser_element_info` | Get element attributes and text |
|
|
219
|
+
|
|
220
|
+
#### Interaction
|
|
221
|
+
| Tool | Description |
|
|
222
|
+
|------|-------------|
|
|
223
|
+
| `browser_click` | Click element by CSS selector |
|
|
224
|
+
| `browser_type` | Type text into an input |
|
|
225
|
+
| `browser_key_press` | Press a key (Enter, Tab, Escape…) |
|
|
226
|
+
| `browser_scroll` | Scroll up or down |
|
|
227
|
+
| `browser_hover` | Hover over an element |
|
|
228
|
+
| `browser_select` | Select a `<select>` option |
|
|
229
|
+
| `browser_checkbox` | Check or uncheck a checkbox |
|
|
230
|
+
|
|
231
|
+
#### JavaScript
|
|
232
|
+
| Tool | Description |
|
|
233
|
+
|------|-------------|
|
|
234
|
+
| `browser_evaluate` | Execute JS and return result |
|
|
235
|
+
| `browser_extract` | Extract structured data via JS |
|
|
236
|
+
|
|
237
|
+
#### Tabs
|
|
238
|
+
| Tool | Description |
|
|
239
|
+
|------|-------------|
|
|
240
|
+
| `browser_list_tabs` | List all open tabs |
|
|
241
|
+
| `browser_new_tab` | Open a new tab |
|
|
242
|
+
| `browser_switch_tab` | Switch to a tab by ID |
|
|
243
|
+
| `browser_close_tab` | Close a tab |
|
|
244
|
+
|
|
245
|
+
#### Utility
|
|
246
|
+
| Tool | Description |
|
|
247
|
+
|------|-------------|
|
|
248
|
+
| `browser_wait` | Wait N milliseconds (max 10s) |
|
|
249
|
+
|
|
250
|
+
### MCP Resources
|
|
251
|
+
|
|
252
|
+
| URI | Description |
|
|
253
|
+
|-----|-------------|
|
|
254
|
+
| `browser://screenshot` | Current viewport as PNG |
|
|
255
|
+
| `browser://page/content` | Current page text |
|
|
256
|
+
| `browser://page/url` | Current URL |
|
|
257
|
+
| `browser://tabs` | Open tabs as JSON |
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
## Architecture
|
|
262
|
+
|
|
263
|
+
```
|
|
264
|
+
Entry Points
|
|
265
|
+
kuskus run / repl / script kuskus mcp
|
|
266
|
+
│ │
|
|
267
|
+
▼ ▼
|
|
268
|
+
Agent Core MCP Server
|
|
269
|
+
(plan → execute loop) (expose tools directly)
|
|
270
|
+
Claude API + tool use no LLM — host model drives
|
|
271
|
+
│ │
|
|
272
|
+
└──────────────┬─────────────────┘
|
|
273
|
+
▼
|
|
274
|
+
Executor (CDP tools)
|
|
275
|
+
│
|
|
276
|
+
SessionManager
|
|
277
|
+
(single WebSocket,
|
|
278
|
+
session multiplexing)
|
|
279
|
+
│
|
|
280
|
+
Chromium Browser
|
|
281
|
+
ws://localhost:9222
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
### How the agent loop works
|
|
285
|
+
|
|
286
|
+
```
|
|
287
|
+
┌─────────────────────────────────────────────┐
|
|
288
|
+
│ 1. Observe get_page_content + screenshot │
|
|
289
|
+
│ 2. Plan Claude picks next tool │
|
|
290
|
+
│ 3. Execute CDP command via Chromium │
|
|
291
|
+
│ 4. Remember append step to rolling history │
|
|
292
|
+
│ 5. Repeat until finish or max steps │
|
|
293
|
+
└─────────────────────────────────────────────┘
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
---
|
|
297
|
+
|
|
298
|
+
## Configuration
|
|
299
|
+
|
|
300
|
+
All options can be set via environment variables (a `.env` file is supported):
|
|
301
|
+
|
|
302
|
+
```env
|
|
303
|
+
# CLI only — not needed for MCP
|
|
304
|
+
ANTHROPIC_API_KEY=sk-ant-... # for Claude models
|
|
305
|
+
OPENAI_API_KEY=sk-... # for GPT / o-series models
|
|
306
|
+
|
|
307
|
+
# Provider: anthropic | openai — auto-detected from model name if not set
|
|
308
|
+
# AGENT_PROVIDER=anthropic
|
|
309
|
+
|
|
310
|
+
AGENT_MODEL=claude-sonnet-4-6 # or gpt-4o, o3-mini, etc.
|
|
311
|
+
AGENT_MAX_STEPS=20
|
|
312
|
+
AGENT_MAX_TOKENS=4096
|
|
313
|
+
AGENT_INCLUDE_SCREENSHOT=true
|
|
314
|
+
AGENT_SCREENSHOT_QUALITY=80
|
|
315
|
+
|
|
316
|
+
# Browser (CLI + MCP)
|
|
317
|
+
CDP_URL=ws://localhost:9222
|
|
318
|
+
# CDP_BROWSER_PATH=/Applications/Google Chrome.app/Contents/MacOS/Google Chrome
|
|
319
|
+
CDP_BROWSER_PORT=9222
|
|
320
|
+
|
|
321
|
+
# Logging
|
|
322
|
+
LOG_LEVEL=info # debug | info | warn | error
|
|
323
|
+
LOG_FORMAT=pretty # pretty | logfmt
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
---
|
|
327
|
+
|
|
328
|
+
## Browser Runtime
|
|
329
|
+
|
|
330
|
+
Kuskus looks for Chrome/Chromium automatically. It checks common install locations (`/Applications/Google Chrome.app`, `chromium`, etc.) and honours `CDP_BROWSER_PATH`, `CHROME_PATH`, and `GOOGLE_CHROME_BIN` if set.
|
|
331
|
+
|
|
332
|
+
When no suitable binary is found (and auto-install is allowed) Kuskus downloads the latest **Chromium for Testing** build to `~/.local/chrome/<version>` and symlinks it to `~/.local/bin/chromium`.
|
|
333
|
+
|
|
334
|
+
Supported platforms for auto-download:
|
|
335
|
+
|
|
336
|
+
| OS | Arch |
|
|
337
|
+
|----|------|
|
|
338
|
+
| Linux | x86_64, arm64 |
|
|
339
|
+
| macOS | x86_64 (Intel), arm64 (Apple Silicon) |
|
|
340
|
+
|
|
341
|
+
Use `CDP_BROWSER_PATH` to point at a custom binary if you prefer a specific channel (e.g. Chrome Canary) or an alternative CDP-compatible browser.
|
|
342
|
+
|
|
343
|
+
---
|
|
344
|
+
|
|
345
|
+
## Examples
|
|
346
|
+
|
|
347
|
+
```bash
|
|
348
|
+
# With Claude (default)
|
|
349
|
+
kuskus run "go to https://github.com/lightpanda-io/browser and summarize the README" --launch
|
|
350
|
+
|
|
351
|
+
# With GPT-4o — provider auto-detected from model name
|
|
352
|
+
kuskus run "go to news.ycombinator.com and list the top 5 posts" --model gpt-4o --launch
|
|
353
|
+
|
|
354
|
+
# With o3-mini
|
|
355
|
+
kuskus run "go to https://httpbin.org/json and extract all fields" --model o3-mini --launch
|
|
356
|
+
|
|
357
|
+
# Force provider explicitly
|
|
358
|
+
kuskus run "..." --provider openai --model gpt-4o-mini --launch
|
|
359
|
+
|
|
360
|
+
# Interactive REPL
|
|
361
|
+
kuskus repl --launch
|
|
362
|
+
kuskus repl --model gpt-4o --launch
|
|
363
|
+
|
|
364
|
+
# Extract data as JSON
|
|
365
|
+
kuskus run "go to news.ycombinator.com, extract title and URL of each front page post" --launch --output json
|
|
366
|
+
|
|
367
|
+
# Batch tasks
|
|
368
|
+
kuskus script ./tasks.json --model gpt-4o --output json
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
---
|
|
372
|
+
|
|
373
|
+
## Development
|
|
374
|
+
|
|
375
|
+
```bash
|
|
376
|
+
git clone https://github.com/porcupine/kuskus
|
|
377
|
+
cd kuskus
|
|
378
|
+
npm install
|
|
379
|
+
cp .env.example .env
|
|
380
|
+
|
|
381
|
+
# Run tests
|
|
382
|
+
npm test
|
|
383
|
+
|
|
384
|
+
# Try the CLI
|
|
385
|
+
node bin/cli.js install # download Chromium for Testing
|
|
386
|
+
node bin/cli.js run "..." --launch
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
### Project structure
|
|
390
|
+
|
|
391
|
+
```
|
|
392
|
+
kuskus/
|
|
393
|
+
├── bin/
|
|
394
|
+
│ └── cli.js CLI entrypoint (run/repl/script/mcp/install)
|
|
395
|
+
├── src/
|
|
396
|
+
│ ├── cdp/
|
|
397
|
+
│ │ ├── client.js WebSocket CDP client + session multiplexing
|
|
398
|
+
│ │ ├── session.js Target/tab manager
|
|
399
|
+
│ │ └── domains/
|
|
400
|
+
│ │ ├── page.js Navigate, screenshot, reload
|
|
401
|
+
│ │ ├── dom.js querySelector, getBoxModel, focus
|
|
402
|
+
│ │ ├── input.js Click, hover, scroll, key press
|
|
403
|
+
│ │ ├── runtime.js Evaluate JS
|
|
404
|
+
│ │ ├── network.js Request monitoring/intercept
|
|
405
|
+
│ │ └── target.js Multi-tab management
|
|
406
|
+
│ ├── agent/
|
|
407
|
+
│ │ ├── index.js KuskusAgent orchestrator
|
|
408
|
+
│ │ ├── planner.js LLM planning loop (provider-agnostic)
|
|
409
|
+
│ │ ├── providers.js Anthropic + OpenAI adapters, auto-detection
|
|
410
|
+
│ │ ├── executor.js Tool → CDP command mapping
|
|
411
|
+
│ │ ├── tools.js Tool definitions (JSON Schema)
|
|
412
|
+
│ │ ├── memory.js Rolling step history
|
|
413
|
+
│ │ └── prompts.js System prompt
|
|
414
|
+
│ ├── mcp/
|
|
415
|
+
│ │ ├── server.js MCP server (stdio transport)
|
|
416
|
+
│ │ └── handlers.js Tool + resource handlers
|
|
417
|
+
│ └── utils/
|
|
418
|
+
│ ├── chromium.js Chrome/Chromium detector + downloader
|
|
419
|
+
│ ├── browser.js Launch + CDP readiness check
|
|
420
|
+
│ ├── dom-to-text.js HTML → readable text for LLM
|
|
421
|
+
│ ├── screenshot.js Save screenshots to disk
|
|
422
|
+
│ └── logger.js Structured logger (pino)
|
|
423
|
+
├── tests/
|
|
424
|
+
└── examples/
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
---
|
|
428
|
+
|
|
429
|
+
## License
|
|
430
|
+
|
|
431
|
+
MIT
|
package/SKILL.md
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Kuskus Skill Overview
|
|
2
|
+
|
|
3
|
+
Kuskus is a CLI and MCP server that lets an AI agent control a real Chrome/Chromium browser over the Chrome DevTools Protocol. Use it when you need rich browser interactions (navigation, DOM scraping, screenshots) in automated workflows.
|
|
4
|
+
|
|
5
|
+
## Capabilities
|
|
6
|
+
- Launches or attaches to a local Chrome/Chromium instance (headless or visible).
|
|
7
|
+
- Auto-detects Anthropic or OpenAI models based on name; supports GPT-4o and Claude Sonnet out of the box.
|
|
8
|
+
- Exposes a full tool palette (navigation, DOM queries, input, waits, screenshots) via JSON-schema definitions.
|
|
9
|
+
- Provides a REPL for interactive runs and a script runner for batch tasks.
|
|
10
|
+
- MCP server mode surfaces the same browser tools to host applications (Claude Desktop, Cursor, etc.).
|
|
11
|
+
|
|
12
|
+
## Quick Start (CLI)
|
|
13
|
+
```bash
|
|
14
|
+
export OPENAI_API_KEY=sk-...
|
|
15
|
+
|
|
16
|
+
# one-shot task (auto-launches the browser; add --no-headless for a visible window)
|
|
17
|
+
npx @porcupine/kuskus run "Visit https://example.com and report the heading" --model gpt-4o --launch
|
|
18
|
+
|
|
19
|
+
# interactive REPL with Claude
|
|
20
|
+
export ANTHROPIC_API_KEY=sk-ant-...
|
|
21
|
+
npx @porcupine/kuskus repl --model claude-sonnet-4-6 --launch
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Key flags:
|
|
25
|
+
- `--launch` / `--no-headless` – start Chrome automatically, optionally with a window.
|
|
26
|
+
- `--force-launch` – shut down any existing debugging browser before launching.
|
|
27
|
+
- `--user-data-dir` – point to a Chrome profile so sessions persist across runs.
|
|
28
|
+
- `--output json` – return structured data when tools emit payloads.
|
|
29
|
+
|
|
30
|
+
## MCP Integration
|
|
31
|
+
Start the MCP server and let the host model drive planning:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
npx @porcupine/kuskus mcp --launch --no-headless
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Configure your MCP-compatible client to launch the server with `kuskus mcp` (for example `npx -y @porcupine/kuskus mcp`). Available tools include page navigation, content extraction, screenshot capture, and tab management.
|
|
38
|
+
|
|
39
|
+
## Deployment Notes
|
|
40
|
+
- Chrome auto-detection checks standard install paths and environment overrides (`CDP_BROWSER_PATH`, `CHROME_PATH`, `GOOGLE_CHROME_BIN`).
|
|
41
|
+
- When no browser is found, the CLI downloads the latest Chromium-for-Testing build into `~/.local/chrome/<version>`.
|
|
42
|
+
- Ensure `OPENAI_API_KEY` or `ANTHROPIC_API_KEY` is set before running tasks that use those providers.
|
|
43
|
+
|
|
44
|
+
## Troubleshooting
|
|
45
|
+
- Use `--debug` to stream CDP traffic and planner logs.
|
|
46
|
+
- If a previous headless session is blocking `--no-headless`, add `--force-launch` to close it before relaunch.
|
|
47
|
+
- Screenshots can be saved automatically with `--screenshots <dir>`.
|
|
48
|
+
- The agent can invoke `wait_for_navigation` to wait for redirects (e.g., after SSO login) without manual prompts.
|