openbot 0.2.11 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.prettierrc +8 -0
- package/AGENTS.md +68 -0
- package/CONTRIBUTING.md +74 -0
- package/LICENSE +21 -0
- package/README.md +117 -14
- package/dist/agents/system.js +106 -0
- package/dist/app/cli.js +27 -0
- package/dist/app/config.js +64 -0
- package/dist/app/server.js +237 -0
- package/dist/app/utils.js +35 -0
- package/dist/harness/agent-harness.js +45 -0
- package/dist/harness/mcp.js +61 -0
- package/dist/harness/orchestrator.js +273 -0
- package/dist/harness/process.js +7 -0
- package/dist/plugins/ai-sdk.js +141 -0
- package/dist/plugins/delegation.js +52 -0
- package/dist/plugins/mcp.js +140 -0
- package/dist/plugins/storage.js +502 -0
- package/dist/plugins/ui.js +47 -0
- package/dist/registry/plugins.js +73 -0
- package/dist/services/storage.js +724 -0
- package/docs/README.md +7 -0
- package/docs/agents.md +83 -0
- package/docs/architecture.md +34 -0
- package/docs/plugins.md +77 -0
- package/logo-black.png +0 -0
- package/{dist/assets/logo.js → logo-black.svg} +24 -24
- package/{dist/ui/sidebar.js → logo-white.svg} +23 -88
- package/package.json +10 -9
- package/src/agents/system.ts +112 -0
- package/src/app/cli.ts +38 -0
- package/src/app/config.ts +104 -0
- package/src/app/server.ts +284 -0
- package/src/app/types.ts +476 -0
- package/src/app/utils.ts +43 -0
- package/src/assets/icon.svg +1 -0
- package/src/harness/agent-harness.ts +58 -0
- package/src/harness/mcp.ts +78 -0
- package/src/harness/orchestrator.ts +342 -0
- package/src/harness/process.ts +9 -0
- package/src/harness/types.ts +34 -0
- package/src/plugins/ai-sdk.ts +197 -0
- package/src/plugins/delegation.ts +60 -0
- package/src/plugins/mcp.ts +154 -0
- package/src/plugins/storage.ts +725 -0
- package/src/plugins/ui.ts +57 -0
- package/src/registry/plugins.ts +85 -0
- package/src/services/storage.ts +957 -0
- package/tsconfig.json +18 -0
- package/dist/agents/agent-creator.js +0 -74
- package/dist/agents/browser-agent.js +0 -31
- package/dist/agents/os-agent.js +0 -32
- package/dist/agents/planner-agent.js +0 -32
- package/dist/agents/topic-agent.js +0 -46
- package/dist/architecture/execution-engine.js +0 -151
- package/dist/architecture/intent-classifier.js +0 -26
- package/dist/architecture/planner.js +0 -106
- package/dist/automation-worker.js +0 -121
- package/dist/automations.js +0 -52
- package/dist/cli.js +0 -275
- package/dist/config.js +0 -53
- package/dist/core/agents.js +0 -41
- package/dist/core/delegation.js +0 -230
- package/dist/core/manager.js +0 -96
- package/dist/core/plugins.js +0 -74
- package/dist/core/router.js +0 -191
- package/dist/handlers/init.js +0 -29
- package/dist/handlers/session-change.js +0 -21
- package/dist/handlers/settings.js +0 -47
- package/dist/handlers/tab-change.js +0 -14
- package/dist/installers.js +0 -156
- package/dist/marketplace.js +0 -80
- package/dist/model-catalog.js +0 -132
- package/dist/model-defaults.js +0 -25
- package/dist/models.js +0 -47
- package/dist/open-bot.js +0 -51
- package/dist/orchestrator/direct-invocation.js +0 -13
- package/dist/orchestrator/events.js +0 -36
- package/dist/orchestrator/state.js +0 -54
- package/dist/orchestrator.js +0 -422
- package/dist/plugins/agent/index.js +0 -81
- package/dist/plugins/approval/index.js +0 -100
- package/dist/plugins/brain/identity.js +0 -77
- package/dist/plugins/brain/index.js +0 -204
- package/dist/plugins/brain/memory.js +0 -120
- package/dist/plugins/brain/prompt.js +0 -46
- package/dist/plugins/brain/types.js +0 -45
- package/dist/plugins/brain/ui.js +0 -7
- package/dist/plugins/browser/index.js +0 -629
- package/dist/plugins/browser/ui.js +0 -13
- package/dist/plugins/file-system/index.js +0 -171
- package/dist/plugins/file-system/ui.js +0 -6
- package/dist/plugins/llm/context-budget.js +0 -139
- package/dist/plugins/llm/context-shaping.js +0 -177
- package/dist/plugins/llm/index.js +0 -380
- package/dist/plugins/memory/index.js +0 -220
- package/dist/plugins/memory/memory.js +0 -122
- package/dist/plugins/memory/prompt.js +0 -55
- package/dist/plugins/memory/types.js +0 -45
- package/dist/plugins/meta-agent/index.js +0 -570
- package/dist/plugins/meta-agent/ui.js +0 -11
- package/dist/plugins/shell/index.js +0 -100
- package/dist/plugins/shell/ui.js +0 -6
- package/dist/plugins/skills/index.js +0 -286
- package/dist/plugins/skills/types.js +0 -50
- package/dist/plugins/skills/ui.js +0 -12
- package/dist/registry/agent-registry.js +0 -35
- package/dist/registry/index.js +0 -2
- package/dist/registry/plugin-loader.js +0 -499
- package/dist/registry/plugin-registry.js +0 -44
- package/dist/registry/ts-agent-loader.js +0 -82
- package/dist/registry/yaml-agent-loader.js +0 -246
- package/dist/runtime/execution-trace.js +0 -41
- package/dist/runtime/intent-routing.js +0 -26
- package/dist/runtime/openbot-runtime.js +0 -354
- package/dist/server.js +0 -890
- package/dist/session.js +0 -179
- package/dist/ui/block.js +0 -12
- package/dist/ui/header.js +0 -52
- package/dist/ui/layout.js +0 -26
- package/dist/ui/navigation.js +0 -15
- package/dist/ui/settings.js +0 -106
- package/dist/ui/skills.js +0 -7
- package/dist/ui/thread.js +0 -16
- package/dist/ui/widgets/action-list.js +0 -2
- package/dist/ui/widgets/approval-card.js +0 -9
- package/dist/ui/widgets/code-snippet.js +0 -2
- package/dist/ui/widgets/data-block.js +0 -2
- package/dist/ui/widgets/data-table.js +0 -2
- package/dist/ui/widgets/delegation.js +0 -29
- package/dist/ui/widgets/empty-state.js +0 -2
- package/dist/ui/widgets/index.js +0 -23
- package/dist/ui/widgets/inquiry.js +0 -7
- package/dist/ui/widgets/key-value.js +0 -2
- package/dist/ui/widgets/progress-step.js +0 -2
- package/dist/ui/widgets/resource-card.js +0 -2
- package/dist/ui/widgets/status.js +0 -2
- package/dist/ui/widgets/todo-list.js +0 -2
- package/dist/version.js +0 -62
- /package/dist/{types.js → app/types.js} +0 -0
- /package/dist/{architecture/contracts.js → harness/types.js} +0 -0
|
@@ -1,629 +0,0 @@
|
|
|
1
|
-
import { z } from "zod";
|
|
2
|
-
import { chromium } from "playwright";
|
|
3
|
-
import { generateText, Output } from "ai";
|
|
4
|
-
// ---------------------------------------------------------------------------
|
|
5
|
-
// Tool definitions
|
|
6
|
-
// ---------------------------------------------------------------------------
|
|
7
|
-
/**
 * Tool catalogue exposed by the browser plugin.
 *
 * Each entry maps a tool name to a human-readable `description` and the zod
 * `inputSchema` of its arguments. Two tools take a single natural-language
 * `instruction` string; the other two take no arguments.
 */
export const browserToolDefinitions = (() => {
    // Schema for tools driven by one free-form instruction.
    const instructionInput = (hint) => z.object({
        instruction: z.string().describe(hint),
    });
    // Schema for tools that take no arguments (a fresh object per tool).
    const noInput = () => z.object({});
    return {
        browser_act: {
            description: "Perform a browser action like clicking, typing, or navigating using natural language.",
            inputSchema: instructionInput("The action to perform, e.g. 'click the login button' or 'type pizza in the search box'"),
        },
        browser_extract: {
            description: "Extract structured data from the page using natural language instructions.",
            inputSchema: instructionInput("What data to extract, e.g. 'get all product titles and prices'"),
        },
        browser_observe: {
            description: "Observe the current page and get a list of possible actions in natural language.",
            inputSchema: noInput(),
        },
        browser_state_update: {
            description: "Get a fresh screenshot and page info from the browser.",
            inputSchema: noInput(),
        },
    };
})();
|
|
33
|
-
// ---------------------------------------------------------------------------
|
|
34
|
-
// BrowserManager – lifecycle for Playwright browser / context / pages
|
|
35
|
-
// ---------------------------------------------------------------------------
|
|
36
|
-
/**
 * Owns the lifecycle of one Playwright browser, its context and its pages.
 *
 * Launch mode is chosen by `options.userDataDir`:
 *  - set:   `chromium.launchPersistentContext(userDataDir, …)`
 *  - unset: `chromium.launch(…)` followed by `browser.newContext()`
 */
class BrowserManager {
    /**
     * @param {object} options
     * @param {boolean} [options.headless] Initial headless flag; defaults to true.
     * @param {string} [options.userDataDir] When set, a persistent context is launched from it.
     * @param {string} [options.channel] Forwarded to Playwright as `channel`.
     */
    constructor(options) {
        this.options = options;
        this.pages = [];
        this.headless = options.headless ?? true;
    }
    /**
     * Launches the browser/context on first use and reuses them afterwards.
     *
     * @param {boolean} [headlessOverride] Headless flag for the launch. Ignored
     *   while a context is already running so that `isHeadless()` keeps
     *   describing the running browser; use `relaunch()` to switch modes.
     * @returns {Promise<object|undefined>} The browser handle, or undefined when
     *   the persistent context does not expose one.
     */
    async ensureBrowser(headlessOverride) {
        // The context is the "already launched" marker. In persistent mode
        // `context.browser()` may yield null (stored as undefined below), so
        // keying on `this.browser` would relaunch the profile on every call.
        if (this.context)
            return this.browser;
        if (headlessOverride !== undefined)
            this.headless = headlessOverride;
        if (this.options.userDataDir) {
            this.context = await chromium.launchPersistentContext(this.options.userDataDir, { headless: this.headless, channel: this.options.channel });
            this.browser = this.context.browser() ?? undefined;
            this.pages = this.context.pages();
        }
        else {
            // Reuse a browser left over from a launch whose newContext() failed
            // instead of starting (and leaking) a second process.
            if (!this.browser) {
                this.browser = await chromium.launch({
                    headless: this.headless,
                    channel: this.options.channel,
                });
            }
            this.context = await this.browser.newContext();
        }
        return this.browser;
    }
    /**
     * Returns the first tracked page, opening one when none is tracked yet.
     *
     * @param {boolean} [headlessOverride] Passed through to `ensureBrowser`.
     * @returns {Promise<object>} The first entry of the tracked page list.
     */
    async ensurePage(headlessOverride) {
        await this.ensureBrowser(headlessOverride);
        if (this.pages.length === 0) {
            const page = await this.context.newPage();
            this.pages.push(page);
        }
        return this.pages[0];
    }
    /** @returns {object[]} The tracked pages (the live array, not a copy). */
    getPages() {
        return this.pages;
    }
    /** @returns {boolean} Headless flag used for the current or next launch. */
    isHeadless() {
        return this.headless;
    }
    /**
     * Shuts everything down and launches again in the requested mode.
     *
     * @param {boolean} headless Headless flag for the new launch.
     */
    async relaunch(headless) {
        await this.cleanup();
        await this.ensureBrowser(headless);
    }
    /**
     * Closes the context and the browser and forgets all handles.
     *
     * Both are closed: a context created with `browser.newContext()` does not
     * take the browser down with it, so closing only the context would leave
     * the browser process running with no reference to it.
     */
    async cleanup() {
        const { context, browser } = this;
        // Drop the handles first so a failing close() cannot leave stale state
        // that later calls would try to reuse.
        this.browser = undefined;
        this.context = undefined;
        this.pages = [];
        try {
            if (context)
                await context.close();
        }
        finally {
            if (browser)
                await browser.close();
        }
    }
}
|
|
89
|
-
// ---------------------------------------------------------------------------
|
|
90
|
-
// SmartBrowser – LLM-powered act / observe / extract
|
|
91
|
-
// ---------------------------------------------------------------------------
|
|
92
|
-
/**
 * LLM-driven helpers on top of a Playwright page: `act` performs one
 * model-chosen step, `observe` asks the model for suggested actions, and
 * `extract` asks the model to pull data out of the page text.
 *
 * Elements are addressed through `data-melony-id` attributes that
 * `getPageMap` stamps onto interactive elements as decimal counters.
 */
class SmartBrowser {
    /**
     * @param {object} model Language model handed to `generateText`; the three
     *   LLM methods throw when it is missing.
     * @param {object} manager The owning browser manager (stored, not used here).
     */
    constructor(model, manager) {
        this.model = model;
        this.manager = manager;
    }
    // -- helpers --------------------------------------------------------------
    /**
     * Best-effort wait for the page to settle. Every individual wait is allowed
     * to time out; the method always ends with a fixed 500 ms pause.
     */
    async waitForStable(page) {
        try {
            await page.waitForLoadState("domcontentloaded", { timeout: 3000 });
            await page
                .waitForLoadState("networkidle", { timeout: 3000 })
                .catch(() => { });
            await page
                .waitForFunction(() => !document.querySelector('[aria-busy="true"], .loading, .spinner'), { timeout: 2000 })
                .catch(() => { });
        }
        catch {
            /* best effort */
        }
        await page.waitForTimeout(500);
    }
    /**
     * Resolves a `data-melony-id` to a locator.
     *
     * The id comes from model output, so it is validated before being placed in
     * a CSS selector: ids are always decimal counters, and anything else (null,
     * free text, quote characters) is rejected immediately instead of producing
     * a broken selector or a 10 s wait for an element that cannot exist.
     *
     * @throws {Error} When `id` is not a non-negative integer string/number.
     */
    locateById(page, id) {
        const key = String(id ?? "").trim();
        if (!/^\d+$/.test(key)) {
            throw new Error(`Invalid elementId ${JSON.stringify(id ?? null)}: expected the numeric id of an element from the DOM map`);
        }
        return page.locator(`[data-melony-id="${key}"]`).first();
    }
    /** Clicks the element carrying the given `data-melony-id`. */
    async clickById(page, id) {
        const loc = this.locateById(page, id);
        await loc.waitFor({ state: "attached", timeout: 10000 });
        await loc.scrollIntoViewIfNeeded({ timeout: 5000 }).catch(() => { });
        await loc.click({ timeout: 10000 });
    }
    /**
     * Fills the element carrying the given `data-melony-id` with `text`.
     *
     * @throws {Error} When `text` is not a string (the action schema allows null).
     */
    async typeById(page, id, text) {
        if (typeof text !== "string") {
            throw new Error("The 'type' action requires a text value");
        }
        const loc = this.locateById(page, id);
        await loc.waitFor({ state: "attached", timeout: 10000 });
        await loc.scrollIntoViewIfNeeded({ timeout: 5000 }).catch(() => { });
        await loc.fill(text, { timeout: 10000 });
    }
    /** Scrolls by 80% of the viewport height; any direction other than "up" scrolls down. */
    async scroll(page, direction) {
        await page.evaluate((dir) => {
            const amount = dir === "up"
                ? -window.innerHeight * 0.8
                : window.innerHeight * 0.8;
            window.scrollBy(0, amount);
        }, direction);
        await page.waitForTimeout(500);
    }
    // -- page map (injects data-melony-id & builds semantic tree) -------------
    /**
     * Stamps `data-melony-id` on visible interactive elements and returns a
     * pruned DOM tree grouped by section.
     *
     * The callbacks passed to `page.evaluate` run inside the page, so they must
     * stay self-contained and cannot reference anything from this module.
     *
     * @returns {Promise<{url: string, title: string, scroll: object, sections: object}>}
     */
    async getPageMap(page) {
        // Polyfill esbuild/tsx __name helper (it doesn't exist in browser context)
        await page.evaluate('window.__name=window.__name||function(t){return t}');
        // Inject IDs on interactive elements
        await page.evaluate(() => {
            // Start from a clean slate so ids from a previous map never linger.
            document
                .querySelectorAll("[data-melony-id]")
                .forEach((el) => el.removeAttribute("data-melony-id"));
            const selector = 'button, a, input, select, textarea, [role="button"], [role="link"], ' +
                '[role="checkbox"], [role="menuitem"], [role="tab"], [role="treeitem"], ' +
                '[role="option"], [role="switch"], [role="radio"], [contenteditable="true"]';
            let id = 0;
            document.querySelectorAll(selector).forEach((el) => {
                const s = window.getComputedStyle(el);
                if (s.display === "none" || s.visibility === "hidden")
                    return;
                el.setAttribute("data-melony-id", String(id++));
            });
        });
        // Build semantic page map
        return page.evaluate(() => {
            const vW = window.innerWidth;
            const vH = window.innerHeight;
            const scrollY = window.scrollY;
            const totalHeight = document.body.scrollHeight;
            const MAX_NODES = 1500;
            let count = 0;
            const sections = {
                header: [],
                sidebar: [],
                navigation: [],
                main: [],
                footer: [],
                popups: [],
                other: [],
            };
            // Maps an element to a section name from its tag, role, id/class
            // hints, or (for fixed/sticky elements) its position; null if none.
            const classifySection = (el, style) => {
                const tag = el.tagName;
                const role = el.getAttribute("role");
                const id = (el.id || "").toLowerCase();
                const cls = (typeof el.className === "string" ? el.className : "").toLowerCase();
                if (tag === "HEADER" || role === "banner")
                    return "header";
                if (tag === "FOOTER" || role === "contentinfo")
                    return "footer";
                if (tag === "NAV" || role === "navigation")
                    return "navigation";
                if (tag === "MAIN" || role === "main")
                    return "main";
                if (tag === "ASIDE" ||
                    role === "complementary" ||
                    id.includes("sidebar") ||
                    cls.includes("sidebar"))
                    return "sidebar";
                if (role === "dialog" ||
                    role === "alertdialog" ||
                    cls.includes("modal") ||
                    cls.includes("popup"))
                    return "popups";
                if (style.position === "fixed" || style.position === "sticky") {
                    const r = el.getBoundingClientRect();
                    if (r.top <= 50 && r.width > vW * 0.8)
                        return "header";
                    if (r.bottom >= vH - 50 && r.width > vW * 0.8)
                        return "footer";
                }
                return null;
            };
            const buildTree = (el, depth = 0, currentSection) => {
                // `>=` so that at most MAX_NODES nodes are emitted.
                if (count >= MAX_NODES || depth > 15)
                    return null;
                if (["SCRIPT", "STYLE", "NOSCRIPT", "SVG", "IFRAME"].includes(el.tagName))
                    return null;
                const style = window.getComputedStyle(el);
                if (style.display === "none" ||
                    style.visibility === "hidden" ||
                    el.getAttribute("aria-hidden") === "true")
                    return null;
                const rect = el.getBoundingClientRect();
                const inViewport = rect.bottom > 0 &&
                    rect.right > 0 &&
                    rect.top < vH &&
                    rect.left < vW;
                // Skip content more than three viewport heights below the top.
                if (!inViewport && rect.top > vH * 3)
                    return null;
                const melonyId = el.getAttribute("data-melony-id");
                const isInteractive = !!melonyId;
                const role = el.getAttribute("role");
                const isHeading = /^H[1-6]$/.test(el.tagName);
                // Text held directly by this element (nodeType 3 = text node).
                const directText = Array.from(el.childNodes)
                    .filter((n) => n.nodeType === 3 && n.textContent?.trim())
                    .map((n) => n.textContent.trim())
                    .join(" ");
                // Sections are only detected near the top of the tree.
                const section = depth < 6 ? classifySection(el, style) : null;
                const active = section || currentSection || "other";
                const children = Array.from(el.children)
                    .map((c) => buildTree(c, depth + 1, active))
                    .filter(Boolean);
                const interestingRoles = [
                    "heading",
                    "img",
                    "alert",
                    "dialog",
                    "status",
                    "gridcell",
                    "list",
                ];
                const isInteresting = isInteractive ||
                    isHeading ||
                    directText ||
                    children.length > 0 ||
                    interestingRoles.includes(role || "");
                if (!isInteresting &&
                    !el.getAttribute("aria-label") &&
                    !el.getAttribute("title"))
                    return null;
                // Flatten container-only nodes
                if (!isInteractive &&
                    !directText &&
                    children.length === 1 &&
                    !section &&
                    !isHeading &&
                    !role)
                    return children[0];
                count++;
                const node = {
                    tag: el.tagName,
                    id: melonyId || undefined,
                    text: isInteractive || isHeading
                        ? el.innerText?.trim().slice(0, 100)
                        : directText || undefined,
                    role: role || undefined,
                    label: el.getAttribute("aria-label") ||
                        el.getAttribute("title") ||
                        undefined,
                    inViewport,
                };
                if (el.tagName === "INPUT") {
                    node.type = el.type;
                    // This map is sent to the language model: never include
                    // the contents of a password field.
                    node.value = el.type === "password" && el.value
                        ? "[redacted]"
                        : el.value;
                }
                if (children.length > 0)
                    node.children = children;
                // Section roots are collected separately and removed from
                // their parent's child list.
                if (section && depth < 6) {
                    sections[section].push(node);
                    return null;
                }
                return node;
            };
            const remaining = buildTree(document.body);
            if (remaining)
                sections.other.push(remaining);
            // body.scrollHeight can be smaller than the viewport or than the
            // real scroll range, so guard the division and clamp to 0..100.
            const scrollable = totalHeight - vH;
            const percentage = scrollable > 0
                ? Math.min(100, Math.max(0, Math.round((scrollY / scrollable) * 100)))
                : 0;
            return {
                url: window.location.href,
                title: document.title,
                scroll: {
                    y: scrollY,
                    percentage,
                    totalHeight,
                },
                sections: Object.fromEntries(Object.entries(sections).filter(([, v]) => v.length > 0)),
            };
        });
    }
    /**
     * Asks the model for ONE next step towards `instruction` and executes it.
     *
     * A failing step is retried once, with the error text fed back to the model.
     *
     * @returns {Promise<object>} `{ success: true, message }` when the model
     *   answers "done", otherwise `{ action, reasoning }` for the executed step.
     * @throws {Error} When no model is configured, the model returns no
     *   structured output, or the step fails twice.
     */
    async act(page, instruction) {
        if (!this.model)
            throw new Error("LanguageModel required for 'act'");
        const run = async (retry = 0, lastError) => {
            await this.waitForStable(page);
            const screenshot = await page
                .screenshot({ type: "jpeg", quality: 60 })
                .catch(() => null);
            const state = await this.getPageMap(page);
            const { output } = await generateText({
                model: this.model,
                output: Output.object({ schema: SmartBrowser.actionSchema }),
                messages: [
                    {
                        role: "system",
                        content: `You are an expert browser agent. Your task: "${instruction}"
Current URL: ${state.url} | Title: ${state.title} | Scroll: ${state.scroll.percentage}%

The DOM is grouped by semantic sections. Elements have numeric IDs for clicking/typing.
- Prefer elements in 'main' or 'navigation'.
- If the target isn't visible, scroll or look for inViewport:false elements.
- Handle popups/modals first if present.
- Use action 'done' when the task is complete.`,
                    },
                    {
                        role: "user",
                        content: [
                            {
                                type: "text",
                                text: `DOM:\n${JSON.stringify(state.sections, null, 2)}`,
                            },
                            ...(screenshot
                                ? [{ type: "image", image: screenshot }]
                                : []),
                            ...(lastError
                                ? [{ type: "text", text: `Previous error: ${lastError}` }]
                                : []),
                        ],
                    },
                ],
            });
            if (!output)
                throw new Error("LLM returned no structured output");
            if (output.action === "done")
                return { success: true, message: output.reasoning };
            try {
                // Every argument field is nullable in the schema, so each one
                // is checked inside this try: a missing field then goes
                // through the same retry-with-feedback path as a failed step.
                switch (output.action) {
                    case "click":
                        await this.clickById(page, output.elementId);
                        break;
                    case "type":
                        await this.typeById(page, output.elementId, output.text);
                        break;
                    case "press":
                        if (!output.key)
                            throw new Error("The 'press' action requires a key");
                        await page.keyboard.press(output.key);
                        break;
                    case "scroll":
                        await this.scroll(page, output.direction || "down");
                        break;
                    case "navigate":
                        if (!output.url)
                            throw new Error("The 'navigate' action requires a url");
                        await page.goto(output.url);
                        break;
                    case "wait":
                        await page.waitForTimeout(2000);
                        break;
                }
                return { action: output.action, reasoning: output.reasoning };
            }
            catch (e) {
                if (retry < 1)
                    return run(retry + 1, e instanceof Error ? e.message : String(e));
                throw e;
            }
        };
        return run();
    }
    // -- observe (LLM suggests possible actions) ------------------------------
    /**
     * Asks the model for suggested next actions on the current page.
     *
     * @returns {Promise<object|undefined>} The model's structured output
     *   (`{ observations: string[] }` per the schema), passed through unchecked.
     * @throws {Error} When no model is configured.
     */
    async observe(page) {
        if (!this.model)
            throw new Error("LanguageModel required for 'observe'");
        await this.waitForStable(page);
        const state = await this.getPageMap(page);
        const screenshot = await page
            .screenshot({ type: "jpeg", quality: 60 })
            .catch(() => null);
        const { output } = await generateText({
            model: this.model,
            output: Output.object({
                schema: z.object({
                    observations: z.array(z.string().describe("Natural language instruction for 'act'")),
                }),
            }),
            messages: [
                {
                    role: "system",
                    content: `You are an expert browser analyst.
Current URL: ${state.url} | Title: ${state.title} | Scroll: ${state.scroll.percentage}%

Suggest 5 logical, high-level actions a user might want to take.
Each should be a natural language instruction passable to 'browser_act'.
Focus on primary content and navigation. Handle modals first if open.`,
                },
                {
                    role: "user",
                    content: [
                        {
                            type: "text",
                            text: `DOM:\n${JSON.stringify(state.sections, null, 2)}`,
                        },
                        ...(screenshot
                            ? [{ type: "image", image: screenshot }]
                            : []),
                    ],
                },
            ],
        });
        return output;
    }
    // -- extract (LLM pulls structured data from page text) -------------------
    /**
     * Asks the model to extract `instruction` from the first 15000 characters
     * of the page's visible text.
     *
     * @returns {Promise<{data: *, confidence: number}>} `data` is the parsed
     *   JSON when the model's string parses, otherwise the raw string.
     * @throws {Error} When no model is configured or no output is returned.
     */
    async extract(page, instruction) {
        if (!this.model)
            throw new Error("LanguageModel required for 'extract'");
        await this.waitForStable(page);
        const content = await page.evaluate(() => document.body.innerText);
        const { output } = await generateText({
            model: this.model,
            output: Output.object({
                schema: z.object({
                    data: z.string(),
                    confidence: z.number(),
                }),
            }),
            // `?? ""` keeps a page without readable text from crashing slice().
            prompt: `Extract "${instruction}" from:\n${(content ?? "").slice(0, 15000)}`,
        });
        if (!output)
            throw new Error("LLM returned no structured output");
        try {
            return { ...output, data: JSON.parse(output.data) };
        }
        catch {
            // Not JSON: hand back the model's string untouched.
            return output;
        }
    }
}
|
|
443
|
-
// -- action schema (structured output the model returns for 'act') ----------
// Every argument field is nullable; only `action` and `reasoning` are
// always present.
SmartBrowser.actionSchema = (() => {
    // A described string field that the model may leave null.
    const optionalText = (hint) => z.string().nullable().describe(hint);
    return z.object({
        action: z.enum(["click", "type", "press", "wait", "navigate", "scroll", "done"]),
        elementId: optionalText("data-melony-id of the element"),
        text: optionalText("Text for 'type' action"),
        key: optionalText("Key for 'press' action"),
        url: optionalText("URL for 'navigate' action"),
        direction: z.enum(["up", "down"]).nullable(),
        reasoning: z.string().describe("Brief explanation"),
    });
})();
|
|
461
|
-
// ---------------------------------------------------------------------------
|
|
462
|
-
// Plugin factory
|
|
463
|
-
// ---------------------------------------------------------------------------
|
|
464
|
-
/**
 * Builds the browser plugin.
 *
 * One `BrowserManager` and one `SmartBrowser` are created per plugin instance
 * and shared by all handlers. The returned function registers six
 * `action:browser_*` handlers on the builder. Each handler is an async
 * generator that may yield `browser:status` and `browser:state-update` events
 * and always finishes with one `action:taskResult` event (success or error).
 *
 * @param {object} [options] Passed to `BrowserManager`; `options.model` is the
 *   language model given to `SmartBrowser`.
 * @returns {(builder: object) => void} Registration function.
 */
export const browserPlugin = (options = {}) => {
    const manager = new BrowserManager(options);
    const smart = new SmartBrowser(options.model, manager);

    // -- event constructors ---------------------------------------------------
    // `severity` is only added to the payload when the caller supplies one.
    const status = (message, severity) => ({
        type: "browser:status",
        data: severity === undefined ? { message } : { message, severity },
    });
    const taskResult = (action, toolCallId, result) => ({
        type: "action:taskResult",
        data: { action, toolCallId, result },
    });

    // Yields one state-update for the page; failures are logged, not thrown.
    async function* emitState(page, opts = { screenshot: true }) {
        try {
            const url = page.url();
            const title = await page.title();
            let encoded;
            if (opts.screenshot) {
                const shot = await page
                    .screenshot({ type: "jpeg", quality: 60 })
                    .catch(() => null);
                if (shot) {
                    encoded = shot.toString("base64");
                }
            }
            yield {
                type: "browser:state-update",
                data: {
                    url,
                    title,
                    screenshot: encoded,
                    pagesCount: manager.getPages().length,
                },
            };
        }
        catch (e) {
            console.error("Browser state update failed", e);
        }
    }

    return (builder) => {
        // Registers `action:<action>`. The optional announcement is yielded
        // before the guarded section; any error raised by `steps` becomes an
        // error task result for the same tool call.
        const handle = (action, announce, steps) => {
            builder.on(`action:${action}`, async function* (event) {
                const { toolCallId } = event.data;
                if (announce) {
                    yield status(announce(event.data));
                }
                try {
                    yield* steps(event.data);
                }
                catch (error) {
                    yield taskResult(action, toolCallId, { error: error.message });
                }
            });
        };

        // -- browser_act --------------------------------------------------------
        handle("browser_act", ({ instruction }) => `Performing: ${instruction}`, async function* ({ toolCallId, instruction }) {
            const page = await manager.ensurePage();
            const outcome = await smart.act(page, instruction);
            yield* emitState(page);
            yield taskResult("browser_act", toolCallId, { success: true, ...outcome });
        });

        // -- browser_extract ----------------------------------------------------
        handle("browser_extract", ({ instruction }) => `Extracting: ${instruction}`, async function* ({ toolCallId, instruction }) {
            const page = await manager.ensurePage();
            const extracted = await smart.extract(page, instruction);
            yield status(`Extracted: ${JSON.stringify(extracted, null, 2)}`, "success");
            yield* emitState(page, { screenshot: false });
            yield taskResult("browser_extract", toolCallId, { success: true, ...extracted });
        });

        // -- browser_observe ----------------------------------------------------
        handle("browser_observe", () => "Observing page…", async function* ({ toolCallId }) {
            const page = await manager.ensurePage();
            const observed = await smart.observe(page);
            const suggestions = observed?.observations;
            const summary = suggestions
                ? `Possible actions:\n${suggestions.map((o) => `• ${o}`).join("\n")}`
                : `Observations: ${JSON.stringify(observed, null, 2)}`;
            yield status(summary, "success");
            yield* emitState(page);
            yield taskResult("browser_observe", toolCallId, { success: true, ...observed });
        });

        // -- browser_state_update -----------------------------------------------
        handle("browser_state_update", () => "Updating browser state…", async function* ({ toolCallId }) {
            const page = await manager.ensurePage();
            yield* emitState(page);
            yield taskResult("browser_state_update", toolCallId, { success: true });
        });

        // -- browser_cleanup (internal) -----------------------------------------
        handle("browser_cleanup", null, async function* ({ toolCallId }) {
            await manager.cleanup();
            yield taskResult("browser_cleanup", toolCallId, {
                success: true,
                message: "Browser closed",
            });
        });

        // -- browser_show (internal – switch to headed mode) --------------------
        handle("browser_show", () => "Showing browser…", async function* ({ toolCallId }) {
            const current = await manager.ensurePage();
            let shown = current;
            let info = { message: "Browser is active", url: current.url() };
            if (manager.isHeadless()) {
                // Remember where we were, relaunch headed, and go back there.
                const previousUrl = current.url();
                await manager.relaunch(false);
                const headedPage = await manager.ensurePage(false);
                shown = headedPage;
                if (previousUrl) {
                    await headedPage.goto(previousUrl);
                }
                info = { message: "Browser opened (headed)", url: headedPage.url() };
            }
            yield* emitState(shown);
            yield taskResult("browser_show", toolCallId, { success: true, ...info });
        });
    };
};
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
import { ui } from "@melony/ui-kit/server";
|
|
2
|
-
/**
 * UI bridge for browser events.
 *
 * - `browser:status` becomes a status widget built from the event's message
 *   and severity.
 * - `browser:state-update` becomes a resource card (title, url) containing the
 *   screenshot as a JPEG data URL; updates without a screenshot yield nothing.
 *
 * @returns {(builder: object) => void} Registration function.
 */
export const browserUIPlugin = () => (builder) => {
    builder.on("browser:status", async function* ({ data }) {
        const { message, severity } = data;
        yield ui.event(ui.status(message, severity));
    });
    builder.on("browser:state-update", async function* ({ data }) {
        const { screenshot, title, url } = data;
        if (!screenshot) {
            return;
        }
        const preview = ui.image(`data:image/jpeg;base64,${screenshot}`);
        yield ui.event(ui.resourceCard(title, url, [preview]));
    });
};
|