openbot 0.2.11 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. package/.prettierrc +8 -0
  2. package/AGENTS.md +68 -0
  3. package/CONTRIBUTING.md +74 -0
  4. package/LICENSE +21 -0
  5. package/README.md +117 -14
  6. package/dist/agents/system.js +106 -0
  7. package/dist/app/cli.js +27 -0
  8. package/dist/app/config.js +64 -0
  9. package/dist/app/server.js +237 -0
  10. package/dist/app/utils.js +35 -0
  11. package/dist/harness/agent-harness.js +45 -0
  12. package/dist/harness/mcp.js +61 -0
  13. package/dist/harness/orchestrator.js +273 -0
  14. package/dist/harness/process.js +7 -0
  15. package/dist/plugins/ai-sdk.js +141 -0
  16. package/dist/plugins/delegation.js +52 -0
  17. package/dist/plugins/mcp.js +140 -0
  18. package/dist/plugins/storage.js +502 -0
  19. package/dist/plugins/ui.js +47 -0
  20. package/dist/registry/plugins.js +73 -0
  21. package/dist/services/storage.js +724 -0
  22. package/docs/README.md +7 -0
  23. package/docs/agents.md +83 -0
  24. package/docs/architecture.md +34 -0
  25. package/docs/plugins.md +77 -0
  26. package/logo-black.png +0 -0
  27. package/{dist/assets/logo.js → logo-black.svg} +24 -24
  28. package/{dist/ui/sidebar.js → logo-white.svg} +23 -88
  29. package/package.json +10 -9
  30. package/src/agents/system.ts +112 -0
  31. package/src/app/cli.ts +38 -0
  32. package/src/app/config.ts +104 -0
  33. package/src/app/server.ts +284 -0
  34. package/src/app/types.ts +476 -0
  35. package/src/app/utils.ts +43 -0
  36. package/src/assets/icon.svg +1 -0
  37. package/src/harness/agent-harness.ts +58 -0
  38. package/src/harness/mcp.ts +78 -0
  39. package/src/harness/orchestrator.ts +342 -0
  40. package/src/harness/process.ts +9 -0
  41. package/src/harness/types.ts +34 -0
  42. package/src/plugins/ai-sdk.ts +197 -0
  43. package/src/plugins/delegation.ts +60 -0
  44. package/src/plugins/mcp.ts +154 -0
  45. package/src/plugins/storage.ts +725 -0
  46. package/src/plugins/ui.ts +57 -0
  47. package/src/registry/plugins.ts +85 -0
  48. package/src/services/storage.ts +957 -0
  49. package/tsconfig.json +18 -0
  50. package/dist/agents/agent-creator.js +0 -74
  51. package/dist/agents/browser-agent.js +0 -31
  52. package/dist/agents/os-agent.js +0 -32
  53. package/dist/agents/planner-agent.js +0 -32
  54. package/dist/agents/topic-agent.js +0 -46
  55. package/dist/architecture/execution-engine.js +0 -151
  56. package/dist/architecture/intent-classifier.js +0 -26
  57. package/dist/architecture/planner.js +0 -106
  58. package/dist/automation-worker.js +0 -121
  59. package/dist/automations.js +0 -52
  60. package/dist/cli.js +0 -275
  61. package/dist/config.js +0 -53
  62. package/dist/core/agents.js +0 -41
  63. package/dist/core/delegation.js +0 -230
  64. package/dist/core/manager.js +0 -96
  65. package/dist/core/plugins.js +0 -74
  66. package/dist/core/router.js +0 -191
  67. package/dist/handlers/init.js +0 -29
  68. package/dist/handlers/session-change.js +0 -21
  69. package/dist/handlers/settings.js +0 -47
  70. package/dist/handlers/tab-change.js +0 -14
  71. package/dist/installers.js +0 -156
  72. package/dist/marketplace.js +0 -80
  73. package/dist/model-catalog.js +0 -132
  74. package/dist/model-defaults.js +0 -25
  75. package/dist/models.js +0 -47
  76. package/dist/open-bot.js +0 -51
  77. package/dist/orchestrator/direct-invocation.js +0 -13
  78. package/dist/orchestrator/events.js +0 -36
  79. package/dist/orchestrator/state.js +0 -54
  80. package/dist/orchestrator.js +0 -422
  81. package/dist/plugins/agent/index.js +0 -81
  82. package/dist/plugins/approval/index.js +0 -100
  83. package/dist/plugins/brain/identity.js +0 -77
  84. package/dist/plugins/brain/index.js +0 -204
  85. package/dist/plugins/brain/memory.js +0 -120
  86. package/dist/plugins/brain/prompt.js +0 -46
  87. package/dist/plugins/brain/types.js +0 -45
  88. package/dist/plugins/brain/ui.js +0 -7
  89. package/dist/plugins/browser/index.js +0 -629
  90. package/dist/plugins/browser/ui.js +0 -13
  91. package/dist/plugins/file-system/index.js +0 -171
  92. package/dist/plugins/file-system/ui.js +0 -6
  93. package/dist/plugins/llm/context-budget.js +0 -139
  94. package/dist/plugins/llm/context-shaping.js +0 -177
  95. package/dist/plugins/llm/index.js +0 -380
  96. package/dist/plugins/memory/index.js +0 -220
  97. package/dist/plugins/memory/memory.js +0 -122
  98. package/dist/plugins/memory/prompt.js +0 -55
  99. package/dist/plugins/memory/types.js +0 -45
  100. package/dist/plugins/meta-agent/index.js +0 -570
  101. package/dist/plugins/meta-agent/ui.js +0 -11
  102. package/dist/plugins/shell/index.js +0 -100
  103. package/dist/plugins/shell/ui.js +0 -6
  104. package/dist/plugins/skills/index.js +0 -286
  105. package/dist/plugins/skills/types.js +0 -50
  106. package/dist/plugins/skills/ui.js +0 -12
  107. package/dist/registry/agent-registry.js +0 -35
  108. package/dist/registry/index.js +0 -2
  109. package/dist/registry/plugin-loader.js +0 -499
  110. package/dist/registry/plugin-registry.js +0 -44
  111. package/dist/registry/ts-agent-loader.js +0 -82
  112. package/dist/registry/yaml-agent-loader.js +0 -246
  113. package/dist/runtime/execution-trace.js +0 -41
  114. package/dist/runtime/intent-routing.js +0 -26
  115. package/dist/runtime/openbot-runtime.js +0 -354
  116. package/dist/server.js +0 -890
  117. package/dist/session.js +0 -179
  118. package/dist/ui/block.js +0 -12
  119. package/dist/ui/header.js +0 -52
  120. package/dist/ui/layout.js +0 -26
  121. package/dist/ui/navigation.js +0 -15
  122. package/dist/ui/settings.js +0 -106
  123. package/dist/ui/skills.js +0 -7
  124. package/dist/ui/thread.js +0 -16
  125. package/dist/ui/widgets/action-list.js +0 -2
  126. package/dist/ui/widgets/approval-card.js +0 -9
  127. package/dist/ui/widgets/code-snippet.js +0 -2
  128. package/dist/ui/widgets/data-block.js +0 -2
  129. package/dist/ui/widgets/data-table.js +0 -2
  130. package/dist/ui/widgets/delegation.js +0 -29
  131. package/dist/ui/widgets/empty-state.js +0 -2
  132. package/dist/ui/widgets/index.js +0 -23
  133. package/dist/ui/widgets/inquiry.js +0 -7
  134. package/dist/ui/widgets/key-value.js +0 -2
  135. package/dist/ui/widgets/progress-step.js +0 -2
  136. package/dist/ui/widgets/resource-card.js +0 -2
  137. package/dist/ui/widgets/status.js +0 -2
  138. package/dist/ui/widgets/todo-list.js +0 -2
  139. package/dist/version.js +0 -62
  140. /package/dist/{types.js → app/types.js} +0 -0
  141. /package/dist/{architecture/contracts.js → harness/types.js} +0 -0
@@ -1,629 +0,0 @@
1
- import { z } from "zod";
2
- import { chromium } from "playwright";
3
- import { generateText, Output } from "ai";
4
- // ---------------------------------------------------------------------------
5
- // Tool definitions
6
- // ---------------------------------------------------------------------------
7
- export const browserToolDefinitions = {
8
- browser_act: {
9
- description: "Perform a browser action like clicking, typing, or navigating using natural language.",
10
- inputSchema: z.object({
11
- instruction: z
12
- .string()
13
- .describe("The action to perform, e.g. 'click the login button' or 'type pizza in the search box'"),
14
- }),
15
- },
16
- browser_extract: {
17
- description: "Extract structured data from the page using natural language instructions.",
18
- inputSchema: z.object({
19
- instruction: z
20
- .string()
21
- .describe("What data to extract, e.g. 'get all product titles and prices'"),
22
- }),
23
- },
24
- browser_observe: {
25
- description: "Observe the current page and get a list of possible actions in natural language.",
26
- inputSchema: z.object({}),
27
- },
28
- browser_state_update: {
29
- description: "Get a fresh screenshot and page info from the browser.",
30
- inputSchema: z.object({}),
31
- },
32
- };
33
- // ---------------------------------------------------------------------------
34
- // BrowserManager – lifecycle for Playwright browser / context / pages
35
- // ---------------------------------------------------------------------------
36
- class BrowserManager {
37
- constructor(options) {
38
- this.options = options;
39
- this.pages = [];
40
- this.headless = options.headless ?? true;
41
- }
42
- async ensureBrowser(headlessOverride) {
43
- if (headlessOverride !== undefined)
44
- this.headless = headlessOverride;
45
- if (this.browser)
46
- return this.browser;
47
- if (this.options.userDataDir) {
48
- this.context = await chromium.launchPersistentContext(this.options.userDataDir, { headless: this.headless, channel: this.options.channel });
49
- this.browser = this.context.browser() ?? undefined;
50
- this.pages = this.context.pages();
51
- }
52
- else {
53
- this.browser = await chromium.launch({
54
- headless: this.headless,
55
- channel: this.options.channel,
56
- });
57
- this.context = await this.browser.newContext();
58
- }
59
- return this.browser;
60
- }
61
- async ensurePage(headlessOverride) {
62
- await this.ensureBrowser(headlessOverride);
63
- if (this.pages.length === 0) {
64
- const page = await this.context.newPage();
65
- this.pages.push(page);
66
- }
67
- return this.pages[0];
68
- }
69
- getPages() {
70
- return this.pages;
71
- }
72
- isHeadless() {
73
- return this.headless;
74
- }
75
- async relaunch(headless) {
76
- await this.cleanup();
77
- await this.ensureBrowser(headless);
78
- }
79
- async cleanup() {
80
- if (this.context)
81
- await this.context.close();
82
- else if (this.browser)
83
- await this.browser.close();
84
- this.browser = undefined;
85
- this.context = undefined;
86
- this.pages = [];
87
- }
88
- }
89
- // ---------------------------------------------------------------------------
90
- // SmartBrowser – LLM-powered act / observe / extract
91
- // ---------------------------------------------------------------------------
92
- class SmartBrowser {
93
- constructor(model, manager) {
94
- this.model = model;
95
- this.manager = manager;
96
- }
97
- // -- helpers --------------------------------------------------------------
98
- async waitForStable(page) {
99
- try {
100
- await page.waitForLoadState("domcontentloaded", { timeout: 3000 });
101
- await page
102
- .waitForLoadState("networkidle", { timeout: 3000 })
103
- .catch(() => { });
104
- await page
105
- .waitForFunction(() => !document.querySelector('[aria-busy="true"], .loading, .spinner'), { timeout: 2000 })
106
- .catch(() => { });
107
- }
108
- catch {
109
- /* best effort */
110
- }
111
- await page.waitForTimeout(500);
112
- }
113
- async clickById(page, id) {
114
- const loc = page.locator(`[data-melony-id="${id}"]`).first();
115
- await loc.waitFor({ state: "attached", timeout: 10000 });
116
- await loc.scrollIntoViewIfNeeded({ timeout: 5000 }).catch(() => { });
117
- await loc.click({ timeout: 10000 });
118
- }
119
- async typeById(page, id, text) {
120
- const loc = page.locator(`[data-melony-id="${id}"]`).first();
121
- await loc.waitFor({ state: "attached", timeout: 10000 });
122
- await loc.scrollIntoViewIfNeeded({ timeout: 5000 }).catch(() => { });
123
- await loc.fill(text, { timeout: 10000 });
124
- }
125
- async scroll(page, direction) {
126
- await page.evaluate((dir) => {
127
- const amount = dir === "up"
128
- ? -window.innerHeight * 0.8
129
- : window.innerHeight * 0.8;
130
- window.scrollBy(0, amount);
131
- }, direction);
132
- await page.waitForTimeout(500);
133
- }
134
- // -- page map (injects data-melony-id & builds semantic tree) -------------
135
- async getPageMap(page) {
136
- // Polyfill esbuild/tsx __name helper (it doesn't exist in browser context)
137
- await page.evaluate('window.__name=window.__name||function(t){return t}');
138
- // Inject IDs on interactive elements
139
- await page.evaluate(() => {
140
- document
141
- .querySelectorAll("[data-melony-id]")
142
- .forEach((el) => el.removeAttribute("data-melony-id"));
143
- const selector = 'button, a, input, select, textarea, [role="button"], [role="link"], ' +
144
- '[role="checkbox"], [role="menuitem"], [role="tab"], [role="treeitem"], ' +
145
- '[role="option"], [role="switch"], [role="radio"], [contenteditable="true"]';
146
- let id = 0;
147
- document.querySelectorAll(selector).forEach((el) => {
148
- const s = window.getComputedStyle(el);
149
- if (s.display === "none" || s.visibility === "hidden")
150
- return;
151
- el.setAttribute("data-melony-id", String(id++));
152
- });
153
- });
154
- // Build semantic page map
155
- return page.evaluate(() => {
156
- const vW = window.innerWidth;
157
- const vH = window.innerHeight;
158
- const scrollY = window.scrollY;
159
- const totalHeight = document.body.scrollHeight;
160
- const MAX_NODES = 1500;
161
- let count = 0;
162
- const sections = {
163
- header: [],
164
- sidebar: [],
165
- navigation: [],
166
- main: [],
167
- footer: [],
168
- popups: [],
169
- other: [],
170
- };
171
- const classifySection = (el, style) => {
172
- const tag = el.tagName;
173
- const role = el.getAttribute("role");
174
- const id = (el.id || "").toLowerCase();
175
- const cls = (typeof el.className === "string" ? el.className : "").toLowerCase();
176
- if (tag === "HEADER" || role === "banner")
177
- return "header";
178
- if (tag === "FOOTER" || role === "contentinfo")
179
- return "footer";
180
- if (tag === "NAV" || role === "navigation")
181
- return "navigation";
182
- if (tag === "MAIN" || role === "main")
183
- return "main";
184
- if (tag === "ASIDE" ||
185
- role === "complementary" ||
186
- id.includes("sidebar") ||
187
- cls.includes("sidebar"))
188
- return "sidebar";
189
- if (role === "dialog" ||
190
- role === "alertdialog" ||
191
- cls.includes("modal") ||
192
- cls.includes("popup"))
193
- return "popups";
194
- if (style.position === "fixed" || style.position === "sticky") {
195
- const r = el.getBoundingClientRect();
196
- if (r.top <= 50 && r.width > vW * 0.8)
197
- return "header";
198
- if (r.bottom >= vH - 50 && r.width > vW * 0.8)
199
- return "footer";
200
- }
201
- return null;
202
- };
203
- const buildTree = (el, depth = 0, currentSection) => {
204
- if (count > MAX_NODES || depth > 15)
205
- return null;
206
- if (["SCRIPT", "STYLE", "NOSCRIPT", "SVG", "IFRAME"].includes(el.tagName))
207
- return null;
208
- const style = window.getComputedStyle(el);
209
- if (style.display === "none" ||
210
- style.visibility === "hidden" ||
211
- el.getAttribute("aria-hidden") === "true")
212
- return null;
213
- const rect = el.getBoundingClientRect();
214
- const inViewport = rect.bottom > 0 &&
215
- rect.right > 0 &&
216
- rect.top < vH &&
217
- rect.left < vW;
218
- if (!inViewport && rect.top > vH * 3)
219
- return null;
220
- const melonyId = el.getAttribute("data-melony-id");
221
- const isInteractive = !!melonyId;
222
- const role = el.getAttribute("role");
223
- const isHeading = /^H[1-6]$/.test(el.tagName);
224
- const directText = Array.from(el.childNodes)
225
- .filter((n) => n.nodeType === 3 && n.textContent?.trim())
226
- .map((n) => n.textContent.trim())
227
- .join(" ");
228
- const section = depth < 6 ? classifySection(el, style) : null;
229
- const active = section || currentSection || "other";
230
- const children = Array.from(el.children)
231
- .map((c) => buildTree(c, depth + 1, active))
232
- .filter(Boolean);
233
- const interestingRoles = [
234
- "heading",
235
- "img",
236
- "alert",
237
- "dialog",
238
- "status",
239
- "gridcell",
240
- "list",
241
- ];
242
- const isInteresting = isInteractive ||
243
- isHeading ||
244
- directText ||
245
- children.length > 0 ||
246
- interestingRoles.includes(role || "");
247
- if (!isInteresting &&
248
- !el.getAttribute("aria-label") &&
249
- !el.getAttribute("title"))
250
- return null;
251
- // Flatten container-only nodes
252
- if (!isInteractive &&
253
- !directText &&
254
- children.length === 1 &&
255
- !section &&
256
- !isHeading &&
257
- !role)
258
- return children[0];
259
- count++;
260
- const node = {
261
- tag: el.tagName,
262
- id: melonyId || undefined,
263
- text: isInteractive || isHeading
264
- ? el.innerText?.trim().slice(0, 100)
265
- : directText || undefined,
266
- role: role || undefined,
267
- label: el.getAttribute("aria-label") ||
268
- el.getAttribute("title") ||
269
- undefined,
270
- inViewport,
271
- };
272
- if (el.tagName === "INPUT") {
273
- node.type = el.type;
274
- node.value = el.value;
275
- }
276
- if (children.length > 0)
277
- node.children = children;
278
- if (section && depth < 6) {
279
- sections[section].push(node);
280
- return null;
281
- }
282
- return node;
283
- };
284
- const remaining = buildTree(document.body);
285
- if (remaining)
286
- sections.other.push(remaining);
287
- return {
288
- url: window.location.href,
289
- title: document.title,
290
- scroll: {
291
- y: scrollY,
292
- percentage: Math.round((scrollY / (totalHeight - vH || 1)) * 100),
293
- totalHeight,
294
- },
295
- sections: Object.fromEntries(Object.entries(sections).filter(([, v]) => v.length > 0)),
296
- };
297
- });
298
- }
299
- async act(page, instruction) {
300
- if (!this.model)
301
- throw new Error("LanguageModel required for 'act'");
302
- const run = async (retry = 0, lastError) => {
303
- await this.waitForStable(page);
304
- const screenshot = await page
305
- .screenshot({ type: "jpeg", quality: 60 })
306
- .catch(() => null);
307
- const state = await this.getPageMap(page);
308
- const { output } = await generateText({
309
- model: this.model,
310
- output: Output.object({ schema: SmartBrowser.actionSchema }),
311
- messages: [
312
- {
313
- role: "system",
314
- content: `You are an expert browser agent. Your task: "${instruction}"
315
- Current URL: ${state.url} | Title: ${state.title} | Scroll: ${state.scroll.percentage}%
316
-
317
- The DOM is grouped by semantic sections. Elements have numeric IDs for clicking/typing.
318
- - Prefer elements in 'main' or 'navigation'.
319
- - If the target isn't visible, scroll or look for inViewport:false elements.
320
- - Handle popups/modals first if present.
321
- - Use action 'done' when the task is complete.`,
322
- },
323
- {
324
- role: "user",
325
- content: [
326
- {
327
- type: "text",
328
- text: `DOM:\n${JSON.stringify(state.sections, null, 2)}`,
329
- },
330
- ...(screenshot
331
- ? [{ type: "image", image: screenshot }]
332
- : []),
333
- ...(lastError
334
- ? [{ type: "text", text: `Previous error: ${lastError}` }]
335
- : []),
336
- ],
337
- },
338
- ],
339
- });
340
- if (!output)
341
- throw new Error("LLM returned no structured output");
342
- if (output.action === "done")
343
- return { success: true, message: output.reasoning };
344
- try {
345
- switch (output.action) {
346
- case "click":
347
- await this.clickById(page, output.elementId);
348
- break;
349
- case "type":
350
- await this.typeById(page, output.elementId, output.text);
351
- break;
352
- case "press":
353
- await page.keyboard.press(output.key);
354
- break;
355
- case "scroll":
356
- await this.scroll(page, output.direction || "down");
357
- break;
358
- case "navigate":
359
- await page.goto(output.url);
360
- break;
361
- case "wait":
362
- await page.waitForTimeout(2000);
363
- break;
364
- }
365
- return { action: output.action, reasoning: output.reasoning };
366
- }
367
- catch (e) {
368
- if (retry < 1)
369
- return run(retry + 1, e.message);
370
- throw e;
371
- }
372
- };
373
- return run();
374
- }
375
- // -- observe (LLM suggests possible actions) ------------------------------
376
- async observe(page) {
377
- if (!this.model)
378
- throw new Error("LanguageModel required for 'observe'");
379
- await this.waitForStable(page);
380
- const state = await this.getPageMap(page);
381
- const screenshot = await page
382
- .screenshot({ type: "jpeg", quality: 60 })
383
- .catch(() => null);
384
- const { output } = await generateText({
385
- model: this.model,
386
- output: Output.object({
387
- schema: z.object({
388
- observations: z.array(z.string().describe("Natural language instruction for 'act'")),
389
- }),
390
- }),
391
- messages: [
392
- {
393
- role: "system",
394
- content: `You are an expert browser analyst.
395
- Current URL: ${state.url} | Title: ${state.title} | Scroll: ${state.scroll.percentage}%
396
-
397
- Suggest 5 logical, high-level actions a user might want to take.
398
- Each should be a natural language instruction passable to 'browser_act'.
399
- Focus on primary content and navigation. Handle modals first if open.`,
400
- },
401
- {
402
- role: "user",
403
- content: [
404
- {
405
- type: "text",
406
- text: `DOM:\n${JSON.stringify(state.sections, null, 2)}`,
407
- },
408
- ...(screenshot
409
- ? [{ type: "image", image: screenshot }]
410
- : []),
411
- ],
412
- },
413
- ],
414
- });
415
- return output;
416
- }
417
- // -- extract (LLM pulls structured data from page text) -------------------
418
- async extract(page, instruction) {
419
- if (!this.model)
420
- throw new Error("LanguageModel required for 'extract'");
421
- await this.waitForStable(page);
422
- const content = await page.evaluate(() => document.body.innerText);
423
- const { output } = await generateText({
424
- model: this.model,
425
- output: Output.object({
426
- schema: z.object({
427
- data: z.string(),
428
- confidence: z.number(),
429
- }),
430
- }),
431
- prompt: `Extract "${instruction}" from:\n${content.slice(0, 15000)}`,
432
- });
433
- if (!output)
434
- throw new Error("LLM returned no structured output");
435
- try {
436
- return { ...output, data: JSON.parse(output.data) };
437
- }
438
- catch {
439
- return output;
440
- }
441
- }
442
- }
443
- // -- act (LLM decides what to do) ----------------------------------------
444
- SmartBrowser.actionSchema = z.object({
445
- action: z.enum([
446
- "click",
447
- "type",
448
- "press",
449
- "wait",
450
- "navigate",
451
- "scroll",
452
- "done",
453
- ]),
454
- elementId: z.string().nullable().describe("data-melony-id of the element"),
455
- text: z.string().nullable().describe("Text for 'type' action"),
456
- key: z.string().nullable().describe("Key for 'press' action"),
457
- url: z.string().nullable().describe("URL for 'navigate' action"),
458
- direction: z.enum(["up", "down"]).nullable(),
459
- reasoning: z.string().describe("Brief explanation"),
460
- });
461
- // ---------------------------------------------------------------------------
462
- // Plugin factory
463
- // ---------------------------------------------------------------------------
464
- export const browserPlugin = (options = {}) => {
465
- const manager = new BrowserManager(options);
466
- const smart = new SmartBrowser(options.model, manager);
467
- return (builder) => {
468
- // -- helpers ------------------------------------------------------------
469
- async function* yieldState(page, opts = { screenshot: true }) {
470
- try {
471
- const url = page.url();
472
- const title = await page.title();
473
- let base64;
474
- if (opts.screenshot) {
475
- const buf = await page
476
- .screenshot({ type: "jpeg", quality: 60 })
477
- .catch(() => null);
478
- if (buf)
479
- base64 = buf.toString("base64");
480
- }
481
- yield {
482
- type: "browser:state-update",
483
- data: {
484
- url,
485
- title,
486
- screenshot: base64,
487
- pagesCount: manager.getPages().length,
488
- },
489
- };
490
- }
491
- catch (e) {
492
- console.error("Browser state update failed", e);
493
- }
494
- }
495
- function actionResult(action, toolCallId, data) {
496
- return {
497
- type: "action:taskResult",
498
- data: { action, toolCallId, result: data },
499
- };
500
- }
501
- // -- browser_act --------------------------------------------------------
502
- builder.on("action:browser_act", async function* (event) {
503
- const { toolCallId, instruction } = event.data;
504
- yield {
505
- type: "browser:status",
506
- data: { message: `Performing: ${instruction}` },
507
- };
508
- try {
509
- const page = await manager.ensurePage();
510
- const res = await smart.act(page, instruction);
511
- yield* yieldState(page);
512
- yield actionResult("browser_act", toolCallId, { success: true, ...res });
513
- }
514
- catch (error) {
515
- yield actionResult("browser_act", toolCallId, { error: error.message });
516
- }
517
- });
518
- // -- browser_extract ----------------------------------------------------
519
- builder.on("action:browser_extract", async function* (event) {
520
- const { toolCallId, instruction } = event.data;
521
- yield {
522
- type: "browser:status",
523
- data: { message: `Extracting: ${instruction}` },
524
- };
525
- try {
526
- const page = await manager.ensurePage();
527
- const res = await smart.extract(page, instruction);
528
- yield {
529
- type: "browser:status",
530
- data: {
531
- message: `Extracted: ${JSON.stringify(res, null, 2)}`,
532
- severity: "success",
533
- },
534
- };
535
- yield* yieldState(page, { screenshot: false });
536
- yield actionResult("browser_extract", toolCallId, { success: true, ...res });
537
- }
538
- catch (error) {
539
- yield actionResult("browser_extract", toolCallId, { error: error.message });
540
- }
541
- });
542
- // -- browser_observe ----------------------------------------------------
543
- builder.on("action:browser_observe", async function* (event) {
544
- const { toolCallId } = event.data;
545
- yield {
546
- type: "browser:status",
547
- data: { message: "Observing page…" },
548
- };
549
- try {
550
- const page = await manager.ensurePage();
551
- const res = await smart.observe(page);
552
- const observations = res?.observations;
553
- yield {
554
- type: "browser:status",
555
- data: {
556
- message: observations
557
- ? `Possible actions:\n${observations.map((o) => `• ${o}`).join("\n")}`
558
- : `Observations: ${JSON.stringify(res, null, 2)}`,
559
- severity: "success",
560
- },
561
- };
562
- yield* yieldState(page);
563
- yield actionResult("browser_observe", toolCallId, { success: true, ...res });
564
- }
565
- catch (error) {
566
- yield actionResult("browser_observe", toolCallId, { error: error.message });
567
- }
568
- });
569
- // -- browser_state_update -----------------------------------------------
570
- builder.on("action:browser_state_update", async function* (event) {
571
- const { toolCallId } = event.data;
572
- yield {
573
- type: "browser:status",
574
- data: { message: "Updating browser state…" },
575
- };
576
- try {
577
- const page = await manager.ensurePage();
578
- yield* yieldState(page);
579
- yield actionResult("browser_state_update", toolCallId, { success: true });
580
- }
581
- catch (error) {
582
- yield actionResult("browser_state_update", toolCallId, {
583
- error: error.message,
584
- });
585
- }
586
- });
587
- // -- browser_cleanup (internal) -----------------------------------------
588
- builder.on("action:browser_cleanup", async function* (event) {
589
- const { toolCallId } = event.data;
590
- try {
591
- await manager.cleanup();
592
- yield actionResult("browser_cleanup", toolCallId, {
593
- success: true,
594
- message: "Browser closed",
595
- });
596
- }
597
- catch (error) {
598
- yield actionResult("browser_cleanup", toolCallId, { error: error.message });
599
- }
600
- });
601
- // -- browser_show (internal – switch to headed mode) --------------------
602
- builder.on("action:browser_show", async function* (event) {
603
- const { toolCallId } = event.data;
604
- yield {
605
- type: "browser:status",
606
- data: { message: "Showing browser…" },
607
- };
608
- try {
609
- const page = await manager.ensurePage();
610
- let activePage = page;
611
- let res = { message: "Browser is active", url: page.url() };
612
- if (manager.isHeadless()) {
613
- const url = page.url();
614
- await manager.relaunch(false);
615
- const newPage = await manager.ensurePage(false);
616
- activePage = newPage;
617
- if (url)
618
- await newPage.goto(url);
619
- res = { message: "Browser opened (headed)", url: newPage.url() };
620
- }
621
- yield* yieldState(activePage);
622
- yield actionResult("browser_show", toolCallId, { success: true, ...res });
623
- }
624
- catch (error) {
625
- yield actionResult("browser_show", toolCallId, { error: error.message });
626
- }
627
- });
628
- };
629
- };
@@ -1,13 +0,0 @@
1
- import { ui } from "@melony/ui-kit/server";
2
- export const browserUIPlugin = () => (builder) => {
3
- builder.on("browser:status", async function* (event) {
4
- yield ui.event(ui.status(event.data.message, event.data.severity));
5
- });
6
- builder.on("browser:state-update", async function* (event) {
7
- if (event.data.screenshot) {
8
- yield ui.event(ui.resourceCard(event.data.title, event.data.url, [
9
- ui.image(`data:image/jpeg;base64,${event.data.screenshot}`),
10
- ]));
11
- }
12
- });
13
- };