@pi-oxide/pi-host-web 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,449 @@
1
+ /**
2
+ * Browser-native tool schemas and tool registry.
3
+ *
4
+ * Defines the six browser tools that the browser host exposes to the agent.
5
+ * Tool execution goes through a BrowserRuntime adapter — fake for tests,
6
+ * LiveBrowserRuntime (real DOM) in the browser.
7
+ *
8
+ * Host-owned — no browser APIs in pi-core.
9
+ */
10
+
11
+ import type {
12
+ ToolCall,
13
+ ToolDefinition,
14
+ ToolResult,
15
+ } from "../../../pi_host_web.js";
16
+ import type {
17
+ BrowserConsoleEntry,
18
+ BrowserElementSnapshot,
19
+ BrowserRuntime,
20
+ } from "./browserRuntime.ts";
21
+ import { LiveBrowserRuntime } from "./liveRuntime.ts";
22
+ import type { AgentTools, AgentToolDefinition } from "../../types.ts";
23
+
24
+ // ========================================================================
25
+ // Tool schemas
26
+ // ========================================================================
27
+
28
/** Build the schema fragment for one string-typed tool argument. */
function stringParam(description: string): object {
  return { type: "string", description };
}

/**
 * Build a closed object schema (`additionalProperties: false`).
 *
 * The `required` key is emitted only when a list is supplied, so schemas
 * without mandatory arguments carry no `required` entry at all.
 */
function closedObjectSchema(
  properties: Record<string, object>,
  required?: string[],
): object {
  return {
    type: "object",
    properties,
    ...(required ? { required } : {}),
    additionalProperties: false,
  };
}

/** `browser_get_page` takes no arguments. */
const browserGetPageSchema: object = closedObjectSchema({});

/** `browser_eval_js` requires the JavaScript `source` to run. */
const browserEvalJsSchema: object = closedObjectSchema(
  {
    source: stringParam(
      "JavaScript source code to evaluate in the page context.",
    ),
  },
  ["source"],
);

/** `browser_query_selector` requires `selector`; `all` is optional. */
const browserQuerySelectorSchema: object = closedObjectSchema(
  {
    selector: stringParam("CSS selector to match elements."),
    all: {
      type: "boolean",
      description:
        "If true, return all matching elements. Default: false (first match only).",
    },
  },
  ["selector"],
);

/** `browser_click` requires the `selector` of the element to click. */
const browserClickSchema: object = closedObjectSchema(
  { selector: stringParam("CSS selector of the element to click.") },
  ["selector"],
);

/** `browser_type` requires both the target `selector` and the `text`. */
const browserTypeSchema: object = closedObjectSchema(
  {
    selector: stringParam("CSS selector of the input element to type into."),
    text: stringParam("Text to type into the element."),
  },
  ["selector", "text"],
);

/** `browser_console` accepts an optional `level` filter and entry `limit`. */
const browserConsoleSchema: object = closedObjectSchema({
  level: stringParam(
    "Filter by level: 'log', 'warn', 'error', 'info'. Omit for all.",
  ),
  limit: {
    type: "number",
    description: "Maximum number of entries to return. Default: 50.",
  },
});
106
+
107
+ // ========================================================================
108
+ // Tool definitions
109
+ // ========================================================================
110
+
111
/** Describe a tool whose definition is marked `execution_mode: "parallel"`. */
const parallelTool = (
  name: string,
  label: string,
  description: string,
  parameters: object,
): ToolDefinition => ({
  name,
  label,
  description,
  parameters,
  execution_mode: "parallel",
});

/** Describe a tool whose definition is marked `execution_mode: "sequential"`. */
const sequentialTool = (
  name: string,
  label: string,
  description: string,
  parameters: object,
): ToolDefinition => ({
  name,
  label,
  description,
  parameters,
  execution_mode: "sequential",
});

// Read-only tools are declared parallel; tools that run code or act on the
// page (eval, click, type) are declared sequential.

const BROWSER_GET_PAGE = parallelTool(
  "browser_get_page",
  "Get Page",
  "Get the current page state: URL, title, ready state, and focused element summary.",
  browserGetPageSchema,
);

const BROWSER_EVAL_JS = sequentialTool(
  "browser_eval_js",
  "Eval JS",
  "Evaluate JavaScript in the page context and return the JSON-serializable result. Returns a typed error if the code throws.",
  browserEvalJsSchema,
);

const BROWSER_QUERY_SELECTOR = parallelTool(
  "browser_query_selector",
  "Query Selector",
  "Query elements by CSS selector. Returns tag, text preview, attributes, and visibility for each matched element. Use 'all: true' to return all matches.",
  browserQuerySelectorSchema,
);

const BROWSER_CLICK = sequentialTool(
  "browser_click",
  "Click",
  "Click an element by CSS selector.",
  browserClickSchema,
);

const BROWSER_TYPE = sequentialTool(
  "browser_type",
  "Type",
  "Type text into an input element by CSS selector.",
  browserTypeSchema,
);

const BROWSER_CONSOLE = parallelTool(
  "browser_console",
  "Console",
  "Read captured console logs, warnings, and errors from the page. Optionally filter by level and limit count.",
  browserConsoleSchema,
);

/** Every browser-native tool the browser host exposes, in registration order. */
export const BROWSER_TOOLS: ToolDefinition[] = [
  BROWSER_GET_PAGE,
  BROWSER_EVAL_JS,
  BROWSER_QUERY_SELECTOR,
  BROWSER_CLICK,
  BROWSER_TYPE,
  BROWSER_CONSOLE,
];
175
+
176
/**
 * Build the one-line projection script that applies `head(text, maxChars)`.
 * The script language is interpreted outside this file; only the text is
 * produced here.
 */
const headProjectionScript = (maxChars: number): string =>
  `#{ action: "project", text: head(text, ${maxChars}) }`;

const BROWSER_GET_PAGE_SCRIPT = headProjectionScript(3000);
const BROWSER_EVAL_JS_SCRIPT = headProjectionScript(5000);

// Keeps only the lines that contain "ERROR" or "FATAL".
// NOTE(review): browser_console output is pretty-printed JSON whose levels are
// lowercase ("error"); if `contains` is case-sensitive this projection may
// match nothing — confirm against the script runtime.
const BROWSER_CONSOLE_SCRIPT = [
  "let all = lines(text);",
  "let errs = [];",
  "for line in all {",
  'if contains(line, "ERROR") || contains(line, "FATAL") {',
  "errs.push(line);",
  "}",
  "}",
  '#{ action: "project", text: join(errs, "\\n") }',
].join("\n");

/** Per-tool projection scripts, keyed by tool name. */
const DEFAULT_SCRIPTS: Record<string, string> = {
  browser_get_page: BROWSER_GET_PAGE_SCRIPT,
  browser_console: BROWSER_CONSOLE_SCRIPT,
  browser_eval_js: BROWSER_EVAL_JS_SCRIPT,
};
192
+
193
+ // ========================================================================
194
+ // Tool execution
195
+ // ========================================================================
196
+
197
+ /** Maximum length of an element's text preview, measured with String.length (UTF-16 code units); formatElement truncates longer text to this size. */
199
+ const MAX_ELEMENT_TEXT = 500;
199
+
200
/**
 * Adapt a tool handler so that it never rejects.
 *
 * The returned function awaits `handler`; if the handler throws or rejects,
 * the failure is converted into a ToolResult whose single text item is the
 * error message (`Error.message`, or `String(value)` for non-Error throws).
 *
 * @param handler - Synchronous or asynchronous tool handler to protect.
 * @returns An async handler with the same input that always resolves.
 */
export function wrapToolHandler(
  handler: (call: ToolCall) => ToolResult | Promise<ToolResult>,
): (call: ToolCall) => Promise<ToolResult> {
  const toFailureResult = (cause: unknown): ToolResult => {
    const text = cause instanceof Error ? cause.message : String(cause);
    return { content: [{ type: "text", text }] };
  };

  return async function guarded(call: ToolCall): Promise<ToolResult> {
    let outcome: ToolResult;
    try {
      outcome = await handler(call);
    } catch (cause: unknown) {
      outcome = toFailureResult(cause);
    }
    return outcome;
  };
}
215
+
216
/**
 * Shorten `text` to at most `max` UTF-16 code units and append "..." when
 * anything was cut.
 *
 * The cut never lands between the two halves of a surrogate pair: if it
 * would, the whole pair is dropped so the preview does not end in a lone
 * surrogate (which JSON.stringify would emit as a stray `\udXXX` escape).
 *
 * @param text - Text to shorten.
 * @param max - Maximum number of code units kept from `text`.
 * @returns The (possibly shortened) text and whether truncation happened.
 */
function truncateText(
  text: string,
  max: number,
): { text: string; truncated: boolean } {
  if (text.length <= max) return { text, truncated: false };

  let end = max;
  if (end > 0) {
    const before = text.charCodeAt(end - 1);
    const after = text.charCodeAt(end);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) end -= 1;
  }
  return { text: `${text.slice(0, end)}...`, truncated: true };
}
223
+
224
/**
 * Build the `details` payload attached to a browser tool result.
 *
 * @param toolName - Tool whose projection script should be used; tools
 *   without an entry in DEFAULT_SCRIPTS get a generic `head(text, 2000)` script.
 * @param text - The full text returned by the tool.
 * @param truncatedByTool - Whether the tool already shortened its output.
 * @returns Details with the content kind, the dynamic strategy script, the
 *   length of `text` in Unicode code points, and the truncation flag.
 */
function makeDetails(
  toolName: string,
  text: string,
  truncatedByTool: boolean = false,
): Record<string, unknown> {
  const fallbackScript = `#{ action: "project", text: head(text, 2000) }`;
  const script = DEFAULT_SCRIPTS[toolName] || fallbackScript;
  // Spreading a string iterates code points, not UTF-16 code units.
  const codePoints = [...text].length;

  return {
    content_kind: "generic_text",
    strategy: { type: "dynamic", script },
    original_chars: codePoints,
    truncated_by_tool: truncatedByTool,
  };
}
241
+
242
/**
 * Convert an element snapshot into the object reported to the agent.
 *
 * The element text is shortened to MAX_ELEMENT_TEXT via truncateText, and
 * `textTruncated` records whether that shortening happened; all other fields
 * are copied through unchanged.
 */
function formatElement(el: BrowserElementSnapshot): object {
  const preview = truncateText(el.text, MAX_ELEMENT_TEXT);
  const { tag, attributes, visible, selector } = el;
  return {
    tag,
    text: preview.text,
    textTruncated: preview.truncated,
    attributes,
    visible,
    selector,
  };
}
253
+
254
/**
 * Filter and limit captured console entries for the `browser_console` tool.
 *
 * @param entries - Captured entries, oldest first.
 * @param level - When non-empty, keep only entries with exactly this level.
 * @param limit - Maximum number of entries to return; the most recent ones are
 *   kept. Defaults to 50 when omitted, NaN, or not a number. Fractions are
 *   rounded down and negative values are treated as 0.
 * @returns The selected entries plus `count` (entries returned),
 *   `totalAvailable` (entries matching the level filter), and `truncated`
 *   (true when the limit left some matching entries out).
 */
function formatConsoleEntries(
  entries: BrowserConsoleEntry[],
  level?: string,
  limit?: number,
): object {
  const filtered = level ? entries.filter((e) => e.level === level) : entries;

  const requested =
    typeof limit === "number" && !Number.isNaN(limit) ? limit : 50;
  const effectiveLimit = Math.max(0, Math.floor(requested));

  const truncated = filtered.length > effectiveLimit;
  // slice(-0) is slice(0) and would return everything, so a zero limit
  // has to be handled explicitly.
  const sliced = effectiveLimit === 0 ? [] : filtered.slice(-effectiveLimit);

  return {
    entries: sliced.map((e) => ({
      level: e.level,
      args: e.args,
      timestamp: e.timestamp,
    })),
    count: sliced.length,
    totalAvailable: filtered.length,
    truncated,
  };
}
277
+
278
/**
 * Run `fn`, pretty-print its return value as JSON, and package the text as a
 * ToolResult with matching details.
 *
 * No errors are caught here: anything thrown by `fn` or by JSON.stringify
 * propagates to the caller.
 *
 * @param fn - Produces the value to report.
 * @param toolName - Tool name forwarded to makeDetails.
 */
function tryTool<T>(
  fn: () => T,
  toolName: string,
): ToolResult {
  const payload: T = fn();
  const serialized = JSON.stringify(payload, null, 2);
  const details = makeDetails(toolName, serialized, false);
  return {
    content: [{ type: "text", text: serialized }],
    details,
  };
}
289
+
290
/** Read the `selector` argument of a call; throws when it is missing or empty. */
function requireSelector(call: ToolCall): string {
  const selector = call.arguments.selector as string;
  if (!selector) {
    throw new Error("selector is required");
  }
  return selector;
}

/** browser_get_page: report URL, title, ready state and the focused element. */
function runGetPage(runtime: BrowserRuntime): ToolResult {
  return tryTool(() => {
    const page = runtime.getPage();
    return {
      url: page.url,
      title: page.title,
      readyState: page.readyState,
      focusedElement: page.focusedElement
        ? formatElement(page.focusedElement)
        : null,
    };
  }, "browser_get_page");
}

/** browser_eval_js: evaluate `source` through the runtime and report the result. */
function runEvalJs(call: ToolCall, runtime: BrowserRuntime): ToolResult {
  const source = call.arguments.source as string;
  if (typeof source !== "string" || source.length === 0) {
    throw new Error("source must be a non-empty string");
  }
  return tryTool(
    () => ({ ok: true, result: runtime.evalJs(source) }),
    "browser_eval_js",
  );
}

/** browser_query_selector: report the first match, or every match when `all` is set. */
function runQuerySelector(call: ToolCall, runtime: BrowserRuntime): ToolResult {
  const selector = requireSelector(call);
  const wantAll = call.arguments.all as boolean | undefined;
  return tryTool(() => {
    if (!wantAll) {
      const el = runtime.querySelector(selector);
      return { selector, found: el ? formatElement(el) : null };
    }
    const elements = runtime.querySelectorAll(selector);
    return {
      selector,
      matchCount: elements.length,
      elements: elements.map(formatElement),
    };
  }, "browser_query_selector");
}

/** browser_click: click the element; a failed runtime result becomes an Error. */
function runClick(call: ToolCall, runtime: BrowserRuntime): ToolResult {
  const selector = requireSelector(call);
  return tryTool(() => {
    const outcome = runtime.click(selector);
    if (!outcome.ok) {
      throw new Error(outcome.error.message);
    }
    return { ok: true, action: "click", selector };
  }, "browser_click");
}

/** browser_type: type `text` into the element; a failed runtime result becomes an Error. */
function runType(call: ToolCall, runtime: BrowserRuntime): ToolResult {
  const selector = requireSelector(call);
  const text = call.arguments.text as string;
  if (typeof text !== "string") {
    throw new Error("text must be a string");
  }
  return tryTool(() => {
    const outcome = runtime.type(selector, text);
    if (!outcome.ok) {
      throw new Error(outcome.error.message);
    }
    return {
      ok: true,
      action: "type",
      selector,
      textLength: text.length,
    };
  }, "browser_type");
}

/** browser_console: report captured console entries, filtered and limited. */
function runConsole(call: ToolCall, runtime: BrowserRuntime): ToolResult {
  const level = call.arguments.level as string | undefined;
  const limit = call.arguments.limit as number | undefined;
  return tryTool(
    () => formatConsoleEntries(runtime.getConsole(), level, limit),
    "browser_console",
  );
}

/**
 * Execute a browser tool call against a BrowserRuntime.
 *
 * Dispatches on `call.name` to the matching tool and returns its output as
 * pretty-printed JSON text plus details, suitable for hostToolDone.
 *
 * @param call - Tool call whose `name` selects the tool and whose `arguments`
 *   carry its inputs.
 * @param runtime - Browser adapter that performs the actual page operations.
 * @throws Error when a required argument is missing or has the wrong type,
 *   when the runtime reports a failed click/type, or when `call.name` is not
 *   one of the browser tools. Exceptions thrown by the runtime also propagate.
 */
export function executeBrowserTool(
  call: ToolCall,
  runtime: BrowserRuntime,
): ToolResult {
  switch (call.name) {
    case "browser_get_page":
      return runGetPage(runtime);
    case "browser_eval_js":
      return runEvalJs(call, runtime);
    case "browser_query_selector":
      return runQuerySelector(call, runtime);
    case "browser_click":
      return runClick(call, runtime);
    case "browser_type":
      return runType(call, runtime);
    case "browser_console":
      return runConsole(call, runtime);
    default:
      throw new Error(`no browser tool handler for: ${call.name}`);
  }
}
413
+
414
/**
 * Create an AgentTools pack for the browser-native tools.
 *
 * @param runtime - Browser adapter to execute against. When omitted, a new
 *   LiveBrowserRuntime is created, so callers outside a browser should pass
 *   their own runtime.
 * @returns The tool definitions plus `getHandler(name)`, which yields an async
 *   handler for a known tool name and `null` for anything else. Handlers
 *   reject with whatever executeBrowserTool throws.
 */
export function browserTools(runtime?: BrowserRuntime): AgentTools {
  const rt = runtime ?? new LiveBrowserRuntime();

  type Handler = (call: ToolCall) => ToolResult | Promise<ToolResult>;

  // A Map only ever answers for registered tool names. A plain object would
  // also resolve inherited keys such as "constructor" or "toString" and hand
  // back a bogus handler for a tool that does not exist.
  const handlers = new Map<string, Handler>();
  for (const def of BROWSER_TOOLS) {
    handlers.set(def.name, (call: ToolCall) => executeBrowserTool(call, rt));
  }

  // Tool calls made through this pack carry no call id.
  const invoke = (handler: Handler, name: string, input: unknown) =>
    handler({ id: "", name, arguments: input as Record<string, unknown> });

  const definitions: AgentToolDefinition[] = BROWSER_TOOLS.map((t) => ({
    name: t.name,
    description: t.description,
    inputSchema: t.parameters,
    run: (input: unknown) => {
      const handler = handlers.get(t.name);
      if (!handler) return null;
      return invoke(handler, t.name, input);
    },
  }));

  return {
    definitions,
    getHandler(name: string) {
      const handler = handlers.get(name);
      if (!handler) return null;
      return async (input: unknown) => invoke(handler, name, input);
    },
  };
}
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Browser runtime adapter interface and types.
3
+ *
4
+ * Defines the contract between browser tools and the actual browser environment.
5
+ * Tests use FakeBrowserRuntime; real browser uses a LiveBrowserRuntime backed
6
+ * by window/document.
7
+ *
8
+ * Host-owned — no browser APIs in pi-core.
9
+ */
10
+
11
+ // --- Snapshot types ---
12
+
13
/** Document loading states a page snapshot can report. */
type BrowserReadyState = "loading" | "interactive" | "complete";

/** Console levels that are captured. */
type BrowserConsoleLevel = "log" | "warn" | "error" | "info";

/** Serializable description of a single DOM element. */
export interface BrowserElementSnapshot {
  /** Element tag name. */
  tag: string;
  /** Text content of the element. */
  text: string;
  /** Attribute names mapped to their values. */
  attributes: Record<string, string>;
  /** Whether the runtime considers the element visible. */
  visible: boolean;
  /** Selector associated with this snapshot. */
  selector: string;
}

/** Serializable description of the current page. */
export interface BrowserPageSnapshot {
  url: string;
  title: string;
  readyState: BrowserReadyState;
  /** The focused element, or null when the runtime reports none. */
  focusedElement: BrowserElementSnapshot | null;
}

/** One captured console call. */
export interface BrowserConsoleEntry {
  level: BrowserConsoleLevel;
  /** The call's arguments, already converted to strings. */
  args: string[];
  /** Numeric capture time. */
  timestamp: number;
}

/** Successful outcome of a page action, with optional extra data. */
interface BrowserToolSuccess {
  ok: true;
  data?: unknown;
}

/** Failed outcome of a page action, with a machine code and a message. */
interface BrowserToolFailure {
  ok: false;
  error: { code: string; message: string };
}

/** Outcome of a page action, discriminated by `ok`. */
export type BrowserToolResult = BrowserToolSuccess | BrowserToolFailure;

// --- Runtime adapter ---

/** Contract between the browser tools and the environment they act on. */
export interface BrowserRuntime {
  /** Describe the current page. */
  getPage(): BrowserPageSnapshot;
  /** Evaluate JavaScript source and return its result. */
  evalJs(source: string): unknown;
  /** Snapshot the first element matching `selector`, or null when none matches. */
  querySelector(selector: string): BrowserElementSnapshot | null;
  /** Snapshot every element matching `selector`. */
  querySelectorAll(selector: string): BrowserElementSnapshot[];
  /** Click the element matching `selector`. */
  click(selector: string): BrowserToolResult;
  /** Type `text` into the element matching `selector`. */
  type(selector: string, text: string): BrowserToolResult;
  /** Return the captured console entries. */
  getConsole(): BrowserConsoleEntry[];
}
@@ -0,0 +1,151 @@
1
+ /**
2
+ * Live browser runtime — implements BrowserRuntime with real DOM APIs.
3
+ *
4
+ * Wraps window/document for the browser tool execution layer.
5
+ * Includes console capture so browser_console tool can read intercepted logs.
6
+ */
7
+
8
+ import type {
9
+ BrowserConsoleEntry,
10
+ BrowserElementSnapshot,
11
+ BrowserPageSnapshot,
12
+ BrowserRuntime,
13
+ BrowserToolResult,
14
+ } from "./browserRuntime.ts";
15
+
16
+ // --- Console capture ---
17
+
18
/**
 * Upper bound on retained console entries. Once it is exceeded the oldest
 * entries are discarded, so a long-lived or chatty page cannot grow the
 * buffer without limit.
 */
const MAX_CONSOLE_ENTRIES = 1000;

/** Captured console calls, oldest first. */
const consoleEntries: BrowserConsoleEntry[] = [];

/** The console methods as they were before interception, bound to `console`. */
const origConsole = {
  log: console.log.bind(console),
  warn: console.warn.bind(console),
  error: console.error.bind(console),
  info: console.info.bind(console),
};

/**
 * Convert one console argument to a string without ever throwing.
 *
 * Plain objects and arrays are rendered as JSON so they do not collapse to
 * "[object Object]"; everything else uses String(). Values that cannot be
 * converted at all (for example a circular null-prototype object) become
 * "[unserializable]".
 */
function formatConsoleArg(value: unknown): string {
  try {
    if (typeof value === "object" && value !== null) {
      const proto: unknown = Object.getPrototypeOf(value);
      const isPlain =
        Array.isArray(value) || proto === null || proto === Object.prototype;
      if (isPlain) {
        try {
          const json: unknown = JSON.stringify(value);
          if (typeof json === "string") return json;
        } catch {
          // Circular structure or BigInt member: fall back to String() below.
        }
      }
    }
    return String(value);
  } catch {
    return "[unserializable]";
  }
}

/**
 * Record one console call in the capture buffer.
 *
 * @param level - Console method that was called.
 * @param args - Arguments passed to that method; stored as strings.
 */
function captureConsole(
  level: BrowserConsoleEntry["level"],
  args: unknown[],
): void {
  consoleEntries.push({
    level,
    args: args.map((arg) => formatConsoleArg(arg)),
    timestamp: Date.now(),
  });
  const overflow = consoleEntries.length - MAX_CONSOLE_ENTRIES;
  if (overflow > 0) {
    consoleEntries.splice(0, overflow);
  }
}

// Intercept console methods: record the call first, then forward it to the
// original implementation so normal console output is unaffected.
console.log = (...a: unknown[]) => {
  captureConsole("log", a);
  origConsole.log(...a);
};
console.warn = (...a: unknown[]) => {
  captureConsole("warn", a);
  origConsole.warn(...a);
};
console.error = (...a: unknown[]) => {
  captureConsole("error", a);
  origConsole.error(...a);
};
console.info = (...a: unknown[]) => {
  captureConsole("info", a);
  origConsole.info(...a);
};
50
+
51
+ // --- Element snapshot helper ---
52
+
53
/**
 * Capture a serializable snapshot of a DOM element.
 *
 * @param el - Element to describe.
 * @param selector - Selector to record on the snapshot (stored as given).
 * @returns Lower-cased tag name, trimmed text content limited to 500
 *   characters, all attributes, and a visibility flag that is false when the
 *   computed style has `display: none`, `visibility: hidden` or opacity "0".
 *
 * NOTE(review): the text is already cut to 500 characters here, the same
 * limit formatElement applies, so formatElement appears unable to ever report
 * `textTruncated: true` for live snapshots — confirm whether that is intended.
 */
function snapshotElement(
  el: Element,
  selector: string,
): BrowserElementSnapshot {
  const rawText = el.textContent || "";
  const text = rawText.trim().slice(0, 500);

  const attributes: Record<string, string> = {};
  Array.from(el.attributes).forEach((attr) => {
    attributes[attr.name] = attr.value;
  });

  const style = window.getComputedStyle(el);
  const hidden =
    style.display === "none" ||
    style.visibility === "hidden" ||
    style.opacity === "0";

  return {
    tag: el.tagName.toLowerCase(),
    text,
    attributes,
    visible: !hidden,
    selector,
  };
}
75
+
76
+ // --- LiveBrowserRuntime ---
77
+
78
/**
 * BrowserRuntime backed by the real page: `document`, `location`, and the
 * module's console capture buffer.
 */
export class LiveBrowserRuntime implements BrowserRuntime {
  /**
   * Describe the current page. The focused element is reported only when it
   * is something other than `document.body`; its snapshot carries an empty
   * selector because no selector was used to find it.
   */
  getPage(): BrowserPageSnapshot {
    const ae = document.activeElement;
    const focused = ae && ae !== document.body ? snapshotElement(ae, "") : null;
    return {
      url: location.href,
      title: document.title,
      readyState: document.readyState as BrowserPageSnapshot["readyState"],
      focusedElement: focused,
    };
  }

  /**
   * Evaluate JavaScript and return its result.
   *
   * The source is first compiled as a single expression, so `1 + 1` or
   * `document.title` yield their value. If that is a syntax error (for
   * example the source contains statements or a top-level `return`), it is
   * compiled as a function body instead, where a value must be handed back
   * with `return`. Compilation happens before anything runs, so the source is
   * executed at most once.
   *
   * SECURITY: this executes arbitrary code with the page's privileges. Only
   * pass source from a caller that is allowed to script the page.
   *
   * @throws SyntaxError when the source compiles in neither form; anything
   *   the evaluated code throws also propagates.
   */
  evalJs(source: string): unknown {
    let compiled: () => unknown;
    try {
      // The newline keeps a trailing `// comment` from swallowing the paren.
      compiled = new Function(`return (${source}\n);`) as () => unknown;
    } catch (err: unknown) {
      if (!(err instanceof SyntaxError)) throw err;
      compiled = new Function(source) as () => unknown;
    }
    return compiled();
  }

  /** Snapshot the first element matching `selector`, or null when none matches. */
  querySelector(selector: string): BrowserElementSnapshot | null {
    const el = document.querySelector(selector);
    return el ? snapshotElement(el, selector) : null;
  }

  /** Snapshot every element matching `selector`, in document order. */
  querySelectorAll(selector: string): BrowserElementSnapshot[] {
    return Array.from(document.querySelectorAll(selector)).map((el) =>
      snapshotElement(el, selector),
    );
  }

  /**
   * Click the first element matching `selector`.
   *
   * Elements without a `click()` method (such as SVG elements) receive a
   * synthetic bubbling click event instead of causing a TypeError.
   *
   * @returns `ok: true`, or an `element_not_found` failure.
   */
  click(selector: string): BrowserToolResult {
    const el = document.querySelector(selector);
    if (!el) {
      return this.notFound(selector);
    }
    if (typeof (el as HTMLElement).click === "function") {
      (el as HTMLElement).click();
    } else {
      el.dispatchEvent(
        new MouseEvent("click", { bubbles: true, cancelable: true, composed: true }),
      );
    }
    return { ok: true };
  }

  /**
   * Replace the value of the first input or textarea matching `selector` with
   * `text` and dispatch a bubbling `input` event.
   *
   * @returns `ok: true`, an `element_not_found` failure, or a `not_input`
   *   failure when the match is neither an input nor a textarea.
   */
  type(selector: string, text: string): BrowserToolResult {
    const el = document.querySelector(selector);
    if (!el) {
      return this.notFound(selector);
    }
    if (
      !(el instanceof HTMLInputElement) &&
      !(el instanceof HTMLTextAreaElement)
    ) {
      return {
        ok: false,
        error: {
          code: "not_input",
          message: `Element is not an input or textarea: ${selector}`,
        },
      };
    }
    el.value = text;
    el.dispatchEvent(new Event("input", { bubbles: true }));
    return { ok: true };
  }

  /** Return a copy of the captured console entries, oldest first. */
  getConsole(): BrowserConsoleEntry[] {
    return [...consoleEntries];
  }

  /** Failure result shared by actions whose selector matched nothing. */
  private notFound(selector: string): BrowserToolResult {
    return {
      ok: false,
      error: {
        code: "element_not_found",
        message: `No element matches: ${selector}`,
      },
    };
  }
}