@canaryai/cli 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,673 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/local-browser/host.ts
4
import { access } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { chromium } from "playwright";
5
// Interval, in milliseconds, between heartbeat frames sent on an open socket.
const HEARTBEAT_INTERVAL_MS = 30 * 1000;
// Delay, in milliseconds, before the first reconnection attempt; doubled on each further attempt.
const RECONNECT_DELAY_MS = 1000;
// Upper bound, in milliseconds, for the doubled reconnection delay.
const MAX_RECONNECT_DELAY_MS = 30 * 1000;
// Number of reconnection attempts after which the host gives up and stops.
const MAX_RECONNECT_ATTEMPTS = 10;
9
+ var LocalBrowserHost = class {
10
+ options;
11
+ ws = null;
12
+ browser = null;
13
+ context = null;
14
+ page = null;
15
+ pendingDialogs = [];
16
+ heartbeatTimer = null;
17
+ reconnectAttempts = 0;
18
+ isShuttingDown = false;
19
+ lastSnapshotYaml = "";
20
+ constructor(options) {
21
+ this.options = options;
22
+ }
23
+ log(level, message, data) {
24
+ if (this.options.onLog) {
25
+ this.options.onLog(level, message, data);
26
+ } else {
27
+ const fn = level === "error" ? console.error : level === "warn" ? console.warn : console.log;
28
+ fn(`[LocalBrowserHost] ${message}`, data ?? "");
29
+ }
30
+ }
31
+ // =========================================================================
32
+ // Lifecycle
33
+ // =========================================================================
34
+ async start() {
35
+ this.log("info", "Starting local browser host", {
36
+ browserMode: this.options.browserMode,
37
+ sessionId: this.options.sessionId
38
+ });
39
+ await this.connectWebSocket();
40
+ await this.launchBrowser();
41
+ this.sendSessionEvent("browser_ready");
42
+ }
43
+ async stop() {
44
+ this.isShuttingDown = true;
45
+ this.log("info", "Stopping local browser host");
46
+ this.stopHeartbeat();
47
+ if (this.ws) {
48
+ try {
49
+ this.ws.close(1e3, "Shutdown");
50
+ } catch {
51
+ }
52
+ this.ws = null;
53
+ }
54
+ if (this.context) {
55
+ try {
56
+ await this.context.close();
57
+ } catch {
58
+ }
59
+ this.context = null;
60
+ }
61
+ if (this.browser) {
62
+ try {
63
+ await this.browser.close();
64
+ } catch {
65
+ }
66
+ this.browser = null;
67
+ }
68
+ this.page = null;
69
+ this.log("info", "Local browser host stopped");
70
+ }
71
+ // =========================================================================
72
+ // WebSocket Connection
73
+ // =========================================================================
74
+ async connectWebSocket() {
75
+ return new Promise((resolve, reject) => {
76
+ const wsUrl = `${this.options.apiUrl.replace("http", "ws")}/local-browser/sessions/${this.options.sessionId}/connect?token=${this.options.wsToken}`;
77
+ this.log("info", "Connecting to cloud API", { url: wsUrl.replace(/token=.*/, "token=***") });
78
+ const ws = new WebSocket(wsUrl);
79
+ ws.onopen = () => {
80
+ this.log("info", "Connected to cloud API");
81
+ this.ws = ws;
82
+ this.reconnectAttempts = 0;
83
+ this.startHeartbeat();
84
+ resolve();
85
+ };
86
+ ws.onmessage = (event) => {
87
+ this.handleMessage(event.data);
88
+ };
89
+ ws.onerror = (event) => {
90
+ this.log("error", "WebSocket error", event);
91
+ };
92
+ ws.onclose = () => {
93
+ this.log("info", "WebSocket closed");
94
+ this.stopHeartbeat();
95
+ this.ws = null;
96
+ if (!this.isShuttingDown) {
97
+ this.scheduleReconnect();
98
+ }
99
+ };
100
+ setTimeout(() => {
101
+ if (!this.ws) {
102
+ reject(new Error("WebSocket connection timeout"));
103
+ }
104
+ }, 3e4);
105
+ });
106
+ }
107
+ scheduleReconnect() {
108
+ if (this.reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {
109
+ this.log("error", "Max reconnection attempts reached, giving up");
110
+ this.stop();
111
+ return;
112
+ }
113
+ const delay = Math.min(
114
+ RECONNECT_DELAY_MS * Math.pow(2, this.reconnectAttempts),
115
+ MAX_RECONNECT_DELAY_MS
116
+ );
117
+ this.reconnectAttempts++;
118
+ this.log("info", `Reconnecting in ${delay}ms (attempt ${this.reconnectAttempts})`);
119
+ setTimeout(async () => {
120
+ try {
121
+ await this.connectWebSocket();
122
+ this.sendSessionEvent("connected");
123
+ if (this.page) {
124
+ this.sendSessionEvent("browser_ready");
125
+ }
126
+ } catch (error) {
127
+ this.log("error", "Reconnection failed", error);
128
+ this.scheduleReconnect();
129
+ }
130
+ }, delay);
131
+ }
132
+ // =========================================================================
133
+ // Heartbeat
134
+ // =========================================================================
135
+ startHeartbeat() {
136
+ this.stopHeartbeat();
137
+ this.heartbeatTimer = setInterval(() => {
138
+ if (this.ws?.readyState === WebSocket.OPEN) {
139
+ const ping = {
140
+ type: "heartbeat",
141
+ id: crypto.randomUUID(),
142
+ timestamp: Date.now(),
143
+ direction: "pong"
144
+ };
145
+ this.ws.send(JSON.stringify(ping));
146
+ }
147
+ }, HEARTBEAT_INTERVAL_MS);
148
+ }
149
+ stopHeartbeat() {
150
+ if (this.heartbeatTimer) {
151
+ clearInterval(this.heartbeatTimer);
152
+ this.heartbeatTimer = null;
153
+ }
154
+ }
155
+ // =========================================================================
156
+ // Browser Management
157
+ // =========================================================================
158
+ async launchBrowser() {
159
+ const { browserMode, cdpUrl, headless = true, storageStatePath } = this.options;
160
+ if (browserMode === "cdp" && cdpUrl) {
161
+ this.log("info", "Connecting to existing Chrome via CDP", { cdpUrl });
162
+ this.browser = await chromium.connectOverCDP(cdpUrl);
163
+ const contexts = this.browser.contexts();
164
+ this.context = contexts[0] ?? await this.browser.newContext();
165
+ const pages = this.context.pages();
166
+ this.page = pages[0] ?? await this.context.newPage();
167
+ } else {
168
+ this.log("info", "Launching new Playwright browser", { headless });
169
+ this.browser = await chromium.launch({
170
+ headless,
171
+ args: ["--no-sandbox"]
172
+ });
173
+ const contextOptions = {
174
+ viewport: { width: 1920, height: 1080 }
175
+ };
176
+ if (storageStatePath) {
177
+ try {
178
+ await Bun.file(storageStatePath).exists();
179
+ contextOptions.storageState = storageStatePath;
180
+ this.log("info", "Loading storage state", { storageStatePath });
181
+ } catch {
182
+ this.log("debug", "Storage state file not found, starting fresh");
183
+ }
184
+ }
185
+ this.context = await this.browser.newContext(contextOptions);
186
+ this.page = await this.context.newPage();
187
+ }
188
+ this.page.on("dialog", (dialog) => {
189
+ this.pendingDialogs.push(dialog);
190
+ });
191
+ this.log("info", "Browser ready");
192
+ }
193
+ // =========================================================================
194
+ // Message Handling
195
+ // =========================================================================
196
+ handleMessage(data) {
197
+ try {
198
+ const message = JSON.parse(data);
199
+ if (message.type === "heartbeat" && message.direction === "ping") {
200
+ const pong = {
201
+ type: "heartbeat",
202
+ id: crypto.randomUUID(),
203
+ timestamp: Date.now(),
204
+ direction: "pong"
205
+ };
206
+ this.ws?.send(JSON.stringify(pong));
207
+ return;
208
+ }
209
+ if (message.type === "command") {
210
+ this.handleCommand(message);
211
+ return;
212
+ }
213
+ this.log("debug", "Received unknown message type", message);
214
+ } catch (error) {
215
+ this.log("error", "Failed to parse message", { error, data });
216
+ }
217
+ }
218
+ async handleCommand(command) {
219
+ const startTime = Date.now();
220
+ this.log("debug", `Executing command: ${command.method}`, { id: command.id });
221
+ try {
222
+ const result = await this.executeMethod(command.method, command.args);
223
+ const response = {
224
+ type: "response",
225
+ id: crypto.randomUUID(),
226
+ timestamp: Date.now(),
227
+ requestId: command.id,
228
+ success: true,
229
+ result
230
+ };
231
+ this.ws?.send(JSON.stringify(response));
232
+ this.log("debug", `Command completed: ${command.method}`, {
233
+ id: command.id,
234
+ durationMs: Date.now() - startTime
235
+ });
236
+ } catch (error) {
237
+ const errorMessage = error instanceof Error ? error.message : String(error);
238
+ const response = {
239
+ type: "response",
240
+ id: crypto.randomUUID(),
241
+ timestamp: Date.now(),
242
+ requestId: command.id,
243
+ success: false,
244
+ error: errorMessage,
245
+ stack: error instanceof Error ? error.stack : void 0
246
+ };
247
+ this.ws?.send(JSON.stringify(response));
248
+ this.log("error", `Command failed: ${command.method}`, {
249
+ id: command.id,
250
+ error: errorMessage
251
+ });
252
+ }
253
+ }
254
+ sendSessionEvent(event, error) {
255
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
256
+ const message = {
257
+ type: "session",
258
+ id: crypto.randomUUID(),
259
+ timestamp: Date.now(),
260
+ event,
261
+ browserMode: this.options.browserMode,
262
+ error
263
+ };
264
+ this.ws.send(JSON.stringify(message));
265
+ }
266
+ // =========================================================================
267
+ // Method Execution
268
+ // =========================================================================
269
+ async executeMethod(method, args) {
270
+ switch (method) {
271
+ // Lifecycle
272
+ case "connect":
273
+ return this.connect(args[0]);
274
+ case "disconnect":
275
+ return this.disconnect();
276
+ // Navigation
277
+ case "navigate":
278
+ return this.navigate(args[0], args[1]);
279
+ case "navigateBack":
280
+ return this.navigateBack(args[0]);
281
+ // Page Inspection
282
+ case "snapshot":
283
+ return this.snapshot(args[0]);
284
+ case "takeScreenshot":
285
+ return this.takeScreenshot(args[0]);
286
+ case "evaluate":
287
+ return this.evaluate(args[0], args[1]);
288
+ case "runCode":
289
+ return this.runCode(args[0], args[1]);
290
+ case "consoleMessages":
291
+ return this.consoleMessages(args[0]);
292
+ case "networkRequests":
293
+ return this.networkRequests(args[0]);
294
+ // Interaction
295
+ case "click":
296
+ return this.click(args[0], args[1], args[2]);
297
+ case "clickAtCoordinates":
298
+ return this.clickAtCoordinates(
299
+ args[0],
300
+ args[1],
301
+ args[2],
302
+ args[3]
303
+ );
304
+ case "moveToCoordinates":
305
+ return this.moveToCoordinates(
306
+ args[0],
307
+ args[1],
308
+ args[2],
309
+ args[3]
310
+ );
311
+ case "dragCoordinates":
312
+ return this.dragCoordinates(
313
+ args[0],
314
+ args[1],
315
+ args[2],
316
+ args[3],
317
+ args[4],
318
+ args[5]
319
+ );
320
+ case "hover":
321
+ return this.hover(args[0], args[1], args[2]);
322
+ case "drag":
323
+ return this.drag(
324
+ args[0],
325
+ args[1],
326
+ args[2],
327
+ args[3],
328
+ args[4]
329
+ );
330
+ case "type":
331
+ return this.type(
332
+ args[0],
333
+ args[1],
334
+ args[2],
335
+ args[3],
336
+ args[4]
337
+ );
338
+ case "pressKey":
339
+ return this.pressKey(args[0], args[1]);
340
+ case "fillForm":
341
+ return this.fillForm(args[0], args[1]);
342
+ case "selectOption":
343
+ return this.selectOption(
344
+ args[0],
345
+ args[1],
346
+ args[2],
347
+ args[3]
348
+ );
349
+ case "fileUpload":
350
+ return this.fileUpload(args[0], args[1]);
351
+ // Dialogs
352
+ case "handleDialog":
353
+ return this.handleDialog(args[0], args[1], args[2]);
354
+ // Waiting
355
+ case "waitFor":
356
+ return this.waitFor(args[0]);
357
+ // Browser Management
358
+ case "close":
359
+ return this.closePage(args[0]);
360
+ case "resize":
361
+ return this.resize(args[0], args[1], args[2]);
362
+ case "tabs":
363
+ return this.tabs(args[0], args[1], args[2]);
364
+ // Storage
365
+ case "getStorageState":
366
+ return this.getStorageState(args[0]);
367
+ case "getCurrentUrl":
368
+ return this.getCurrentUrl(args[0]);
369
+ case "getTitle":
370
+ return this.getTitle(args[0]);
371
+ case "getLinks":
372
+ return this.getLinks(args[0]);
373
+ case "getElementBoundingBox":
374
+ return this.getElementBoundingBox(args[0], args[1]);
375
+ // Tracing
376
+ case "startTracing":
377
+ return this.startTracing(args[0]);
378
+ case "stopTracing":
379
+ return this.stopTracing(args[0]);
380
+ // Video
381
+ case "isVideoRecordingEnabled":
382
+ return false;
383
+ // Video not supported in CLI host currently
384
+ case "saveVideo":
385
+ return null;
386
+ case "getVideoPath":
387
+ return null;
388
+ default:
389
+ throw new Error(`Unknown method: ${method}`);
390
+ }
391
+ }
392
+ // =========================================================================
393
+ // IBrowserClient Method Implementations
394
+ // =========================================================================
395
+ getPage() {
396
+ if (!this.page) throw new Error("No page available");
397
+ return this.page;
398
+ }
399
+ resolveRef(ref) {
400
+ return this.getPage().locator(`aria-ref=${ref}`);
401
+ }
402
+ async connect(_options) {
403
+ return;
404
+ }
405
+ async disconnect() {
406
+ await this.stop();
407
+ }
408
+ async navigate(url, _opts) {
409
+ const page = this.getPage();
410
+ await page.goto(url, { waitUntil: "domcontentloaded" });
411
+ await page.waitForLoadState("load", { timeout: 5e3 }).catch(() => {
412
+ });
413
+ return this.captureSnapshot();
414
+ }
415
+ async navigateBack(_opts) {
416
+ await this.getPage().goBack();
417
+ return this.captureSnapshot();
418
+ }
419
+ async snapshot(_opts) {
420
+ return this.captureSnapshot();
421
+ }
422
+ async captureSnapshot() {
423
+ const page = this.getPage();
424
+ this.lastSnapshotYaml = await page._snapshotForAI({ mode: "full" });
425
+ return this.lastSnapshotYaml;
426
+ }
427
+ async takeScreenshot(opts) {
428
+ const page = this.getPage();
429
+ const buffer = await page.screenshot({
430
+ type: opts?.type ?? "jpeg",
431
+ fullPage: opts?.fullPage ?? false
432
+ });
433
+ const mime = opts?.type === "png" ? "image/png" : "image/jpeg";
434
+ return `data:${mime};base64,${buffer.toString("base64")}`;
435
+ }
436
+ async evaluate(fn, _opts) {
437
+ const page = this.getPage();
438
+ return page.evaluate(new Function(`return (${fn})()`));
439
+ }
440
+ async runCode(code, _opts) {
441
+ const page = this.getPage();
442
+ const fn = new Function("page", `return (async () => { ${code} })()`);
443
+ return fn(page);
444
+ }
445
+ async consoleMessages(_opts) {
446
+ return "Console message capture not implemented in CLI host";
447
+ }
448
+ async networkRequests(_opts) {
449
+ return "Network request capture not implemented in CLI host";
450
+ }
451
+ async click(ref, _elementDesc, opts) {
452
+ const locator = this.resolveRef(ref);
453
+ await locator.scrollIntoViewIfNeeded({ timeout: 5e3 }).catch(() => {
454
+ });
455
+ const box = await locator.boundingBox();
456
+ if (box) {
457
+ const centerX = box.x + box.width / 2;
458
+ const centerY = box.y + box.height / 2;
459
+ const page = this.getPage();
460
+ if (opts?.modifiers?.length) {
461
+ for (const mod of opts.modifiers) {
462
+ await page.keyboard.down(mod);
463
+ }
464
+ }
465
+ if (opts?.doubleClick) {
466
+ await page.mouse.dblclick(centerX, centerY);
467
+ } else {
468
+ await page.mouse.click(centerX, centerY);
469
+ }
470
+ if (opts?.modifiers?.length) {
471
+ for (const mod of opts.modifiers) {
472
+ await page.keyboard.up(mod);
473
+ }
474
+ }
475
+ } else {
476
+ if (opts?.doubleClick) {
477
+ await locator.dblclick({ timeout: opts?.timeoutMs ?? 3e4 });
478
+ } else {
479
+ await locator.click({ timeout: opts?.timeoutMs ?? 3e4 });
480
+ }
481
+ }
482
+ }
483
+ async clickAtCoordinates(x, y, _elementDesc, opts) {
484
+ const page = this.getPage();
485
+ if (opts?.doubleClick) {
486
+ await page.mouse.dblclick(x, y);
487
+ } else {
488
+ await page.mouse.click(x, y);
489
+ }
490
+ }
491
+ async moveToCoordinates(x, y, _elementDesc, _opts) {
492
+ await this.getPage().mouse.move(x, y);
493
+ }
494
+ async dragCoordinates(startX, startY, endX, endY, _elementDesc, _opts) {
495
+ const page = this.getPage();
496
+ await page.mouse.move(startX, startY);
497
+ await page.mouse.down();
498
+ await page.mouse.move(endX, endY);
499
+ await page.mouse.up();
500
+ }
501
+ async hover(ref, _elementDesc, opts) {
502
+ await this.resolveRef(ref).hover({ timeout: opts?.timeoutMs ?? 3e4 });
503
+ }
504
+ async drag(startRef, _startElement, endRef, _endElement, opts) {
505
+ const startLocator = this.resolveRef(startRef);
506
+ const endLocator = this.resolveRef(endRef);
507
+ await startLocator.dragTo(endLocator, { timeout: opts?.timeoutMs ?? 6e4 });
508
+ }
509
+ async type(ref, text, _elementDesc, submit, opts) {
510
+ const locator = this.resolveRef(ref);
511
+ await locator.clear();
512
+ await locator.pressSequentially(text, {
513
+ delay: opts?.delay ?? 0,
514
+ timeout: opts?.timeoutMs ?? 3e4
515
+ });
516
+ if (submit) {
517
+ await locator.press("Enter");
518
+ }
519
+ }
520
+ async pressKey(key, _opts) {
521
+ await this.getPage().keyboard.press(key);
522
+ }
523
+ async fillForm(fields, opts) {
524
+ for (const field of fields) {
525
+ const locator = this.resolveRef(field.ref);
526
+ const fieldType = field.type ?? "textbox";
527
+ switch (fieldType) {
528
+ case "checkbox": {
529
+ const isChecked = await locator.isChecked();
530
+ const shouldBeChecked = field.value === "true";
531
+ if (shouldBeChecked !== isChecked) {
532
+ await locator.click({ timeout: opts?.timeoutMs ?? 3e4 });
533
+ }
534
+ break;
535
+ }
536
+ case "radio":
537
+ await locator.check({ timeout: opts?.timeoutMs ?? 3e4 });
538
+ break;
539
+ case "combobox":
540
+ await locator.selectOption(field.value, { timeout: opts?.timeoutMs ?? 3e4 });
541
+ break;
542
+ default:
543
+ await locator.fill(field.value, { timeout: opts?.timeoutMs ?? 3e4 });
544
+ }
545
+ }
546
+ }
547
+ async selectOption(ref, value, _elementDesc, opts) {
548
+ await this.resolveRef(ref).selectOption(value, { timeout: opts?.timeoutMs ?? 3e4 });
549
+ }
550
+ async fileUpload(paths, opts) {
551
+ const fileChooser = await this.getPage().waitForEvent("filechooser", {
552
+ timeout: opts?.timeoutMs ?? 3e4
553
+ });
554
+ await fileChooser.setFiles(paths);
555
+ }
556
+ async handleDialog(action, promptText, _opts) {
557
+ const dialog = this.pendingDialogs.shift();
558
+ if (dialog) {
559
+ if (action === "accept") {
560
+ await dialog.accept(promptText);
561
+ } else {
562
+ await dialog.dismiss();
563
+ }
564
+ }
565
+ }
566
+ async waitFor(opts) {
567
+ const page = this.getPage();
568
+ const timeout = opts?.timeout ?? opts?.timeoutMs ?? 3e4;
569
+ if (opts?.timeSec) {
570
+ await page.waitForTimeout(opts.timeSec * 1e3);
571
+ return;
572
+ }
573
+ if (opts?.text) {
574
+ await page.getByText(opts.text).first().waitFor({ state: "visible", timeout });
575
+ return;
576
+ }
577
+ if (opts?.textGone) {
578
+ await page.getByText(opts.textGone).first().waitFor({ state: "hidden", timeout });
579
+ return;
580
+ }
581
+ if (opts?.selector) {
582
+ await page.locator(opts.selector).waitFor({
583
+ state: opts.state ?? "visible",
584
+ timeout
585
+ });
586
+ }
587
+ }
588
+ async closePage(_opts) {
589
+ await this.getPage().close();
590
+ this.page = null;
591
+ }
592
+ async resize(width, height, _opts) {
593
+ await this.getPage().setViewportSize({ width, height });
594
+ }
595
+ async tabs(action, index, _opts) {
596
+ if (!this.context) throw new Error("No context available");
597
+ const pages = this.context.pages();
598
+ switch (action) {
599
+ case "list":
600
+ return Promise.all(
601
+ pages.map(async (p, i) => ({
602
+ index: i,
603
+ url: p.url(),
604
+ title: await p.title().catch(() => "")
605
+ }))
606
+ );
607
+ case "new": {
608
+ const newPage = await this.context.newPage();
609
+ this.page = newPage;
610
+ newPage.on("dialog", (dialog) => this.pendingDialogs.push(dialog));
611
+ return { index: pages.length };
612
+ }
613
+ case "close":
614
+ if (index !== void 0 && pages[index]) {
615
+ await pages[index].close();
616
+ } else {
617
+ await this.page?.close();
618
+ }
619
+ this.page = this.context.pages()[0] ?? null;
620
+ break;
621
+ case "select":
622
+ if (index !== void 0 && pages[index]) {
623
+ this.page = pages[index];
624
+ }
625
+ break;
626
+ }
627
+ return null;
628
+ }
629
+ async getStorageState(_opts) {
630
+ if (!this.context) throw new Error("No context available");
631
+ return this.context.storageState();
632
+ }
633
+ async getCurrentUrl(_opts) {
634
+ return this.getPage().url();
635
+ }
636
+ async getTitle(_opts) {
637
+ return this.getPage().title();
638
+ }
639
+ async getLinks(_opts) {
640
+ const page = this.getPage();
641
+ return page.$$eval(
642
+ "a[href]",
643
+ (links) => links.map((a) => a.href).filter((h) => !!h && (h.startsWith("http://") || h.startsWith("https://")))
644
+ );
645
+ }
646
+ async getElementBoundingBox(ref, _opts) {
647
+ const locator = this.resolveRef(ref);
648
+ const box = await locator.boundingBox();
649
+ if (!box) return null;
650
+ return { x: box.x, y: box.y, width: box.width, height: box.height };
651
+ }
652
+ async startTracing(_opts) {
653
+ if (!this.context) throw new Error("No context available");
654
+ await this.context.tracing.start({ screenshots: true, snapshots: true });
655
+ }
656
+ async stopTracing(_opts) {
657
+ if (!this.context) throw new Error("No context available");
658
+ const tracePath = `/tmp/trace-${Date.now()}.zip`;
659
+ await this.context.tracing.stop({ path: tracePath });
660
+ return {
661
+ trace: tracePath,
662
+ network: "",
663
+ resources: "",
664
+ directory: null,
665
+ legend: `Trace saved to ${tracePath}`
666
+ };
667
+ }
668
+ };
669
+
670
+ export {
671
+ LocalBrowserHost
672
+ };
673
+ //# sourceMappingURL=chunk-55MFLJD7.js.map