@poncho-ai/browser 0.6.25 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @poncho-ai/browser@0.6.25 build /home/runner/work/poncho-ai/poncho-ai/packages/browser
2
+ > @poncho-ai/browser@0.7.0 build /home/runner/work/poncho-ai/poncho-ai/packages/browser
3
3
  > tsup src/index.ts --format esm --dts
4
4
 
5
5
  CLI Building entry: src/index.ts
@@ -7,8 +7,8 @@
7
7
  CLI tsup v8.5.1
8
8
  CLI Target: es2022
9
9
  ESM Build start
10
- ESM dist/index.js 47.44 KB
10
+ ESM dist/index.js 53.08 KB
11
11
  ESM ⚡️ Build success in 63ms
12
12
  DTS Build start
13
- DTS ⚡️ Build success in 5080ms
14
- DTS dist/index.d.ts 13.69 KB
13
+ DTS ⚡️ Build success in 5156ms
14
+ DTS dist/index.d.ts 14.54 KB
package/CHANGELOG.md CHANGED
@@ -1,5 +1,34 @@
1
1
  # @poncho-ai/browser
2
2
 
3
+ ## 0.7.0
4
+
5
+ ### Minor Changes
6
+
7
+ - [#184](https://github.com/cesr/poncho-ai/pull/184) [`12ce2be`](https://github.com/cesr/poncho-ai/commit/12ce2be01c9d98b1d9aa634d4d8051c4c0094a44) Thanks [@cesr](https://github.com/cesr)! - Add `browser_download` so the agent can save files from the browser into the
8
+ VFS. The tool fetches a file using the page's logged-in session (so it works
9
+ for files behind a login) and writes the bytes straight to the tenant's VFS via
10
+ `ToolContext.vfs` — never through the model. `url` defaults to the current page,
11
+ or pass a same-origin link's href. The fetch runs inside the page (`evaluate`),
12
+ so it works identically for local and remote/cloud browsers (bytes return over
13
+ CDP). Capped at 25 MB. The harness browser system prompt now documents it under
14
+ a "Saving files" section.
15
+
16
+ ## 0.6.26
17
+
18
+ ### Patch Changes
19
+
20
+ - [#182](https://github.com/cesr/poncho-ai/pull/182) [`5ca3615`](https://github.com/cesr/poncho-ai/commit/5ca361576cbe1a97e6315f550a58a302b4e70aca) Thanks [@cesr](https://github.com/cesr)! - Keep host viewport listeners alive across browser sessions. `onFrame` /
21
+ `onStatus` listeners were stored inside the per-conversation `ConversationTab`
22
+ object, so `closeTab` (and LRU eviction) deleted them along with the tab. When
23
+ an agent closed one browser and opened another in the same conversation, the
24
+ new tab had empty listener sets — the host's live-viewport subscription was
25
+ silently orphaned, so the second session's `browser:status` / frames never
26
+ reached the client until it reconnected (the "pill/sheet doesn't appear, or is
27
+ left over after close, until I navigate away and back" bug). Listeners now live
28
+ in session-level maps keyed by conversationId, independent of any tab's
29
+ lifetime; they persist until the host unsubscribes, and `emitStatus` delivers
30
+ the final `active:false` on close before the tab is removed.
31
+
3
32
  ## 0.6.25
4
33
 
5
34
  ### Patch Changes
package/dist/index.d.ts CHANGED
@@ -80,6 +80,8 @@ declare class BrowserSession {
80
80
  private readonly sessionId;
81
81
  private manager;
82
82
  private readonly tabs;
83
+ private readonly frameListeners;
84
+ private readonly statusListeners;
83
85
  private _contextStealthInstalled;
84
86
  private readonly _uaOverrideApplied;
85
87
  private _lockQueue;
@@ -142,6 +144,21 @@ declare class BrowserSession {
142
144
  url: string;
143
145
  title: string;
144
146
  }>;
147
+ /**
148
+ * Fetch a file using the page's own (logged-in) session and return its
149
+ * bytes, so the host can persist it (e.g. to a VFS). `url` defaults to the
150
+ * current page. The fetch runs INSIDE the page via `evaluate`, so it carries
151
+ * the site's cookies and works the same whether the browser is local or a
152
+ * remote/cloud provider (the bytes come back over CDP). Because it's a page
153
+ * `fetch`, same-origin and CORS-permissive URLs work; a cross-origin URL the
154
+ * site doesn't allow CORS for will fail — navigate to the file first (so it's
155
+ * same-origin) or pass its direct URL while on that site.
156
+ */
157
+ download(conversationId: string, url?: string): Promise<{
158
+ data: Buffer;
159
+ contentType: string;
160
+ filename: string;
161
+ }>;
145
162
  scroll(conversationId: string, direction: "up" | "down", amount?: number): Promise<void>;
146
163
  clickText(conversationId: string, text: string, exact?: boolean): Promise<void>;
147
164
  executeJs(conversationId: string, script: string): Promise<unknown>;
package/dist/index.js CHANGED
@@ -244,12 +244,43 @@ var SAME_TAB_INIT_SCRIPT = `
244
244
  } catch {}
245
245
  })();
246
246
  `;
247
+ function sanitizeName(name) {
248
+ const cleaned = name.trim().replace(/[/\\]/g, "_").replace(/\0/g, "");
249
+ return cleaned || "download";
250
+ }
251
+ function filenameFromDownload(disposition, url) {
252
+ const star = /filename\*=(?:UTF-8'')?["']?([^"';]+)/i.exec(disposition);
253
+ if (star?.[1]) {
254
+ try {
255
+ return sanitizeName(decodeURIComponent(star[1]));
256
+ } catch {
257
+ return sanitizeName(star[1]);
258
+ }
259
+ }
260
+ const plain = /filename=["']?([^"';]+)/i.exec(disposition);
261
+ if (plain?.[1]) return sanitizeName(plain[1]);
262
+ try {
263
+ const base = new URL(url).pathname.split("/").filter(Boolean).pop();
264
+ if (base) return sanitizeName(decodeURIComponent(base));
265
+ } catch {
266
+ }
267
+ return "download";
268
+ }
247
269
  var BrowserSession = class {
248
270
  config;
249
271
  sessionId;
250
272
  manager;
251
273
  // Tab management: conversationId → tab state
252
274
  tabs = /* @__PURE__ */ new Map();
275
+ // Viewport listeners, keyed by conversationId and kept SEPARATE from the
276
+ // tab. A host (e.g. a live iOS viewport) subscribes once; its listeners must
277
+ // outlive any individual tab so that closing one browser and opening another
278
+ // in the same conversation — or an LRU tab eviction — doesn't silently
279
+ // orphan the subscription (the symptom: the second session's status/frames
280
+ // never reach the client until it reconnects). Tabs come and go; listeners
281
+ // persist until the host unsubscribes.
282
+ frameListeners = /* @__PURE__ */ new Map();
283
+ statusListeners = /* @__PURE__ */ new Map();
253
284
  // Whether context-level stealth init script has been installed
254
285
  _contextStealthInstalled = false;
255
286
  // Track which tabs have had per-page CDP UA override applied
@@ -549,13 +580,10 @@ var BrowserSession = class {
549
580
  if (realTabs > 0) {
550
581
  await mgr.newTab();
551
582
  }
552
- const existing = tab;
553
583
  tab = {
554
584
  tabIndex: mgr.getActiveIndex(),
555
585
  active: true,
556
- lastUsed: Date.now(),
557
- frameListeners: existing?.frameListeners ?? /* @__PURE__ */ new Set(),
558
- statusListeners: existing?.statusListeners ?? /* @__PURE__ */ new Set()
586
+ lastUsed: Date.now()
559
587
  };
560
588
  this.tabs.set(conversationId, tab);
561
589
  } else {
@@ -687,6 +715,55 @@ var BrowserSession = class {
687
715
  this.unlock();
688
716
  }
689
717
  }
718
+ /**
719
+ * Fetch a file using the page's own (logged-in) session and return its
720
+ * bytes, so the host can persist it (e.g. to a VFS). `url` defaults to the
721
+ * current page. The fetch runs INSIDE the page via `evaluate`, so it carries
722
+ * the site's cookies and works the same whether the browser is local or a
723
+ * remote/cloud provider (the bytes come back over CDP). Because it's a page
724
+ * `fetch`, same-origin and CORS-permissive URLs work; a cross-origin URL the
725
+ * site doesn't allow CORS for will fail — navigate to the file first (so it's
726
+ * same-origin) or pass its direct URL while on that site.
727
+ */
728
+ async download(conversationId, url) {
729
+ await this.lock();
730
+ try {
731
+ const mgr = await this.ensureManager();
732
+ await this.switchToConversation(mgr, conversationId);
733
+ const page = mgr.getPage();
734
+ const target = url && url.trim() ? url.trim() : page.url();
735
+ if (!target || target === "about:blank") {
736
+ throw new Error("no URL to download (open the file's page first, or pass a url)");
737
+ }
738
+ const MAX_BYTES = 25 * 1024 * 1024;
739
+ const expr = `(async () => {
740
+ const res = await fetch(${JSON.stringify(target)}, { credentials: "include" });
741
+ if (!res.ok) throw new Error("HTTP " + res.status + " " + res.statusText);
742
+ const buf = new Uint8Array(await res.arrayBuffer());
743
+ if (buf.length > ${MAX_BYTES}) throw new Error("file too large: " + buf.length + " bytes (max ${MAX_BYTES})");
744
+ let bin = "";
745
+ const CH = 0x8000;
746
+ for (let i = 0; i < buf.length; i += CH) {
747
+ bin += String.fromCharCode.apply(null, buf.subarray(i, i + CH));
748
+ }
749
+ return {
750
+ base64: btoa(bin),
751
+ contentType: res.headers.get("content-type") || "",
752
+ disposition: res.headers.get("content-disposition") || "",
753
+ finalUrl: res.url || ${JSON.stringify(target)},
754
+ };
755
+ })()`;
756
+ const r = await page.evaluate(expr);
757
+ const data = Buffer.from(r.base64, "base64");
758
+ return {
759
+ data,
760
+ contentType: r.contentType,
761
+ filename: filenameFromDownload(r.disposition, r.finalUrl)
762
+ };
763
+ } finally {
764
+ this.unlock();
765
+ }
766
+ }
690
767
  async scroll(conversationId, direction, amount) {
691
768
  await this.lock();
692
769
  try {
@@ -829,15 +906,15 @@ var BrowserSession = class {
829
906
  (frame) => {
830
907
  const cid = this._screencastConversation;
831
908
  if (!cid) return;
832
- const t = this.tabs.get(cid);
833
- if (!t) return;
909
+ const listeners = this.frameListeners.get(cid);
910
+ if (!listeners || listeners.size === 0) return;
834
911
  const browserFrame = {
835
912
  data: frame.data,
836
913
  width: frame.metadata.deviceWidth,
837
914
  height: frame.metadata.deviceHeight,
838
915
  timestamp: Date.now()
839
916
  };
840
- for (const listener of t.frameListeners) {
917
+ for (const listener of listeners) {
841
918
  try {
842
919
  listener(browserFrame);
843
920
  } catch {
@@ -865,29 +942,37 @@ var BrowserSession = class {
865
942
  // Per-conversation event listeners
866
943
  // -----------------------------------------------------------------------
867
944
  onFrame(conversationId, listener) {
868
- let tab = this.tabs.get(conversationId);
869
- if (!tab) {
870
- tab = { tabIndex: -1, active: false, lastUsed: Date.now(), frameListeners: /* @__PURE__ */ new Set(), statusListeners: /* @__PURE__ */ new Set() };
871
- this.tabs.set(conversationId, tab);
945
+ let set = this.frameListeners.get(conversationId);
946
+ if (!set) {
947
+ set = /* @__PURE__ */ new Set();
948
+ this.frameListeners.set(conversationId, set);
872
949
  }
873
- tab.frameListeners.add(listener);
950
+ set.add(listener);
874
951
  return () => {
875
- tab.frameListeners.delete(listener);
876
- if (tab.frameListeners.size === 0 && this._screencastConversation === conversationId) {
877
- this.stopScreencast().catch(() => {
878
- });
952
+ const s = this.frameListeners.get(conversationId);
953
+ if (!s) return;
954
+ s.delete(listener);
955
+ if (s.size === 0) {
956
+ this.frameListeners.delete(conversationId);
957
+ if (this._screencastConversation === conversationId) {
958
+ this.stopScreencast().catch(() => {
959
+ });
960
+ }
879
961
  }
880
962
  };
881
963
  }
882
964
  onStatus(conversationId, listener) {
883
- let tab = this.tabs.get(conversationId);
884
- if (!tab) {
885
- tab = { tabIndex: -1, active: false, lastUsed: Date.now(), frameListeners: /* @__PURE__ */ new Set(), statusListeners: /* @__PURE__ */ new Set() };
886
- this.tabs.set(conversationId, tab);
965
+ let set = this.statusListeners.get(conversationId);
966
+ if (!set) {
967
+ set = /* @__PURE__ */ new Set();
968
+ this.statusListeners.set(conversationId, set);
887
969
  }
888
- tab.statusListeners.add(listener);
970
+ set.add(listener);
889
971
  return () => {
890
- tab.statusListeners.delete(listener);
972
+ const s = this.statusListeners.get(conversationId);
973
+ if (!s) return;
974
+ s.delete(listener);
975
+ if (s.size === 0) this.statusListeners.delete(conversationId);
891
976
  };
892
977
  }
893
978
  // -----------------------------------------------------------------------
@@ -1087,8 +1172,9 @@ var BrowserSession = class {
1087
1172
  url: tab?.url,
1088
1173
  interactionAllowed: tab?.active ?? false
1089
1174
  };
1090
- if (tab) {
1091
- for (const listener of tab.statusListeners) {
1175
+ const listeners = this.statusListeners.get(conversationId);
1176
+ if (listeners) {
1177
+ for (const listener of listeners) {
1092
1178
  try {
1093
1179
  listener(status);
1094
1180
  } catch {
@@ -1246,6 +1332,47 @@ function createBrowserTools(getSession) {
1246
1332
  return { url: result.url, title: result.title, text: result.text };
1247
1333
  }
1248
1334
  },
1335
+ {
1336
+ name: "browser_download",
1337
+ description: "Download a file from the browser and save it into the user's virtual filesystem (VFS). Fetches the file using the browser's logged-in session, so it works for files behind a login \u2014 use it to keep a PDF, CSV, image, or other file the page offers. It fetches `url` (or the current page if you omit it), so for a download link on the page, grab its href from a snapshot first; for a file that opens in the browser, navigate to it and call this with no url. The fetch runs in the page, so the url should be same-origin with the current page (navigate to the file's site first if needed). Returns the saved VFS path and byte size \u2014 the bytes go straight to the VFS, not through the chat.",
1338
+ inputSchema: {
1339
+ type: "object",
1340
+ properties: {
1341
+ path: {
1342
+ type: "string",
1343
+ description: "Destination in the VFS. Include a filename (e.g. /downloads/report.pdf); parent folders are created as needed. End with '/' (e.g. /downloads/) to keep the file's own name."
1344
+ },
1345
+ url: {
1346
+ type: "string",
1347
+ description: "URL of the file to download. Optional \u2014 defaults to the current page's URL."
1348
+ }
1349
+ },
1350
+ required: ["path"]
1351
+ },
1352
+ handler: async (input, context) => {
1353
+ const session = getSession();
1354
+ const vfs = context.vfs;
1355
+ if (!vfs) throw new Error("VFS is not available in this environment");
1356
+ const dest0 = String(input.path ?? "").trim();
1357
+ if (!dest0) throw new Error("path is required");
1358
+ const url = input.url != null ? String(input.url) : void 0;
1359
+ const { data, contentType, filename } = await session.download(
1360
+ context.conversationId ?? "__default__",
1361
+ url
1362
+ );
1363
+ let dest = dest0.startsWith("/") ? dest0 : `/${dest0}`;
1364
+ if (dest.endsWith("/")) dest = `${dest}${filename}`;
1365
+ const slash = dest.lastIndexOf("/");
1366
+ if (slash > 0) {
1367
+ try {
1368
+ await vfs.mkdir(dest.slice(0, slash), { recursive: true });
1369
+ } catch {
1370
+ }
1371
+ }
1372
+ await vfs.writeFile(dest, new Uint8Array(data), contentType || void 0);
1373
+ return { path: dest, bytes: data.length, ...contentType ? { contentType } : {} };
1374
+ }
1375
+ },
1249
1376
  {
1250
1377
  name: "browser_screenshot",
1251
1378
  description: "Take a screenshot of the current page. Returns the image so you can see exactly what the page looks like. Use this when you need to see visual layout, verify actions, or read content that isn't in the accessibility tree.",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@poncho-ai/browser",
3
- "version": "0.6.25",
3
+ "version": "0.7.0",
4
4
  "description": "Browser automation for Poncho agents, powered by agent-browser",
5
5
  "repository": {
6
6
  "type": "git",
package/src/session.ts CHANGED
@@ -144,8 +144,29 @@ interface ConversationTab {
144
144
  url?: string;
145
145
  active: boolean;
146
146
  lastUsed: number;
147
- frameListeners: Set<FrameListener>;
148
- statusListeners: Set<StatusListener>;
147
+ }
148
+
149
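+ /** Replace path separators with "_" and drop NUL characters so a derived name stays a single path segment; an empty result becomes "download". NOTE(review): "." and ".." pass through unchanged — confirm the VFS rejects them. */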
150
+ function sanitizeName(name: string): string {
151
+ const cleaned = name.trim().replace(/[/\\]/g, "_").replace(/\0/g, "");
152
+ return cleaned || "download";
153
+ }
154
+
155
+ /** Derive a filename from a Content-Disposition header, falling back to the
156
+ * URL's last path segment, then a generic "download". */
157
+ function filenameFromDownload(disposition: string, url: string): string {
158
+ const star = /filename\*=(?:UTF-8'')?["']?([^"';]+)/i.exec(disposition);
159
+ if (star?.[1]) {
160
+ try { return sanitizeName(decodeURIComponent(star[1])); }
161
+ catch { return sanitizeName(star[1]); }
162
+ }
163
+ const plain = /filename=["']?([^"';]+)/i.exec(disposition);
164
+ if (plain?.[1]) return sanitizeName(plain[1]);
165
+ try {
166
+ const base = new URL(url).pathname.split("/").filter(Boolean).pop();
167
+ if (base) return sanitizeName(decodeURIComponent(base));
168
+ } catch { /* not a parseable URL */ }
169
+ return "download";
149
170
  }
150
171
 
151
172
  export class BrowserSession {
@@ -156,6 +177,16 @@ export class BrowserSession {
156
177
  // Tab management: conversationId → tab state
157
178
  private readonly tabs = new Map<string, ConversationTab>();
158
179
 
180
+ // Viewport listeners, keyed by conversationId and kept SEPARATE from the
181
+ // tab. A host (e.g. a live iOS viewport) subscribes once; its listeners must
182
+ // outlive any individual tab so that closing one browser and opening another
183
+ // in the same conversation — or an LRU tab eviction — doesn't silently
184
+ // orphan the subscription (the symptom: the second session's status/frames
185
+ // never reach the client until it reconnects). Tabs come and go; listeners
186
+ // persist until the host unsubscribes.
187
+ private readonly frameListeners = new Map<string, Set<FrameListener>>();
188
+ private readonly statusListeners = new Map<string, Set<StatusListener>>();
189
+
159
190
  // Whether context-level stealth init script has been installed
160
191
  private _contextStealthInstalled = false;
161
192
 
@@ -500,13 +531,10 @@ export class BrowserSession {
500
531
  if (realTabs > 0) {
501
532
  await mgr.newTab();
502
533
  }
503
- const existing = tab;
504
534
  tab = {
505
535
  tabIndex: mgr.getActiveIndex(),
506
536
  active: true,
507
537
  lastUsed: Date.now(),
508
- frameListeners: existing?.frameListeners ?? new Set(),
509
- statusListeners: existing?.statusListeners ?? new Set(),
510
538
  };
511
539
  this.tabs.set(conversationId, tab);
512
540
  } else {
@@ -650,6 +678,66 @@ export class BrowserSession {
650
678
  }
651
679
  }
652
680
 
681
+ /**
682
+ * Fetch a file using the page's own (logged-in) session and return its
683
+ * bytes, so the host can persist it (e.g. to a VFS). `url` defaults to the
684
+ * current page. The fetch runs INSIDE the page via `evaluate`, so it carries
685
+ * the site's cookies and works the same whether the browser is local or a
686
+ * remote/cloud provider (the bytes come back over CDP). Because it's a page
687
+ * `fetch`, same-origin and CORS-permissive URLs work; a cross-origin URL the
688
+ * site doesn't allow CORS for will fail — navigate to the file first (so it's
689
+ * same-origin) or pass its direct URL while on that site.
690
+ */
691
+ async download(
692
+ conversationId: string,
693
+ url?: string,
694
+ ): Promise<{ data: Buffer; contentType: string; filename: string }> {
695
+ await this.lock();
696
+ try {
697
+ const mgr = await this.ensureManager();
698
+ await this.switchToConversation(mgr, conversationId);
699
+ const page = mgr.getPage();
700
+ const target = url && url.trim() ? url.trim() : page.url();
701
+ if (!target || target === "about:blank") {
702
+ throw new Error("no URL to download (open the file's page first, or pass a url)");
703
+ }
704
+ const MAX_BYTES = 25 * 1024 * 1024;
705
+ // Build the in-page fetch. JSON.stringify safely escapes the URL into the
706
+ // evaluated source. Base64 in-page so the bytes survive the JSON channel.
707
+ const expr = `(async () => {
708
+ const res = await fetch(${JSON.stringify(target)}, { credentials: "include" });
709
+ if (!res.ok) throw new Error("HTTP " + res.status + " " + res.statusText);
710
+ const buf = new Uint8Array(await res.arrayBuffer());
711
+ if (buf.length > ${MAX_BYTES}) throw new Error("file too large: " + buf.length + " bytes (max ${MAX_BYTES})");
712
+ let bin = "";
713
+ const CH = 0x8000;
714
+ for (let i = 0; i < buf.length; i += CH) {
715
+ bin += String.fromCharCode.apply(null, buf.subarray(i, i + CH));
716
+ }
717
+ return {
718
+ base64: btoa(bin),
719
+ contentType: res.headers.get("content-type") || "",
720
+ disposition: res.headers.get("content-disposition") || "",
721
+ finalUrl: res.url || ${JSON.stringify(target)},
722
+ };
723
+ })()`;
724
+ const r = (await page.evaluate(expr)) as {
725
+ base64: string;
726
+ contentType: string;
727
+ disposition: string;
728
+ finalUrl: string;
729
+ };
730
+ const data = Buffer.from(r.base64, "base64");
731
+ return {
732
+ data,
733
+ contentType: r.contentType,
734
+ filename: filenameFromDownload(r.disposition, r.finalUrl),
735
+ };
736
+ } finally {
737
+ this.unlock();
738
+ }
739
+ }
740
+
653
741
  async scroll(conversationId: string, direction: "up" | "down", amount?: number): Promise<void> {
654
742
  await this.lock();
655
743
  try {
@@ -799,15 +887,15 @@ export class BrowserSession {
799
887
  (frame) => {
800
888
  const cid = this._screencastConversation;
801
889
  if (!cid) return;
802
- const t = this.tabs.get(cid);
803
- if (!t) return;
890
+ const listeners = this.frameListeners.get(cid);
891
+ if (!listeners || listeners.size === 0) return;
804
892
  const browserFrame: BrowserFrame = {
805
893
  data: frame.data,
806
894
  width: frame.metadata.deviceWidth,
807
895
  height: frame.metadata.deviceHeight,
808
896
  timestamp: Date.now(),
809
897
  };
810
- for (const listener of t.frameListeners) {
898
+ for (const listener of listeners) {
811
899
  try { listener(browserFrame); } catch { /* */ }
812
900
  }
813
901
  },
@@ -835,28 +923,38 @@ export class BrowserSession {
835
923
  // -----------------------------------------------------------------------
836
924
 
837
925
  onFrame(conversationId: string, listener: FrameListener): () => void {
838
- let tab = this.tabs.get(conversationId);
839
- if (!tab) {
840
- tab = { tabIndex: -1, active: false, lastUsed: Date.now(), frameListeners: new Set(), statusListeners: new Set() };
841
- this.tabs.set(conversationId, tab);
926
+ let set = this.frameListeners.get(conversationId);
927
+ if (!set) {
928
+ set = new Set();
929
+ this.frameListeners.set(conversationId, set);
842
930
  }
843
- tab.frameListeners.add(listener);
931
+ set.add(listener);
844
932
  return () => {
845
- tab!.frameListeners.delete(listener);
846
- if (tab!.frameListeners.size === 0 && this._screencastConversation === conversationId) {
847
- this.stopScreencast().catch(() => {});
933
+ const s = this.frameListeners.get(conversationId);
934
+ if (!s) return;
935
+ s.delete(listener);
936
+ if (s.size === 0) {
937
+ this.frameListeners.delete(conversationId);
938
+ if (this._screencastConversation === conversationId) {
939
+ this.stopScreencast().catch(() => {});
940
+ }
848
941
  }
849
942
  };
850
943
  }
851
944
 
852
945
  onStatus(conversationId: string, listener: StatusListener): () => void {
853
- let tab = this.tabs.get(conversationId);
854
- if (!tab) {
855
- tab = { tabIndex: -1, active: false, lastUsed: Date.now(), frameListeners: new Set(), statusListeners: new Set() };
856
- this.tabs.set(conversationId, tab);
946
+ let set = this.statusListeners.get(conversationId);
947
+ if (!set) {
948
+ set = new Set();
949
+ this.statusListeners.set(conversationId, set);
857
950
  }
858
- tab.statusListeners.add(listener);
859
- return () => { tab!.statusListeners.delete(listener); };
951
+ set.add(listener);
952
+ return () => {
953
+ const s = this.statusListeners.get(conversationId);
954
+ if (!s) return;
955
+ s.delete(listener);
956
+ if (s.size === 0) this.statusListeners.delete(conversationId);
957
+ };
860
958
  }
861
959
 
862
960
  // -----------------------------------------------------------------------
@@ -1068,8 +1166,12 @@ export class BrowserSession {
1068
1166
  url: tab?.url,
1069
1167
  interactionAllowed: tab?.active ?? false,
1070
1168
  };
1071
- if (tab) {
1072
- for (const listener of tab.statusListeners) {
1169
+ // Listeners live at the session level, so a close (which deletes the tab)
1170
+ // still delivers the final active:false to the host before the tab is
1171
+ // gone — and a later reopen reuses the same subscription.
1172
+ const listeners = this.statusListeners.get(conversationId);
1173
+ if (listeners) {
1174
+ for (const listener of listeners) {
1073
1175
  try { listener(status); } catch { /* */ }
1074
1176
  }
1075
1177
  }
package/src/tools.ts CHANGED
@@ -176,6 +176,55 @@ export function createBrowserTools(
176
176
  return { url: result.url, title: result.title, text: result.text };
177
177
  },
178
178
  },
179
+ {
180
+ name: "browser_download",
181
+ description:
182
+ "Download a file from the browser and save it into the user's virtual filesystem (VFS). " +
183
+ "Fetches the file using the browser's logged-in session, so it works for files behind a login — " +
184
+ "use it to keep a PDF, CSV, image, or other file the page offers. " +
185
+ "It fetches `url` (or the current page if you omit it), so for a download link on the page, grab its href from a snapshot first; " +
186
+ "for a file that opens in the browser, navigate to it and call this with no url. " +
187
+ "The fetch runs in the page, so the url should be same-origin with the current page (navigate to the file's site first if needed). " +
188
+ "Returns the saved VFS path and byte size — the bytes go straight to the VFS, not through the chat.",
189
+ inputSchema: {
190
+ type: "object",
191
+ properties: {
192
+ path: {
193
+ type: "string",
194
+ description:
195
+ "Destination in the VFS. Include a filename (e.g. /downloads/report.pdf); parent folders are created as needed. " +
196
+ "End with '/' (e.g. /downloads/) to keep the file's own name.",
197
+ },
198
+ url: {
199
+ type: "string",
200
+ description:
201
+ "URL of the file to download. Optional — defaults to the current page's URL.",
202
+ },
203
+ },
204
+ required: ["path"],
205
+ },
206
+ handler: async (input: BrowserToolInput, context: ToolContext) => {
207
+ const session = getSession();
208
+ const vfs = context.vfs;
209
+ if (!vfs) throw new Error("VFS is not available in this environment");
210
+ const dest0 = String(input.path ?? "").trim();
211
+ if (!dest0) throw new Error("path is required");
212
+ const url = input.url != null ? String(input.url) : undefined;
213
+ const { data, contentType, filename } = await session.download(
214
+ context.conversationId ?? "__default__",
215
+ url,
216
+ );
217
+ // A trailing slash means "use the file's own name"; a path without one is taken as the full file path.
218
+ let dest = dest0.startsWith("/") ? dest0 : `/${dest0}`;
219
+ if (dest.endsWith("/")) dest = `${dest}${filename}`;
220
+ const slash = dest.lastIndexOf("/");
221
+ if (slash > 0) {
222
+ try { await vfs.mkdir(dest.slice(0, slash), { recursive: true }); } catch { /* exists */ }
223
+ }
224
+ await vfs.writeFile(dest, new Uint8Array(data), contentType || undefined);
225
+ return { path: dest, bytes: data.length, ...(contentType ? { contentType } : {}) };
226
+ },
227
+ },
179
228
  {
180
229
  name: "browser_screenshot",
181
230
  description: