agentbrowse 0.0.3 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -21,7 +21,8 @@ agentbrowse read --page 2 # next chunk if it was truncated
21
21
  ## Rules that make this reliable
22
22
 
23
23
  - **Read with `--json`** when you need to parse: `read --json` gives `{ title, markdown, page, totalPages, state }`. Every command's text output ends with a `url | title | links` footer so you always know where you are.
24
- - **Targeting `click`/`type`**, in priority order: (1) visible text — `click "Sign in"`; (2) a number from the last `links`/`find` — `click 2`; (3) a CSS selector — `click "button.primary"`. Bare words are treated as visible text; use explicit CSS for elements without text.
24
+ - **Prefer `snapshot` for acting.** `snapshot` returns every actionable element as `[ref] role "name" (state)` — e.g. `[3] button "Search"`. Then act by ref: `click 3`, `type 2 "shoes"`. Refs resolve by role+name, so they survive CSS/DOM changes — far more reliable than selectors. If the page changed and a ref is stale, the tool returns a **fresh snapshot** in the error (`stale_ref`); just re-pick from it.
25
+ - **Other `click`/`type` targeting**, in priority order: (1) a `snapshot` ref — `click 3`; (2) visible text — `click "Sign in"`; (3) a number from the last `links`/`find`; (4) a CSS selector — `click "button.primary"`. Bare words are visible text; use explicit CSS for elements without text.
25
26
  - **Forms:** `fill -f email=me@x.com -f password=...` then `submit`. Or `type <field> <text>` for one field. Fields match by `name`, or pass a CSS selector.
26
27
  - **Truncation:** `read` is capped (`--max-chars`, default 8000). If `truncated`, request `--page 2`, etc. Don't assume you've seen the whole page from page 1.
27
28
  - **Errors:** non-zero exit codes mean failure; the reason is on **stderr** as `{ "error": { code, message } }`. `4` = target not found (re-run `links`/`find` to get fresh numbers), `3` = navigation problem, `2` = bad usage, `5` = daemon problem.
package/README.md CHANGED
@@ -41,8 +41,9 @@ Sessions are isolated by `--session <id>` (default `default`), each with its own
41
41
  | `open <url>` | Navigate the session to a URL |
42
42
  | `read [url]` | Current page (or open `<url>` first) as token-bounded markdown (`--max-chars`, `--page`) |
43
43
  | `links [url]` | Numbered, followable links (`--filter`) |
44
- | `find <text>` | Locate elements by visible text; numbers reusable by `click` |
45
- | `click <target>` | Click by visible text, a number from `links`/`find`, or a CSS selector |
44
+ | `snapshot [url]` | **Accessibility-tree view**: every actionable element with a stable `[ref]`, role, name, state (`--filter`, `--max`, `--json`). The robust way to act |
45
+ | `find <text>` | Locate elements by visible text (falls back to accessible name); numbers reusable by `click` |
46
+ | `click <target>` | Click by a `snapshot` ref (robust), visible text, a `links`/`find` number, or a CSS selector |
46
47
  | `type <field> <text>` | Type into a field (CSS selector or bare `name`) |
47
48
  | `fill -f name=value …` | Fill form fields |
48
49
  | `submit [form]` | Submit the current form |
package/dist/cli.js CHANGED
@@ -3,6 +3,52 @@
3
3
  // src/cli.ts
4
4
  import { Command } from "commander";
5
5
 
6
+ // package.json
7
+ var package_default = {
8
+ name: "agentbrowse",
9
+ version: "0.1.1",
10
+ description: "Agent-browser CLI: drive any website from the terminal.",
11
+ type: "module",
12
+ bin: {
13
+ agentbrowse: "dist/cli.js"
14
+ },
15
+ files: [
16
+ "dist",
17
+ "AGENTS.md"
18
+ ],
19
+ engines: {
20
+ node: ">=20"
21
+ },
22
+ scripts: {
23
+ build: "tsup",
24
+ dev: "tsx src/cli.ts",
25
+ test: "vitest run",
26
+ "test:watch": "vitest",
27
+ prepublishOnly: "npm run build && npm test"
28
+ },
29
+ dependencies: {
30
+ "@mozilla/readability": "^0.6.0",
31
+ ajv: "^8.17.1",
32
+ commander: "^12.1.0",
33
+ jsdom: "^24.1.0",
34
+ playwright: "^1.45.0",
35
+ turndown: "^7.2.0"
36
+ },
37
+ devDependencies: {
38
+ "@types/jsdom": "^21.1.0",
39
+ "@types/node": "^20.14.0",
40
+ "@types/turndown": "^5.0.4",
41
+ tsup: "^8.1.0",
42
+ tsx: "^4.16.0",
43
+ typescript: "^5.5.0",
44
+ vitest: "^2.0.0"
45
+ },
46
+ repository: {
47
+ type: "git",
48
+ url: "git+https://github.com/mandarwagh9/agentbrowse.git"
49
+ }
50
+ };
51
+
6
52
  // src/daemon/client.ts
7
53
  import net from "net";
8
54
  import path2 from "path";
@@ -134,6 +180,7 @@ var DAEMON_CODE_EXIT = {
134
180
  bad_args: EXIT.usage,
135
181
  unknown_cmd: EXIT.usage,
136
182
  target_not_found: EXIT.targetNotFound,
183
+ stale_ref: EXIT.targetNotFound,
137
184
  exec_error: EXIT.navigation
138
185
  };
139
186
  function fromDaemon(res) {
@@ -228,6 +275,31 @@ async function runLinks(opts) {
228
275
  return data.links.map((l) => `${l.n}. ${l.text} -> ${l.href}`).join("\n");
229
276
  }
230
277
 
278
+ // src/commands/snapshot.ts
279
/**
 * Ask the session daemon for an accessibility snapshot and render it for the CLI.
 *
 * When `opts.url` is set an `open` request is sent first; afterwards a
 * `snapshot` request is sent with `opts.filter` and `opts.max`.
 *
 * @param {object} opts
 * @param {string} opts.session - session id passed through to `sendRequest`
 * @param {string} [opts.url] - optional URL to open before snapshotting
 * @param {string} [opts.filter] - forwarded unchanged to the daemon
 * @param {number} [opts.max] - forwarded unchanged to the daemon
 * @param {boolean} [opts.json] - when truthy, return the raw payload as pretty JSON
 * @returns {Promise<string>} JSON text, or one `[ref] role "name"` line per
 *   element followed by a `---` separator and a footer line
 * @throws the error built by `fromDaemon` when either request reports `ok: false`
 */
async function runSnapshot(opts) {
  if (opts.url) {
    const opened = await sendRequest(opts.session, { id: nextId(), cmd: "open", args: { url: opts.url } });
    if (!opened.ok) throw fromDaemon(opened);
  }
  const res = await sendRequest(opts.session, {
    id: nextId(),
    cmd: "snapshot",
    args: { filter: opts.filter, max: opts.max }
  });
  if (!res.ok) throw fromDaemon(res);
  const d = res.data;
  if (opts.json) return JSON.stringify(d, null, 2);
  // Tolerate a payload without `elements` instead of crashing on `.map`.
  const elements = d.elements ?? [];
  const lines = elements.map((e) => {
    const state = [e.disabled ? "disabled" : "", e.checked ? "checked" : ""].filter(Boolean).join(" ");
    // JSON-quote the name so embedded `"` or `\` cannot break the `"name"` delimiters.
    const quotedName = JSON.stringify(String(e.name ?? ""));
    const target = e.href ? ` -> ${e.href}` : "";
    const suffix = state ? ` (${state})` : "";
    return `[${e.ref}] ${e.role.padEnd(9)} ${quotedName}${target}${suffix}`;
  });
  // `total` counts every actionable element the daemon saw; anything beyond
  // the returned list is reported as "more".
  const hidden = (d.total ?? elements.length) - elements.length;
  const more = hidden > 0 ? `\n(+${hidden} more \u2014 narrow with --filter)` : "";
  const body = lines.join("\n") || "(no actionable elements)";
  return `${body}${more}\n---\nsnapshot v${d.version} | url: ${d.url} | ${elements.length} actionable elements`;
}
302
+
231
303
  // src/commands/stop.ts
232
304
  async function runStop(session) {
233
305
  try {
@@ -568,6 +640,60 @@ function extractLinks(html, baseUrl, filter) {
568
640
  return out;
569
641
  }
570
642
 
643
+ // src/core/snapshot.ts
644
/** ARIA roles that the snapshot exposes as targets for `click`/`type`. */
var ACTIONABLE = /* @__PURE__ */ new Set([
  "link",
  "button",
  "textbox",
  "searchbox",
  "checkbox",
  "radio",
  "combobox",
  "listbox",
  "menuitem",
  "menuitemcheckbox",
  "menuitemradio",
  "tab",
  "switch",
  "slider",
  "spinbutton",
  "option"
]);
// One snapshot entry: `- role "name" [attr] [attr]: inline text`.
//   1 = role, 2 = raw (still escaped) name, 3 = every `[...]` attribute group.
// The name, the attribute groups, the colon and any inline text are optional.
var LINE = /^\s*-\s+([a-z][\w-]*)(?:\s+"((?:[^"\\]|\\.)*)")?((?:\s+\[[^\]]*\])*)(?::(?:\s.*)?)?\s*$/;
// `- /url: ...` property line that follows a link entry.
var URL2 = /^\s*-\s+\/url:\s*"?([^"\n]*?)"?\s*$/;
// YAML single-quoted key, e.g. `- 'link "Docs: intro"':` (`''` is an escaped quote).
var ARIA_QUOTED_KEY = /^(\s*-\s+)'((?:[^']|'')*)'(:.*)?\s*$/;
// A single `[...]` attribute group inside capture 3 of LINE.
var ARIA_ATTR = /\[([^\]]*)\]/g;

/** Strip YAML single-quoting from an entry key so LINE can match it; other lines pass through. */
function unquoteAriaKey(line) {
  const q = line.match(ARIA_QUOTED_KEY);
  if (!q) return line;
  return `${q[1]}${q[2].replace(/''/g, "'")}${q[3] ?? ""}`;
}

/** Decode the escaped text between the quotes of an entry name. */
function unescapeAriaName(escaped) {
  try {
    // Names are expected to be JSON-escaped (`\"`, `\\`, `\uXXXX`, ...).
    return JSON.parse(`"${escaped}"`);
  } catch {
    // Not valid JSON escaping: fall back to dropping the backslash of each pair.
    return escaped.replace(/\\(.)/g, "$1");
  }
}

/**
 * Parse the YAML-like text of an ARIA snapshot into a flat element list.
 *
 * Each `- role "name" [attrs]` line becomes one entry, in document order.
 * A `- /url:` line sets `href` on the preceding entry when that entry is a link.
 * Lines that match neither pattern are ignored.
 *
 * @param {string} yaml - snapshot text, one entry per line
 * @returns {Array<{role: string, name: string, actionable: boolean, nth: number,
 *   disabled?: boolean, checked?: boolean, level?: number, href?: string}>}
 *   `nth` is the zero-based index among earlier actionable entries with the
 *   same role and name (always 0 for non-actionable entries).
 */
function parseAriaSnapshot(yaml) {
  const out = [];
  const counts = /* @__PURE__ */ new Map();
  for (const raw of yaml.split("\n")) {
    const url = raw.match(URL2);
    if (url) {
      const last = out[out.length - 1];
      if (last && last.role === "link") last.href = url[1];
      continue;
    }
    const m = unquoteAriaKey(raw).match(LINE);
    if (!m) continue;
    const role = m[1];
    const name = unescapeAriaName(m[2] ?? "");
    // Attributes may arrive as several groups (`[checked] [disabled]`) or as
    // several tokens inside one group; flatten both shapes.
    const states = [...(m[3] ?? "").matchAll(ARIA_ATTR)]
      .flatMap((group) => group[1].split(/[\s,]+/))
      .filter(Boolean);
    const actionable = ACTIONABLE.has(role);
    const el = { role, name, actionable, nth: 0 };
    for (const s of states) {
      if (s === "disabled") el.disabled = true;
      else if (s === "checked") el.checked = true;
      else if (s.startsWith("level=")) el.level = Number(s.slice(6));
    }
    if (actionable) {
      // Duplicates of the same role+name are told apart by occurrence index.
      const key = `${role} ${name}`;
      const n = counts.get(key) ?? 0;
      el.nth = n;
      counts.set(key, n + 1);
    }
    out.push(el);
  }
  return out;
}
696
+
571
697
  // src/core/target.ts
572
698
  function looksLikeSelector(s) {
573
699
  const t = s.trim();
@@ -599,6 +725,39 @@ async function startDaemon(sessionId, opts = {}) {
599
725
  const context = await browser.newContext(statePath ? { storageState: statePath } : {});
600
726
  const page = await context.newPage();
601
727
  let lastRefs = [];
728
+ let snapshotVersion = 0;
729
+ let snapshotRefs = /* @__PURE__ */ new Map();
730
+ page.on("framenavigated", (f) => {
731
+ if (f === page.mainFrame()) snapshotVersion++;
732
+ });
733
/**
 * Take a new ARIA snapshot of `<body>` and rebuild the ref table.
 *
 * Replaces `snapshotRefs` with a new map from ref number (1-based position in
 * the returned list) to the `{ role, name, nth }` needed to re-locate the element.
 *
 * @param {string} [filter] - case-insensitive substring matched against `"role name"`
 * @param {number} [max=150] - maximum number of elements to return; values that
 *   are not finite or are below 1 fall back to 150
 * @returns {Promise<{elements: object[], total: number}>} `total` counts every
 *   actionable element before filtering and truncation
 */
async function freshSnapshot(filter, max = 150) {
  // `slice(0, NaN)` would return nothing and a negative bound would silently
  // drop elements from the end, so only a positive finite limit is honoured.
  const limit = Number.isFinite(max) && max >= 1 ? Math.floor(max) : 150;
  const yaml = await page.locator("body").ariaSnapshot();
  const all = parseAriaSnapshot(yaml).filter((e) => e.actionable);
  const needle = filter?.toLowerCase();
  const matching = needle ? all.filter((e) => `${e.role} ${e.name}`.toLowerCase().includes(needle)) : all;
  const shown = matching.slice(0, limit);
  snapshotRefs = /* @__PURE__ */ new Map();
  const elements = shown.map((e, i) => {
    const ref = i + 1;
    // `nth` was computed over the whole page, so it stays valid after filtering.
    snapshotRefs.set(ref, { role: e.role, name: e.name, nth: e.nth });
    return { ref, role: e.role, name: e.name, href: e.href, disabled: e.disabled, checked: e.checked };
  });
  return { elements, total: all.length };
}
746
/**
 * Build the `stale_ref` error response for a request whose snapshot ref no
 * longer resolves to an element.
 *
 * Takes a new unfiltered snapshot (which also replaces the ref table) and
 * attaches it so the caller can pick a new ref without another round trip.
 *
 * @param {{id: unknown}} req - the failed request; only `id` is read
 * @returns {Promise<object>} `{ id, ok: false, error, data }`, where `data`
 *   has the same fields as a successful `snapshot` response
 */
async function staleRef(req) {
  const fresh = await freshSnapshot();
  return {
    id: req.id,
    ok: false,
    error: { code: "stale_ref", message: "snapshot is stale; use the fresh refs" },
    // Include `total` as the `snapshot` command does, so a consumer can tell
    // whether the attached list was truncated.
    data: { version: snapshotVersion, url: page.url(), elements: fresh.elements, total: fresh.total }
  };
}
755
/**
 * Resolve a snapshot ref to a locator.
 *
 * @param {string} target - candidate ref; only all-digit strings are looked up
 * @returns the role+name locator narrowed to the recorded occurrence, or
 *   `null` when `target` is not numeric or no snapshot entry has that number
 *
 * NOTE(review): the `click` handler calls this before consulting the
 * `links`/`find` numbering, so a number present in both resolves to the
 * snapshot ref. Confirm that precedence is intended.
 */
function refLocator(target) {
  const isNumeric = /^\d+$/.test(target);
  const entry = isNumeric ? snapshotRefs.get(parseInt(target, 10)) : void 0;
  if (!entry) return null;
  const { role, name, nth } = entry;
  return page.getByRole(role, { name, exact: true }).nth(nth);
}
602
761
  const settle = () => page.waitForLoadState("domcontentloaded").catch(() => {
603
762
  });
604
763
  async function dispatch(req) {
@@ -631,7 +790,16 @@ async function startDaemon(sessionId, opts = {}) {
631
790
  case "find": {
632
791
  const text = String(req.args?.text ?? "");
633
792
  if (!text) return err(req, "bad_args", "find requires text");
634
- const matches = await page.getByText(text, { exact: false }).all();
793
+ let matches = await page.getByText(text, { exact: false }).all();
794
+ if (matches.length === 0) {
795
+ const needle = text.toLowerCase();
796
+ const named = parseAriaSnapshot(await page.locator("body").ariaSnapshot()).filter(
797
+ (e) => e.actionable && e.name.toLowerCase().includes(needle)
798
+ );
799
+ matches = named.map(
800
+ (e) => page.getByRole(e.role, { name: e.name, exact: true }).nth(e.nth)
801
+ );
802
+ }
635
803
  lastRefs = matches.map((locator) => ({ kind: "element", locator }));
636
804
  const items = await Promise.all(
637
805
  matches.map(async (loc, i) => ({
@@ -645,6 +813,13 @@ async function startDaemon(sessionId, opts = {}) {
645
813
  case "click": {
646
814
  const target = String(req.args?.target ?? "");
647
815
  if (!target) return err(req, "bad_args", "click requires a target");
816
+ const snapLoc = refLocator(target);
817
+ if (snapLoc) {
818
+ if (await snapLoc.count() === 0) return staleRef(req);
819
+ await snapLoc.click();
820
+ await settle();
821
+ return ok(req, { url: page.url(), title: await page.title() });
822
+ }
648
823
  if (/^\d+$/.test(target)) {
649
824
  const ref = lastRefs[parseInt(target, 10) - 1];
650
825
  if (!ref) return err(req, "target_not_found", `no ref #${target}; run links or find first`);
@@ -666,6 +841,12 @@ async function startDaemon(sessionId, opts = {}) {
666
841
  const selector = String(req.args?.selector ?? "");
667
842
  const text = String(req.args?.text ?? "");
668
843
  if (!selector) return err(req, "bad_args", "type requires a selector");
844
+ const snapField = refLocator(selector);
845
+ if (snapField) {
846
+ if (await snapField.count() === 0) return staleRef(req);
847
+ await snapField.fill(text);
848
+ return ok(req, { typed: selector, url: page.url() });
849
+ }
669
850
  const loc = resolveField(page, selector);
670
851
  if (await loc.count() === 0) return err(req, "target_not_found", `no field matching: ${selector}`);
671
852
  await loc.fill(text);
@@ -695,6 +876,12 @@ async function startDaemon(sessionId, opts = {}) {
695
876
  await settle();
696
877
  return ok(req, { url: page.url(), title: await page.title() });
697
878
  }
879
+ case "snapshot": {
880
+ const filter = req.args?.filter ? String(req.args.filter) : void 0;
881
+ const max = Number(req.args?.max ?? 150);
882
+ const { elements, total } = await freshSnapshot(filter, max);
883
+ return ok(req, { version: snapshotVersion, url: page.url(), elements, total });
884
+ }
698
885
  case "savestate": {
699
886
  const p = ensureStatePath(sessionId);
700
887
  await context.storageState({ path: p });
@@ -779,7 +966,7 @@ async function emit(produce) {
779
966
  }
780
967
  function buildProgram() {
781
968
  const program = new Command();
782
- program.name("agentbrowse").description("Agent-browser CLI: drive any website from the terminal.").version("0.0.1").option("--session <id>", "session name (isolated browser + cookies)", "default");
969
+ program.name("agentbrowse").description("Agent-browser CLI: drive any website from the terminal.").version(package_default.version).option("--session <id>", "session name (isolated browser + cookies)", "default");
783
970
  const session = (cmd) => cmd.optsWithGlobals().session;
784
971
  program.command("open").description("Navigate the session's browser to a URL.").argument("<url>", "URL to open").option("--json", "structured JSON output", false).action((url, opts, cmd) => emit(() => runOpen({ session: session(cmd), json: !!opts.json, url })));
785
972
  program.command("read").description("Read the current page (or open <url> first) as token-bounded markdown.").argument("[url]", "optional URL to open before reading").option("--json", "structured JSON output", false).option("--max-chars <n>", "max characters per page", (v) => parseInt(v, 10), 8e3).option("--page <n>", "page number when output is truncated", (v) => parseInt(v, 10), 1).action(
@@ -790,6 +977,9 @@ function buildProgram() {
790
977
  program.command("links").description("List navigable links on the current page (or open <url> first).").argument("[url]", "optional URL to open before listing").option("--json", "structured JSON output", false).option("--filter <text>", "case-insensitive substring filter").action(
791
978
  (url, opts, cmd) => emit(() => runLinks({ session: session(cmd), json: !!opts.json, filter: opts.filter, url }))
792
979
  );
980
+ program.command("snapshot").description("List actionable elements (accessibility tree) with refs for click/type.").argument("[url]", "optional URL to open before snapshotting").option("--json", "structured JSON output", false).option("--filter <text>", "case-insensitive substring filter").option("--max <n>", "max elements", (v) => parseInt(v, 10), 150).action(
981
+ (url, opts, cmd) => emit(() => runSnapshot({ session: session(cmd), json: !!opts.json, filter: opts.filter, max: opts.max, url }))
982
+ );
793
983
  program.command("find").description("Find elements on the current page by visible text (numbers reusable by click).").argument("<text...>", "visible text to search for").option("--json", "structured JSON output", false).action(
794
984
  (text, opts, cmd) => emit(() => runFind({ session: session(cmd), json: !!opts.json, text: text.join(" ") }))
795
985
  );
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentbrowse",
3
- "version": "0.0.3",
3
+ "version": "0.1.1",
4
4
  "description": "Agent-browser CLI: drive any website from the terminal.",
5
5
  "type": "module",
6
6
  "bin": {