github-router 0.3.73 → 0.3.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -487,12 +487,43 @@ async function extractSnapshotLegacy(tabId, opts) {
487
487
  // summary: walk text nodes whose parent is in the viewport; cap
488
488
  // at 20 KB. The model sees what a user could read without
489
489
  // scrolling. Off-screen content remains reachable via mode:"full".
490
- // full: 256 KiB innerText cap (legacy behavior).
490
+ // full: walk all rendered text nodes; cap at 256 KiB.
491
491
  let text = ""
492
492
  if (mode === "full") {
493
+ // Mirror of collectVisibleText(root, cap, "rendered") in
494
+ // src/browser-ext/visible-text.js — executeScript serializes only
495
+ // this func and drops its module closure, so it cannot import that
496
+ // helper; keep the two in sync by hand. We walk text nodes and join
497
+ // with "\n" instead of using document.body.innerText, which glues
498
+ // adjacent inline siblings with no separator
499
+ // (<span>A</span><span>B</span> -> "AB" instead of "A\nB").
493
500
  const MAX_FULL = 256 * 1024
494
- text = document.body ? document.body.innerText : ""
495
- if (text.length > MAX_FULL) text = text.slice(0, MAX_FULL)
501
+ const parts = []
502
+ let total = 0
503
+ const root = document.body || document.documentElement
504
+ if (root) {
505
+ const tw = document.createTreeWalker(root, 4) // NodeFilter.SHOW_TEXT === 4
506
+ let n
507
+ while ((n = tw.nextNode())) {
508
+ const parent = n.parentElement
509
+ if (!parent) continue
510
+ const ptag = parent.tagName ? parent.tagName.toLowerCase() : ""
511
+ if (ptag === "script" || ptag === "style" || ptag === "noscript") continue
512
+ // display:none / detached parents report zero client rects;
513
+ // off-screen (scrolled-out) parents still report rects, so full
514
+ // mode keeps them — matching innerText's "rendered text" intent.
515
+ if (parent.getClientRects().length === 0) continue
516
+ const t = (n.textContent || "").replace(/\s+/g, " ").trim()
517
+ if (!t) continue
518
+ if (total + t.length + 1 > MAX_FULL) {
519
+ parts.push(t.slice(0, Math.max(0, MAX_FULL - total)))
520
+ break
521
+ }
522
+ parts.push(t)
523
+ total += t.length + 1
524
+ }
525
+ }
526
+ text = parts.join("\n")
496
527
  } else {
497
528
  const TEXT_CAP = 20 * 1024
498
529
  const parts = []
@@ -2,7 +2,7 @@
2
2
  "manifest_version": 3,
3
3
  "name": "github-router browser bridge",
4
4
  "short_name": "gh-router-browser",
5
- "version": "0.3.73",
5
+ "version": "0.3.82",
6
6
  "description": "Bridge between Claude (via github-router /mcp) and the browser. Implements tab control, navigation, clicks, form fill, downloads, screenshots, devtools eval. Blocks navigation to chrome://settings.",
7
7
  "key": "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqJElxuBlonBS3TVW9FJN0mGTtShB3L1hoaYf6k39SOr1ogGYmF90EjRxy1i21k9wQQjPf26bcBu/9X67KrQjQV0uB38CaNukgiSeoLjfptN811u+PJHx6BP+jx3Qa6/3VenNPxHC8WEU0GXql8QSjIHEyCwKb6fMASXOK94JyB5Ywov2x8mt/+9ncqBBBMVzf6r5Sagy4PL1XnryLsuADD/vOEkPet8wXgH/Oj7v5tTsQQZ7U1JT51PoDs2BFnXc5v3TkVgZwd32k3ONh+nkDw1Hof+4zwUGOyJE6eMrlYzRlKM4Qxdf9JpavQvqfieAbTRWcyKeclnHeoIfE7cDBQIDAQAB",
8
8
  "background": {
@@ -18,6 +18,8 @@
18
18
  // fails (enterprise DeveloperToolsAvailability=2, DevTools already
19
19
  // open on the tab, etc.).
20
20
 
21
+ import { buildVisibleTextExpr } from "./visible-text.js"
22
+
21
23
  const ELEMENT_CAP = 500 // total elements across all frames
22
24
  const PER_FRAME_CAP = 200 // per-frame element cap
23
25
  const TEXT_CAP = 32 * 1024 // viewport-visible text cap
@@ -89,7 +91,7 @@ export async function extractSnapshotCDP(tabId, opts, deps) {
89
91
  const elements = []
90
92
  const refCounter = { next: 1 }
91
93
  const usedRefs = new Set()
92
- const diag = { frames: frames.length, axNodes: 0, interesting: 0, resolved: 0, withRef: 0 }
94
+ const diag = { frames: frames.length, axNodes: 0, interesting: 0, resolved: 0, withRef: 0, textFramesSkipped: 0 }
93
95
  for (const frame of frames) {
94
96
  if (timedOut) break
95
97
  if (elements.length >= ELEMENT_CAP) {
@@ -119,7 +121,7 @@ export async function extractSnapshotCDP(tabId, opts, deps) {
119
121
  // attach already succeeded so an enable failure is rare.
120
122
  }
121
123
  }
122
- const text = await extractVisibleText(tabId, sendCommand).catch(() => "")
124
+ const text = await extractVisibleText(tabId, frames, sendCommand, diag, () => timedOut).catch(() => "")
123
125
  const truncatedText = text.length >= TEXT_CAP
124
126
  const visualSurfaces = await extractVisualSurfaces(tabId, sendCommand).catch(() => [])
125
127
  const out = {
@@ -368,40 +370,71 @@ function attrFromList(attrList, name) {
368
370
  return undefined
369
371
  }
370
372
 
371
- async function extractVisibleText(tabId, sendCommand) {
372
- // Single Runtime.evaluate call into the page's main world to grab
373
- // viewport-visible text. Same logic as the legacy extractor.
374
- const expr = `
375
- (function() {
376
- const out = [];
377
- let total = 0;
378
- const CAP = ${TEXT_CAP};
379
- const root = document.body || document.documentElement;
380
- if (!root) return "";
381
- const tw = document.createTreeWalker(root, 4);
382
- const vp = { w: window.innerWidth, h: window.innerHeight };
383
- function inV(r) { return r.bottom > 0 && r.right > 0 && r.top < vp.h && r.left < vp.w; }
384
- let n;
385
- while ((n = tw.nextNode())) {
386
- const p = n.parentElement;
387
- if (!p) continue;
388
- const t = p.tagName ? p.tagName.toLowerCase() : "";
389
- if (t === "script" || t === "style" || t === "noscript") continue;
390
- const r = p.getBoundingClientRect();
391
- if (!inV(r)) continue;
392
- const s = (n.textContent || "").replace(/\\s+/g, " ").trim();
393
- if (!s) continue;
394
- if (total + s.length + 1 > CAP) { out.push(s.slice(0, Math.max(0, CAP - total))); break; }
395
- out.push(s);
396
- total += s.length + 1;
397
- }
398
- return out.join("\\n");
399
- })()
400
- `
401
- const res = await sendCommand(tabId, "Runtime.evaluate", {
402
- expression: expr,
403
- returnByValue: true,
404
- })
373
/**
 * Collect viewport-visible text from every frame and merge it into a single
 * "\n"-joined string that never exceeds TEXT_CAP characters.
 *
 * Each frame is evaluated separately through evaluateTextInFrame: the first
 * entry of `frames` is treated as the top frame, every later entry as a child
 * frame. Runtime.evaluate has no frameId parameter, so child frames are
 * reached through a per-frame isolated world (see evaluateTextInFrame).
 *
 * Best-effort by design: a frame whose evaluation throws is counted in
 * `diag.textFramesSkipped` and skipped; the remaining frames still contribute.
 *
 * Caveat: the "viewport" gate inside a child frame is that frame's own
 * viewport, not the top page's, so a frame scrolled out of the top viewport
 * can still contribute text. The damage is bounded by visiting the top frame
 * first and by the shared TEXT_CAP budget.
 *
 * @param {*} tabId - tab identifier forwarded untouched to `sendCommand`.
 * @param {Iterable<object>} frames - frame descriptors; the first is the top frame.
 * @param {Function} sendCommand - async CDP sender `(tabId, method, params)`.
 * @param {object} [diag] - diagnostics bag; `textFramesSkipped` is incremented on failure.
 * @param {Function} [isTimedOut] - optional probe; a truthy result stops the walk early.
 * @returns {Promise<string>} merged text, at most TEXT_CAP characters long.
 */
async function extractVisibleText(tabId, frames, sendCommand, diag, isTimedOut) {
  const canProbeTimeout = typeof isTimedOut === "function"
  const chunks = []
  let consumed = 0
  let position = -1
  for (const frame of frames) {
    position += 1
    // Stop once the budget is spent or the overall snapshot deadline passed.
    if (consumed >= TEXT_CAP) break
    if (canProbeTimeout && isTimedOut()) break
    // Hand each frame only the budget that is still left, so a late frame
    // never serializes text that would be thrown away immediately.
    const expression = buildVisibleTextExpr("viewport", TEXT_CAP - consumed)
    let chunk
    try {
      chunk = await evaluateTextInFrame(tabId, frame, position === 0, expression, sendCommand)
    } catch {
      // Non-fatal: record the skipped frame and carry on with the others.
      if (diag) diag.textFramesSkipped = (diag.textFramesSkipped || 0) + 1
      continue
    }
    if (chunk) {
      chunks.push(chunk)
      // +1 accounts for the "\n" separator added by the final join.
      consumed += chunk.length + 1
    }
  }
  const merged = chunks.join("\n")
  return merged.length > TEXT_CAP ? merged.slice(0, TEXT_CAP) : merged
}
418
+
419
/**
 * Run the visible-text expression in one frame and return the text it yields.
 *
 * The top frame is evaluated in the attachment's default execution context.
 * A child frame first gets an isolated world minted for its frameId via
 * Page.createIsolatedWorld, and the expression runs in that world's context.
 *
 * Outcomes:
 *  - no execution context could be obtained for a child frame -> resolves ""
 *    (degrades quietly);
 *  - the expression threw inside the page (Runtime.evaluate reports
 *    `exceptionDetails`) -> rejects, so the caller counts the frame as skipped
 *    instead of mistaking the failure for "no visible text";
 *  - the evaluation produced a non-string value -> resolves "", so callers can
 *    rely on `.length` arithmetic even if the page's main world tampered with
 *    built-ins used by the injected walker.
 *
 * @param {*} tabId - tab identifier forwarded untouched to `sendCommand`.
 * @param {{frameId: string}} frame - frame descriptor; only `frameId` is read,
 *   and only for child frames.
 * @param {boolean} isTopFrame - true to evaluate in the default context.
 * @param {string} expr - expression source passed to Runtime.evaluate.
 * @param {Function} sendCommand - async CDP sender `(tabId, method, params)`.
 * @returns {Promise<string>} the frame's text, or "" when none is available.
 * @throws {Error} when the in-page evaluation reports an exception; rejections
 *   from `sendCommand` itself propagate unchanged.
 */
async function evaluateTextInFrame(tabId, frame, isTopFrame, expr, sendCommand) {
  const params = { expression: expr, returnByValue: true }
  if (!isTopFrame) {
    const world = await sendCommand(tabId, "Page.createIsolatedWorld", {
      frameId: frame.frameId,
      worldName: "gh_router_text",
    })
    const contextId = world?.executionContextId
    if (!contextId) return ""
    params.contextId = contextId
  }
  const res = await sendCommand(tabId, "Runtime.evaluate", params)
  const failure = res?.exceptionDetails
  if (failure) {
    // Surface in-page failures so the caller's skipped-frame counter sees them.
    const detail = failure.exception?.description || failure.text || "unknown error"
    throw new Error(`visible-text evaluation failed: ${detail}`)
  }
  const value = res?.result?.value
  // Only a string is a valid result; anything else is treated as "no text".
  return typeof value === "string" ? value : ""
}
407
440
 
@@ -0,0 +1,102 @@
1
+ // visible-text.js — the canonical visible-text walk shared by both snapshot
2
+ // extractors.
3
+ //
4
+ // It runs in TWO execution contexts:
5
+ // 1. Serialized via Function.prototype.toString() into a CDP
6
+ // `Runtime.evaluate` expression (snapshot-cdp.js, the primary path) and
7
+ // run PER FRAME, including same-process child frames the old top-frame-
8
+ // only evaluate missed.
9
+ // 2. Mirrored inline inside the legacy `executeScript({func})` extractor
10
+ // (background.js). `chrome.scripting.executeScript` serializes ONLY the
11
+ // given function and drops its module closure, so that copy cannot
12
+ // `import` this one — it is kept in sync by hand (see the comment there).
13
+ //
14
+ // Why a TreeWalker join instead of `element.innerText`: `innerText` glues
15
+ // adjacent inline siblings with no separator — `<span>Item-757</span>` +
16
+ // `<span>ITM_a209f4</span>` collapses to the unreadable "Item-757ITM_a209f4".
17
+ // Walking text nodes and joining with "\n" keeps distinct fields separable for
18
+ // the model.
19
+ //
20
+ // Authored in plain ES5 (no arrow / spread / optional-chaining / template
21
+ // literals) so its `.toString()` source is self-contained and survives
22
+ // bundling intact for in-page injection — a transpiler helper reference in the
23
+ // emitted source would break the serialized expression. For the same reason
24
+ // the function closes over NOTHING from module scope (constants are inlined):
25
+ // `.toString()` captures only the function body, not module-level bindings.
26
+
27
/**
 * Collect viewport- or render-visible text from `root`, joining text nodes
 * with "\n" and capping the result at `cap` UTF-16 code units.
 *
 * `mode` selects the per-node visibility gate:
 *   - "viewport" : keep nodes whose parent rect intersects the viewport of
 *                  the window that owns `root`'s document (what a user sees
 *                  without scrolling). Needs a live window + layout; when no
 *                  window can be found the result is "".
 *   - "rendered" : keep nodes whose parent has >=1 client rect (i.e. not
 *                  display:none / detached); off-screen content IS kept.
 *                  Used by the "full" snapshot mode.
 *   - anything else (e.g. "none"): no visibility gate — keep every non-
 *                  script/style text node. Used by unit tests so the walk is
 *                  exercisable without a layout engine.
 *
 * Pure and dependency-free. `script` / `style` / `noscript` text is always
 * dropped. Returns "" for a missing root / document.
 *
 * Kept in plain ES5 and free of module-scope references on purpose: the
 * function is injected into pages through its `.toString()` source.
 *
 * @param {Node} root - subtree to walk.
 * @param {number} cap - maximum length of the returned string.
 * @param {string} mode - "viewport", "rendered", or anything else for no gate.
 * @returns {string} whitespace-normalized text nodes joined with "\n".
 */
export function collectVisibleText(root, cap, mode) {
  if (!root) return ""
  var doc = root.ownerDocument || (typeof document !== "undefined" ? document : null)
  if (!doc || typeof doc.createTreeWalker !== "function") return ""
  var viewW = 0
  var viewH = 0
  if (mode === "viewport") {
    // Measure against the window that owns the walked document rather than the
    // ambient global, so a root from another document (or an environment with
    // no global `window`) is gated against the right viewport.
    var view = doc.defaultView || (typeof window !== "undefined" ? window : null)
    if (!view) return ""
    viewW = view.innerWidth
    viewH = view.innerHeight
  }
  var tw = doc.createTreeWalker(root, 4) // NodeFilter.SHOW_TEXT — inlined on purpose
  var out = []
  var total = 0 // length of the joined output so far, counting one "\n" per part
  var n
  while ((n = tw.nextNode())) {
    var p = n.parentElement
    if (!p) continue
    var tag = p.tagName ? String(p.tagName).toLowerCase() : ""
    if (tag === "script" || tag === "style" || tag === "noscript") continue
    if (mode === "viewport") {
      var r = p.getBoundingClientRect()
      if (!(r.bottom > 0 && r.right > 0 && r.top < viewH && r.left < viewW)) {
        continue
      }
    } else if (mode === "rendered") {
      // display:none / detached parents report zero client rects; off-screen
      // (scrolled-out) parents still report rects, so full mode keeps them.
      // NB: `visibility:hidden` text IS kept (it retains layout boxes) — this
      // matches the "viewport" path's getBoundingClientRect behavior; excluding
      // it would need a per-node getComputedStyle (style-recalc cost) and would
      // diverge the two extractors.
      if (p.getClientRects().length === 0) continue
    }
    var s = (n.textContent || "").replace(/\s+/g, " ").trim()
    if (!s) continue
    if (total + s.length + 1 > cap) {
      // Fill the remaining budget exactly. A hit on the cap therefore always
      // yields a result of length `cap`, which callers use to detect truncation.
      out.push(s.slice(0, Math.max(0, cap - total)))
      break
    }
    out.push(s)
    total += s.length + 1
  }
  return out.join("\n")
}
83
+
84
/**
 * Produce the source of a self-invoking expression for `Runtime.evaluate`
 * that calls `collectVisibleText` on the evaluating frame's document.
 *
 * The walker's source is embedded through `.toString()`, so the expression
 * depends on nothing from the page or from this module when it runs. Both
 * arguments are rendered as literals — `cap` through `Number(...)` and `mode`
 * through `JSON.stringify(String(...))` — so the generated source stays
 * well-formed (and injection-safe) whatever the caller passes.
 *
 * @param {string} mode - visibility gate forwarded to `collectVisibleText`.
 * @param {number} cap - maximum result length forwarded to `collectVisibleText`.
 * @returns {string} expression source ready to hand to `Runtime.evaluate`.
 */
export function buildVisibleTextExpr(mode, cap) {
  const walkerSource = collectVisibleText.toString()
  const capLiteral = Number(cap)
  const modeLiteral = JSON.stringify(String(mode))
  const pieces = [
    "(",
    walkerSource,
    ")(document.body||document.documentElement,",
    capLiteral,
    ",",
    modeLiteral,
    ")",
  ]
  return pieces.join("")
}