npm - github-router - Versions diffs - 0.3.73 → 0.3.82 - Mend

github-router 0.3.73 → 0.3.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/browser-ext/background.js +34 -3
package/dist/browser-ext/manifest.json +1 -1
package/dist/browser-ext/snapshot-cdp.js +69 -36
package/dist/browser-ext/visible-text.js +102 -0
package/dist/main.js +2151 -236
package/dist/main.js.map +1 -1
package/package.json +1 -1

package/dist/browser-ext/background.js CHANGED Viewed

@@ -487,12 +487,43 @@ async function extractSnapshotLegacy(tabId, opts) {
       // summary: walk text nodes whose parent is in the viewport; cap
       // at 20 KB. The model sees what a user could read without
       // scrolling. Off-screen content remains reachable via mode:"full".
-      // full: 256 KiB innerText cap (legacy behavior).
+      // full: walk all rendered text nodes; cap at 256 KiB.
       let text = ""
       if (mode === "full") {
+        // Mirror of collectVisibleText(root, cap, "rendered") in
+        // src/browser-ext/visible-text.js — executeScript serializes only
+        // this func and drops its module closure, so it cannot import that
+        // helper; keep the two in sync by hand. We walk text nodes and join
+        // with "\n" instead of using document.body.innerText, which glues
+        // adjacent inline siblings with no separator
+        // (<span>A</span><span>B</span> -> "AB" instead of "A\nB").
         const MAX_FULL = 256 * 1024
-        text = document.body ? document.body.innerText : ""
-        if (text.length > MAX_FULL) text = text.slice(0, MAX_FULL)
+        const parts = []
+        let total = 0
+        const root = document.body || document.documentElement
+        if (root) {
+          const tw = document.createTreeWalker(root, 4) // NodeFilter.SHOW_TEXT === 4
+          let n
+          while ((n = tw.nextNode())) {
+            const parent = n.parentElement
+            if (!parent) continue
+            const ptag = parent.tagName ? parent.tagName.toLowerCase() : ""
+            if (ptag === "script" || ptag === "style" || ptag === "noscript") continue
+            // display:none / detached parents report zero client rects;
+            // off-screen (scrolled-out) parents still report rects, so full
+            // mode keeps them — matching innerText's "rendered text" intent.
+            if (parent.getClientRects().length === 0) continue
+            const t = (n.textContent || "").replace(/\s+/g, " ").trim()
+            if (!t) continue
+            if (total + t.length + 1 > MAX_FULL) {
+              parts.push(t.slice(0, Math.max(0, MAX_FULL - total)))
+              break
+            }
+            parts.push(t)
+            total += t.length + 1
+          }
+        }
+        text = parts.join("\n")
       } else {
         const TEXT_CAP = 20 * 1024
         const parts = []

package/dist/browser-ext/manifest.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "manifest_version": 3,
   "name": "github-router browser bridge",
   "short_name": "gh-router-browser",
-  "version": "0.3.73",
+  "version": "0.3.82",
   "description": "Bridge between Claude (via github-router /mcp) and the browser. Implements tab control, navigation, clicks, form fill, downloads, screenshots, devtools eval. Blocks navigation to chrome://settings.",
   "key": "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqJElxuBlonBS3TVW9FJN0mGTtShB3L1hoaYf6k39SOr1ogGYmF90EjRxy1i21k9wQQjPf26bcBu/9X67KrQjQV0uB38CaNukgiSeoLjfptN811u+PJHx6BP+jx3Qa6/3VenNPxHC8WEU0GXql8QSjIHEyCwKb6fMASXOK94JyB5Ywov2x8mt/+9ncqBBBMVzf6r5Sagy4PL1XnryLsuADD/vOEkPet8wXgH/Oj7v5tTsQQZ7U1JT51PoDs2BFnXc5v3TkVgZwd32k3ONh+nkDw1Hof+4zwUGOyJE6eMrlYzRlKM4Qxdf9JpavQvqfieAbTRWcyKeclnHeoIfE7cDBQIDAQAB",
   "background": {

package/dist/browser-ext/snapshot-cdp.js CHANGED Viewed

@@ -18,6 +18,8 @@
 // fails (enterprise DeveloperToolsAvailability=2, DevTools already
 // open on the tab, etc.).
+import { buildVisibleTextExpr } from "./visible-text.js"
 const ELEMENT_CAP = 500            // total elements across all frames
 const PER_FRAME_CAP = 200          // per-frame element cap
 const TEXT_CAP = 32 * 1024         // viewport-visible text cap
@@ -89,7 +91,7 @@ export async function extractSnapshotCDP(tabId, opts, deps) {
     const elements = []
     const refCounter = { next: 1 }
     const usedRefs = new Set()
-    const diag = { frames: frames.length, axNodes: 0, interesting: 0, resolved: 0, withRef: 0 }
+    const diag = { frames: frames.length, axNodes: 0, interesting: 0, resolved: 0, withRef: 0, textFramesSkipped: 0 }
     for (const frame of frames) {
       if (timedOut) break
       if (elements.length >= ELEMENT_CAP) {
@@ -119,7 +121,7 @@ export async function extractSnapshotCDP(tabId, opts, deps) {
         // attach already succeeded so an enable failure is rare.
       }
     }
-    const text = await extractVisibleText(tabId, sendCommand).catch(() => "")
+    const text = await extractVisibleText(tabId, frames, sendCommand, diag, () => timedOut).catch(() => "")
     const truncatedText = text.length >= TEXT_CAP
     const visualSurfaces = await extractVisualSurfaces(tabId, sendCommand).catch(() => [])
     const out = {
@@ -368,40 +370,71 @@ function attrFromList(attrList, name) {
   return undefined
 }
-async function extractVisibleText(tabId, sendCommand) {
-  // Single Runtime.evaluate call into the page's main world to grab
-  // viewport-visible text. Same logic as the legacy extractor.
-  const expr = `
-    (function() {
-      const out = [];
-      let total = 0;
-      const CAP = ${TEXT_CAP};
-      const root = document.body || document.documentElement;
-      if (!root) return "";
-      const tw = document.createTreeWalker(root, 4);
-      const vp = { w: window.innerWidth, h: window.innerHeight };
-      function inV(r) { return r.bottom > 0 && r.right > 0 && r.top < vp.h && r.left < vp.w; }
-      let n;
-      while ((n = tw.nextNode())) {
-        const p = n.parentElement;
-        if (!p) continue;
-        const t = p.tagName ? p.tagName.toLowerCase() : "";
-        if (t === "script" || t === "style" || t === "noscript") continue;
-        const r = p.getBoundingClientRect();
-        if (!inV(r)) continue;
-        const s = (n.textContent || "").replace(/\\s+/g, " ").trim();
-        if (!s) continue;
-        if (total + s.length + 1 > CAP) { out.push(s.slice(0, Math.max(0, CAP - total))); break; }
-        out.push(s);
-        total += s.length + 1;
-      }
-      return out.join("\\n");
-    })()
-  `
-  const res = await sendCommand(tabId, "Runtime.evaluate", {
-    expression: expr,
-    returnByValue: true,
-  })
+async function extractVisibleText(tabId, frames, sendCommand, diag, isTimedOut) {
+  // Gather viewport-visible text from every frame and merge it under one
+  // shared TEXT_CAP budget.
+  //
+  // Why per frame: a single Runtime.evaluate in the top frame's default
+  // context cannot see text inside child frames (same-origin app frames,
+  // embedded widgets), even though the element extractor already pierces
+  // frames. So the shared collectVisibleText expression is evaluated once per
+  // frame — the top frame in its default context, each child frame in an
+  // isolated world created for it (Runtime.evaluate takes no frameId;
+  // Page.createIsolatedWorld({frameId}) is how CDP hands out an
+  // executionContextId for a specific frame).
+  //
+  // Failure policy: a frame whose evaluation throws (cross-process OOPIFs may
+  // refuse createIsolatedWorld) is counted in diag.textFramesSkipped and
+  // skipped; the remaining frames still contribute. This mirrors the element
+  // loop's best-effort cross-origin handling.
+  //
+  // Known limitation: inside a child frame the "viewport" gate is evaluated
+  // against that FRAME's own viewport (window / getBoundingClientRect are
+  // frame-local), so a frame scrolled out of the top-page viewport can still
+  // contribute text. Gating on top-viewport visibility would need the owner
+  // iframe's rect in top coordinates (the deferred per-frame bbox transform).
+  // Visiting the top frame first and enforcing the global TEXT_CAP bounds the
+  // impact.
+  const chunks = []
+  let used = 0 // characters consumed so far, counting one "\n" per chunk
+  const hasTimedOut = () => typeof isTimedOut === "function" && isTimedOut()
+  for (let index = 0; index < frames.length; index += 1) {
+    if (used >= TEXT_CAP || hasTimedOut()) break
+    const frame = frames[index]
+    // Request only the budget still left so a later frame never serializes
+    // text that would be thrown away immediately.
+    const remaining = TEXT_CAP - used
+    const expr = buildVisibleTextExpr("viewport", remaining)
+    let chunk
+    try {
+      // Index 0 is treated as the top frame.
+      chunk = await evaluateTextInFrame(tabId, frame, index === 0, expr, sendCommand)
+    } catch {
+      if (diag) diag.textFramesSkipped = (diag.textFramesSkipped || 0) + 1
+      continue
+    }
+    if (chunk) {
+      chunks.push(chunk)
+      used += chunk.length + 1
+    }
+  }
+  const merged = chunks.join("\n")
+  if (merged.length > TEXT_CAP) return merged.slice(0, TEXT_CAP)
+  return merged
+}
+/**
+ * Evaluate the visible-text expression inside a single frame.
+ *
+ * The top frame is evaluated without a `contextId`, i.e. in the attachment's
+ * default execution context. Any other frame first gets an isolated world
+ * (named "gh_router_text") created for its `frameId`, and the expression is
+ * evaluated in that world's execution context.
+ *
+ * Resolves to "" when the isolated world yields no `executionContextId`
+ * (e.g. a cross-process frame that refuses createIsolatedWorld) or when the
+ * evaluation carries no value. A rejected `sendCommand` propagates; the
+ * caller counts that as a skipped frame, whereas a missing context degrades
+ * quietly.
+ */
+async function evaluateTextInFrame(tabId, frame, isTopFrame, expr, sendCommand) {
+  const scope = {} // stays empty for the top frame
+  if (!isTopFrame) {
+    const isolated = await sendCommand(tabId, "Page.createIsolatedWorld", {
+      frameId: frame.frameId,
+      worldName: "gh_router_text",
+    })
+    const { executionContextId } = isolated || {}
+    if (!executionContextId) return ""
+    scope.contextId = executionContextId
+  }
+  const res = await sendCommand(tabId, "Runtime.evaluate", { expression: expr, returnByValue: true, ...scope })
   return res?.result?.value ?? ""
 }

package/dist/browser-ext/visible-text.js ADDED Viewed

@@ -0,0 +1,102 @@
+// visible-text.js — the canonical visible-text walk shared by both snapshot
+// extractors.
+//
+// It runs in TWO execution contexts:
+//   1. Serialized via Function.prototype.toString() into a CDP
+//      `Runtime.evaluate` expression (snapshot-cdp.js, the primary path) and
+//      run PER FRAME, including same-process child frames the old top-frame-
+//      only evaluate missed.
+//   2. Mirrored inline inside the legacy `executeScript({func})` extractor
+//      (background.js). `chrome.scripting.executeScript` serializes ONLY the
+//      given function and drops its module closure, so that copy cannot
+//      `import` this one — it is kept in sync by hand (see the comment there).
+//
+// Why a TreeWalker join instead of `element.innerText`: `innerText` glues
+// adjacent inline siblings with no separator — `<span>Item-757</span>` +
+// `<span>ITM_a209f4</span>` collapses to the unreadable "Item-757ITM_a209f4".
+// Walking text nodes and joining with "\n" keeps distinct fields separable for
+// the model.
+//
+// Authored in plain ES5 (no arrow / spread / optional-chaining / template
+// literals) so its `.toString()` source is self-contained and survives
+// bundling intact for in-page injection — a transpiler helper reference in the
+// emitted source would break the serialized expression. For the same reason
+// the function closes over NOTHING from module scope (constants are inlined):
+// `.toString()` captures only the function body, not module-level bindings.
+/**
+ * Collect viewport- or render-visible text from `root`, joining text nodes
+ * with "\n" and capping the result at `cap` UTF-16 code units.
+ *
+ * `mode` selects the per-node visibility gate:
+ *   - "viewport"  : keep nodes whose parent rect intersects the viewport of
+ *                   the window that owns `root` (what a user sees without
+ *                   scrolling). The window is `root`'s
+ *                   `ownerDocument.defaultView`, falling back to the global
+ *                   `window`; needs layout.
+ *   - "rendered"  : keep nodes whose parent has >=1 client rect (i.e. not
+ *                   display:none / detached); off-screen content IS kept.
+ *                   Used by the "full" snapshot mode.
+ *   - anything else (e.g. "none"): no visibility gate — keep every non-
+ *                   script/style text node. Used by unit tests so the walk is
+ *                   exercisable without a layout engine.
+ *
+ * Pure and dependency-free. `script` / `style` / `noscript` text is always
+ * dropped. Returns "" for a missing root / document, and in "viewport" mode
+ * when there is no window to measure against.
+ *
+ * @param {Node} root  Subtree to walk.
+ * @param {number} cap Maximum length of the returned string.
+ * @param {string} mode Visibility gate: "viewport", "rendered", or other.
+ * @returns {string} Whitespace-collapsed text nodes joined with "\n".
+ */
+export function collectVisibleText(root, cap, mode) {
+  if (!root) return ""
+  var doc = root.ownerDocument || (typeof document !== "undefined" ? document : null)
+  if (!doc || typeof doc.createTreeWalker !== "function") return ""
+  // Measure against the window that owns `root` rather than the ambient
+  // global: getBoundingClientRect() is relative to the root's own viewport,
+  // so comparing it with another document's window would gate on the wrong
+  // size, and a bare `window` reference throws where no global exists.
+  var win = doc.defaultView || (typeof window !== "undefined" ? window : null)
+  if (mode === "viewport" && !win) return ""
+  var tw = doc.createTreeWalker(root, 4) // NodeFilter.SHOW_TEXT — inlined (see header)
+  var out = []
+  var total = 0 // length used so far, counting one "\n" per pushed piece
+  var n
+  while ((n = tw.nextNode())) {
+    var p = n.parentElement
+    if (!p) continue
+    var tag = p.tagName ? String(p.tagName).toLowerCase() : ""
+    if (tag === "script" || tag === "style" || tag === "noscript") continue
+    if (mode === "viewport") {
+      var r = p.getBoundingClientRect()
+      if (!(r.bottom > 0 && r.right > 0 && r.top < win.innerHeight && r.left < win.innerWidth)) {
+        continue
+      }
+    } else if (mode === "rendered") {
+      // display:none / detached parents report zero client rects; off-screen
+      // (scrolled-out) parents still report rects, so full mode keeps them.
+      // NB: `visibility:hidden` text IS kept (it retains layout boxes) — this
+      // matches the "viewport" path's getBoundingClientRect behavior; excluding
+      // it would need a per-node getComputedStyle (style-recalc cost) and would
+      // diverge the two extractors.
+      if (p.getClientRects().length === 0) continue
+    }
+    var s = (n.textContent || "").replace(/\s+/g, " ").trim()
+    if (!s) continue
+    if (total + s.length + 1 > cap) {
+      // Budget exhausted: keep only what still fits, then stop walking.
+      out.push(s.slice(0, Math.max(0, cap - total)))
+      break
+    }
+    out.push(s)
+    total += s.length + 1
+  }
+  return out.join("\n")
+}
+/**
+ * Produce the source text of a self-invoking call to `collectVisibleText`,
+ * suitable for use as a `Runtime.evaluate` expression.
+ *
+ * The function's own source (via `.toString()`) is embedded in the result,
+ * so the expression needs nothing from this module at eval time. The root
+ * argument is `document.body||document.documentElement`, resolved in the page.
+ * `cap` goes through `Number()` and `mode` through `JSON.stringify(String())`
+ * so both are emitted as well-formed literals (callers pass constants today;
+ * this keeps the generated source injection-safe regardless).
+ *
+ * @param {string} mode Visibility gate forwarded to `collectVisibleText`.
+ * @param {number} cap  Text cap forwarded to `collectVisibleText`.
+ * @returns {string} Expression of the form `(<fn source>)(<root>,<cap>,<mode>)`.
+ */
+export function buildVisibleTextExpr(mode, cap) {
+  var fnSource = collectVisibleText.toString()
+  var callArgs = [
+    "document.body||document.documentElement",
+    Number(cap),
+    JSON.stringify(String(mode)),
+  ]
+  return "(" + fnSource + ")(" + callArgs.join(",") + ")"
+}