github-router 0.3.73 → 0.3.82
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser-ext/background.js +34 -3
- package/dist/browser-ext/manifest.json +1 -1
- package/dist/browser-ext/snapshot-cdp.js +69 -36
- package/dist/browser-ext/visible-text.js +102 -0
- package/dist/main.js +2151 -236
- package/dist/main.js.map +1 -1
- package/package.json +1 -1
|
@@ -487,12 +487,43 @@ async function extractSnapshotLegacy(tabId, opts) {
|
|
|
487
487
|
// summary: walk text nodes whose parent is in the viewport; cap
|
|
488
488
|
// at 20 KB. The model sees what a user could read without
|
|
489
489
|
// scrolling. Off-screen content remains reachable via mode:"full".
|
|
490
|
-
// full:
|
|
490
|
+
// full: walk all rendered text nodes; cap at 256 KiB.
|
|
491
491
|
let text = ""
|
|
492
492
|
if (mode === "full") {
|
|
493
|
+
// Mirror of collectVisibleText(root, cap, "rendered") in
|
|
494
|
+
// src/browser-ext/visible-text.js — executeScript serializes only
|
|
495
|
+
// this func and drops its module closure, so it cannot import that
|
|
496
|
+
// helper; keep the two in sync by hand. We walk text nodes and join
|
|
497
|
+
// with "\n" instead of using document.body.innerText, which glues
|
|
498
|
+
// adjacent inline siblings with no separator
|
|
499
|
+
// (<span>A</span><span>B</span> -> "AB" instead of "A\nB").
|
|
493
500
|
const MAX_FULL = 256 * 1024
|
|
494
|
-
|
|
495
|
-
|
|
501
|
+
const parts = []
|
|
502
|
+
let total = 0
|
|
503
|
+
const root = document.body || document.documentElement
|
|
504
|
+
if (root) {
|
|
505
|
+
const tw = document.createTreeWalker(root, 4) // NodeFilter.SHOW_TEXT === 4
|
|
506
|
+
let n
|
|
507
|
+
while ((n = tw.nextNode())) {
|
|
508
|
+
const parent = n.parentElement
|
|
509
|
+
if (!parent) continue
|
|
510
|
+
const ptag = parent.tagName ? parent.tagName.toLowerCase() : ""
|
|
511
|
+
if (ptag === "script" || ptag === "style" || ptag === "noscript") continue
|
|
512
|
+
// display:none / detached parents report zero client rects;
|
|
513
|
+
// off-screen (scrolled-out) parents still report rects, so full
|
|
514
|
+
// mode keeps them — matching innerText's "rendered text" intent.
|
|
515
|
+
if (parent.getClientRects().length === 0) continue
|
|
516
|
+
const t = (n.textContent || "").replace(/\s+/g, " ").trim()
|
|
517
|
+
if (!t) continue
|
|
518
|
+
if (total + t.length + 1 > MAX_FULL) {
|
|
519
|
+
parts.push(t.slice(0, Math.max(0, MAX_FULL - total)))
|
|
520
|
+
break
|
|
521
|
+
}
|
|
522
|
+
parts.push(t)
|
|
523
|
+
total += t.length + 1
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
text = parts.join("\n")
|
|
496
527
|
} else {
|
|
497
528
|
const TEXT_CAP = 20 * 1024
|
|
498
529
|
const parts = []
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"manifest_version": 3,
|
|
3
3
|
"name": "github-router browser bridge",
|
|
4
4
|
"short_name": "gh-router-browser",
|
|
5
|
-
"version": "0.3.73",
|
|
5
|
+
"version": "0.3.82",
|
|
6
6
|
"description": "Bridge between Claude (via github-router /mcp) and the browser. Implements tab control, navigation, clicks, form fill, downloads, screenshots, devtools eval. Blocks navigation to chrome://settings.",
|
|
7
7
|
"key": "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqJElxuBlonBS3TVW9FJN0mGTtShB3L1hoaYf6k39SOr1ogGYmF90EjRxy1i21k9wQQjPf26bcBu/9X67KrQjQV0uB38CaNukgiSeoLjfptN811u+PJHx6BP+jx3Qa6/3VenNPxHC8WEU0GXql8QSjIHEyCwKb6fMASXOK94JyB5Ywov2x8mt/+9ncqBBBMVzf6r5Sagy4PL1XnryLsuADD/vOEkPet8wXgH/Oj7v5tTsQQZ7U1JT51PoDs2BFnXc5v3TkVgZwd32k3ONh+nkDw1Hof+4zwUGOyJE6eMrlYzRlKM4Qxdf9JpavQvqfieAbTRWcyKeclnHeoIfE7cDBQIDAQAB",
|
|
8
8
|
"background": {
|
|
@@ -18,6 +18,8 @@
|
|
|
18
18
|
// fails (enterprise DeveloperToolsAvailability=2, DevTools already
|
|
19
19
|
// open on the tab, etc.).
|
|
20
20
|
|
|
21
|
+
import { buildVisibleTextExpr } from "./visible-text.js"
|
|
22
|
+
|
|
21
23
|
const ELEMENT_CAP = 500 // total elements across all frames
|
|
22
24
|
const PER_FRAME_CAP = 200 // per-frame element cap
|
|
23
25
|
const TEXT_CAP = 32 * 1024 // viewport-visible text cap
|
|
@@ -89,7 +91,7 @@ export async function extractSnapshotCDP(tabId, opts, deps) {
|
|
|
89
91
|
const elements = []
|
|
90
92
|
const refCounter = { next: 1 }
|
|
91
93
|
const usedRefs = new Set()
|
|
92
|
-
const diag = { frames: frames.length, axNodes: 0, interesting: 0, resolved: 0, withRef: 0 }
|
|
94
|
+
const diag = { frames: frames.length, axNodes: 0, interesting: 0, resolved: 0, withRef: 0, textFramesSkipped: 0 }
|
|
93
95
|
for (const frame of frames) {
|
|
94
96
|
if (timedOut) break
|
|
95
97
|
if (elements.length >= ELEMENT_CAP) {
|
|
@@ -119,7 +121,7 @@ export async function extractSnapshotCDP(tabId, opts, deps) {
|
|
|
119
121
|
// attach already succeeded so an enable failure is rare.
|
|
120
122
|
}
|
|
121
123
|
}
|
|
122
|
-
const text = await extractVisibleText(tabId, sendCommand).catch(() => "")
|
|
124
|
+
const text = await extractVisibleText(tabId, frames, sendCommand, diag, () => timedOut).catch(() => "")
|
|
123
125
|
const truncatedText = text.length >= TEXT_CAP
|
|
124
126
|
const visualSurfaces = await extractVisualSurfaces(tabId, sendCommand).catch(() => [])
|
|
125
127
|
const out = {
|
|
@@ -368,40 +370,71 @@ function attrFromList(attrList, name) {
|
|
|
368
370
|
return undefined
|
|
369
371
|
}
|
|
370
372
|
|
|
371
|
-
async function extractVisibleText(tabId, sendCommand) {
|
|
372
|
-
//
|
|
373
|
-
//
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
373
|
+
/**
 * Collect viewport-visible text from every frame and merge it into a single
 * string of at most TEXT_CAP UTF-16 code units.
 *
 * Frames are visited in array order and the first entry is treated as the
 * top frame (evaluated in the default execution context); every later entry
 * is evaluated through evaluateTextInFrame's per-frame isolated world.
 * Each frame is asked only for the budget that is still unused, so a late
 * frame never serializes text that would be thrown away afterwards.
 *
 * A frame whose evaluation throws is skipped and counted in
 * `diag.textFramesSkipped`; a frame that yields an empty result is skipped
 * without being counted. Per the original author's note, the "viewport" gate
 * inside a child frame is relative to that frame's own viewport, so a frame
 * scrolled out of the top-level viewport can still contribute text.
 *
 * @param tabId       forwarded untouched to evaluateTextInFrame.
 * @param frames      frame descriptors; index 0 is assumed to be the top frame.
 * @param sendCommand CDP command sender, forwarded to evaluateTextInFrame.
 * @param diag        optional diagnostics object; `textFramesSkipped` is bumped
 *                    once per frame whose evaluation threw.
 * @param isTimedOut  optional predicate; extraction stops before the next frame
 *                    as soon as it returns a truthy value.
 * @returns {Promise<string>} per-frame texts joined with "\n", capped at TEXT_CAP.
 */
async function extractVisibleText(tabId, frames, sendCommand, diag, isTimedOut) {
  const canTimeOut = typeof isTimedOut === "function"
  const chunks = []
  // Code units consumed so far, counting one separator per accepted chunk.
  let used = 0
  for (const [position, frame] of frames.entries()) {
    if (used >= TEXT_CAP) break
    if (canTimeOut && isTimedOut()) break
    const remaining = TEXT_CAP - used
    const expr = buildVisibleTextExpr("viewport", remaining)
    let chunk = ""
    try {
      chunk = await evaluateTextInFrame(tabId, frame, position === 0, expr, sendCommand)
    } catch {
      // Best-effort: one frame failing must not lose the text of the others.
      if (diag) diag.textFramesSkipped = (diag.textFramesSkipped || 0) + 1
      continue
    }
    if (!chunk) continue
    chunks.push(chunk)
    used += chunk.length + 1
  }
  const merged = chunks.join("\n")
  if (merged.length > TEXT_CAP) return merged.slice(0, TEXT_CAP)
  return merged
}
|
|
418
|
+
|
|
419
|
+
/**
 * Run the visible-text expression in one frame and return its string result.
 *
 * The top frame is evaluated without a `contextId`, i.e. in the attachment's
 * default execution context. A child frame first gets an isolated world
 * created for its `frameId` (Runtime.evaluate has no frameId parameter), and
 * the expression then runs in that world's execution context.
 *
 * @param tabId       tab identifier, passed as the first argument of every
 *                    `sendCommand` call.
 * @param frame       frame descriptor; only `frame.frameId` is read, and only
 *                    for child frames.
 * @param isTopFrame  true to evaluate in the default context and skip
 *                    isolated-world creation.
 * @param expr        expression source handed to `Runtime.evaluate`.
 * @param sendCommand async `(tabId, method, params)` CDP command sender.
 * @returns {Promise<string>} the evaluated text. "" when no execution context
 *   could be obtained for a child frame, or when the evaluation produced no
 *   string value — both degrade quietly.
 * @throws whatever `sendCommand` rejects with, and an Error when the
 *   evaluation reports `exceptionDetails` (the expression threw inside the
 *   page). The caller treats any throw as a skipped frame, so an in-page
 *   failure is now counted instead of being indistinguishable from "no text".
 */
async function evaluateTextInFrame(tabId, frame, isTopFrame, expr, sendCommand) {
  const params = { expression: expr, returnByValue: true }
  if (!isTopFrame) {
    const world = await sendCommand(tabId, "Page.createIsolatedWorld", {
      frameId: frame.frameId,
      worldName: "gh_router_text",
    })
    const contextId = world?.executionContextId
    if (!contextId) return ""
    params.contextId = contextId
  }
  const res = await sendCommand(tabId, "Runtime.evaluate", params)
  if (res?.exceptionDetails) {
    const detail = res.exceptionDetails.text || "unknown error"
    throw new Error(`visible-text evaluation threw in frame: ${detail}`)
  }
  // The caller reads `.length` and joins the result, so only a real string
  // may leave this function.
  const value = res?.result?.value
  return typeof value === "string" ? value : ""
}
|
|
407
440
|
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
// visible-text.js — the canonical visible-text walk shared by both snapshot
|
|
2
|
+
// extractors.
|
|
3
|
+
//
|
|
4
|
+
// It runs in TWO execution contexts:
|
|
5
|
+
// 1. Serialized via Function.prototype.toString() into a CDP
|
|
6
|
+
// `Runtime.evaluate` expression (snapshot-cdp.js, the primary path) and
|
|
7
|
+
// run PER FRAME, including same-process child frames the old top-frame-
|
|
8
|
+
// only evaluate missed.
|
|
9
|
+
// 2. Mirrored inline inside the legacy `executeScript({func})` extractor
|
|
10
|
+
// (background.js). `chrome.scripting.executeScript` serializes ONLY the
|
|
11
|
+
// given function and drops its module closure, so that copy cannot
|
|
12
|
+
// `import` this one — it is kept in sync by hand (see the comment there).
|
|
13
|
+
//
|
|
14
|
+
// Why a TreeWalker join instead of `element.innerText`: `innerText` glues
|
|
15
|
+
// adjacent inline siblings with no separator — `<span>Item-757</span>` +
|
|
16
|
+
// `<span>ITM_a209f4</span>` collapses to the unreadable "Item-757ITM_a209f4".
|
|
17
|
+
// Walking text nodes and joining with "\n" keeps distinct fields separable for
|
|
18
|
+
// the model.
|
|
19
|
+
//
|
|
20
|
+
// Authored in plain ES5 (no arrow / spread / optional-chaining / template
|
|
21
|
+
// literals) so its `.toString()` source is self-contained and survives
|
|
22
|
+
// bundling intact for in-page injection — a transpiler helper reference in the
|
|
23
|
+
// emitted source would break the serialized expression. For the same reason
|
|
24
|
+
// the function closes over NOTHING from module scope (constants are inlined):
|
|
25
|
+
// `.toString()` captures only the function body, not module-level bindings.
|
|
26
|
+
|
|
27
|
+
/**
 * Walk the text nodes beneath `root` and return their text, one node per
 * line ("\n"-joined), limited to `cap` UTF-16 code units.
 *
 * Each node's text has runs of whitespace collapsed to a single space and is
 * trimmed; nodes that end up empty, nodes without a parent element, and nodes
 * whose parent is a `script` / `style` / `noscript` element are dropped.
 *
 * `mode` picks the visibility test applied to each node's parent element:
 *   - "viewport" : the parent's bounding rect must overlap the area
 *                  (0,0)-(window.innerWidth, window.innerHeight). Requires a
 *                  global `window` and real layout.
 *   - "rendered" : the parent must report at least one client rect; content
 *                  that is merely scrolled out of view is kept.
 *   - other      : no visibility test at all (useful where no layout engine
 *                  exists, e.g. unit tests).
 *
 * When the next piece would exceed `cap`, it is cut to the remaining budget,
 * appended, and the walk stops.
 *
 * Deliberately plain ES5 with no references to module-level bindings: the
 * function is shipped into pages via `.toString()`, which carries only this
 * body.
 *
 * @returns {string} the collected text, or "" when `root` is falsy or no
 *   document with `createTreeWalker` is available.
 */
export function collectVisibleText(root, cap, mode) {
  if (!root) return ""
  var ownerDoc = root.ownerDocument
  if (!ownerDoc && typeof document !== "undefined") ownerDoc = document
  if (!ownerDoc || typeof ownerDoc.createTreeWalker !== "function") return ""

  // 4 is NodeFilter.SHOW_TEXT, spelled as a literal so the serialized source
  // depends on nothing outside this function.
  var walker = ownerDoc.createTreeWalker(root, 4)
  var pieces = []
  // Code units consumed so far, counting one separator per stored piece.
  var used = 0

  for (var node = walker.nextNode(); node; node = walker.nextNode()) {
    var host = node.parentElement
    if (!host) continue

    var tagName = host.tagName ? String(host.tagName).toLowerCase() : ""
    switch (tagName) {
      case "script":
      case "style":
      case "noscript":
        continue
    }

    if (mode === "viewport") {
      var box = host.getBoundingClientRect()
      var onScreen =
        box.bottom > 0 && box.right > 0 && box.top < window.innerHeight && box.left < window.innerWidth
      if (!onScreen) continue
    } else if (mode === "rendered" && host.getClientRects().length === 0) {
      // No client rects: display:none or detached. Per the original author's
      // note, `visibility:hidden` text is still kept here (it retains layout
      // boxes), which keeps this gate consistent with the "viewport" one and
      // avoids a per-node getComputedStyle.
      continue
    }

    var piece = (node.textContent || "").replace(/\s+/g, " ").trim()
    if (!piece) continue

    if (used + piece.length + 1 > cap) {
      pieces.push(piece.slice(0, Math.max(0, cap - used)))
      break
    }
    pieces.push(piece)
    used += piece.length + 1
  }

  return pieces.join("\n")
}
|
|
83
|
+
|
|
84
|
+
/**
 * Produce the source of an expression that, when evaluated inside a page
 * (e.g. through `Runtime.evaluate`), immediately invokes
 * `collectVisibleText` on `document.body || document.documentElement`.
 *
 * The walker's own source is embedded through `.toString()`, so the resulting
 * expression needs nothing from the page or from this module at evaluation
 * time. Both arguments are sanitized before being spliced into the source:
 * `cap` goes through `Number(...)` and `mode` through
 * `JSON.stringify(String(...))`, so neither can break out of its argument
 * position even if a caller ever passes something other than a constant.
 *
 * @param mode visibility gate name forwarded to `collectVisibleText`.
 * @param cap  output limit forwarded to `collectVisibleText`.
 * @returns {string} the expression source.
 */
export function buildVisibleTextExpr(mode, cap) {
  var walkSource = collectVisibleText.toString()
  var capSource = String(Number(cap))
  var modeSource = JSON.stringify(String(mode))
  return [
    "(",
    walkSource,
    ")(document.body||document.documentElement,",
    capSource,
    ",",
    modeSource,
    ")",
  ].join("")
}
|