github-router 0.3.45 → 0.3.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -8,7 +8,7 @@ import { createHash, randomBytes, randomUUID, timingSafeEqual } from "node:crypt
8
8
  import fs, { readFile, stat } from "node:fs/promises";
9
9
  import os, { homedir, platform } from "node:os";
10
10
  import * as path$1 from "node:path";
11
- import path from "node:path";
11
+ import path, { dirname, join } from "node:path";
12
12
  import process$1 from "node:process";
13
13
  import { execFile, execFileSync, spawn, spawnSync } from "node:child_process";
14
14
  import { promisify } from "node:util";
@@ -17,13 +17,13 @@ import { createInterface } from "node:readline";
17
17
  import Parser from "web-tree-sitter";
18
18
  import WebSocket from "ws";
19
19
  import { fileURLToPath } from "node:url";
20
+ import { events } from "fetch-event-stream";
20
21
  import { Type } from "typebox";
21
22
  import "partial-json";
22
23
  import { Compile } from "typebox/compile";
23
24
  import { Value } from "typebox/value";
24
25
  import "yaml";
25
26
  import "ignore";
26
- import { events } from "fetch-event-stream";
27
27
  import { z } from "zod";
28
28
  import { Writable } from "node:stream";
29
29
  import { serve } from "srvx";
@@ -62,14 +62,14 @@ function copilotVersion(state$1) {
62
62
  const API_VERSION = "2026-01-09";
63
63
  const copilotBaseUrl = (state$1) => state$1.copilotApiUrl ?? "https://api.githubcopilot.com";
64
64
  const copilotHeaders = (state$1, vision = false, integrationId = "vscode-chat") => {
65
- const version$1 = copilotVersion(state$1);
65
+ const version$2 = copilotVersion(state$1);
66
66
  const headers = {
67
67
  Authorization: `Bearer ${state$1.copilotToken}`,
68
68
  "content-type": standardHeaders()["content-type"],
69
69
  "copilot-integration-id": integrationId,
70
70
  "editor-version": `vscode/${state$1.vsCodeVersion}`,
71
- "editor-plugin-version": `copilot-chat/${version$1}`,
72
- "user-agent": `GitHubCopilotChat/${version$1}`,
71
+ "editor-plugin-version": `copilot-chat/${version$2}`,
72
+ "user-agent": `GitHubCopilotChat/${version$2}`,
73
73
  "openai-intent": "conversation-panel",
74
74
  "x-interaction-type": "conversation-panel",
75
75
  "x-github-api-version": API_VERSION,
@@ -538,9 +538,9 @@ const cacheVSCodeVersion = async () => {
538
538
  consola.info(`Using VSCode version: ${response}`);
539
539
  };
540
540
  const cacheCopilotVersion = async () => {
541
- const version$1 = await getCopilotChatVersion();
542
- state.copilotVersion = version$1;
543
- consola.info(`Using Copilot Chat version: ${version$1}`);
541
+ const version$2 = await getCopilotChatVersion();
542
+ state.copilotVersion = version$2;
543
+ consola.info(`Using Copilot Chat version: ${version$2}`);
544
544
  };
545
545
 
546
546
  //#endregion
@@ -1117,10 +1117,10 @@ function getCodexVersion() {
1117
1117
  };
1118
1118
  const major = Number.parseInt(m[1], 10);
1119
1119
  const minor = Number.parseInt(m[2], 10);
1120
- const version$1 = `${m[1]}.${m[2]}.${m[3]}`;
1120
+ const version$2 = `${m[1]}.${m[2]}.${m[3]}`;
1121
1121
  return {
1122
1122
  ok: major > 0 || major === 0 && minor >= 129,
1123
- version: version$1
1123
+ version: version$2
1124
1124
  };
1125
1125
  }
1126
1126
  /**
@@ -2471,6 +2471,33 @@ function round4(x) {
2471
2471
  return Math.round(x * 1e4) / 1e4;
2472
2472
  }
2473
2473
 
2474
+ //#endregion
2475
+ //#region src/lib/version.ts
2476
+ /**
2477
+ * Read this binary's published version from package.json at runtime.
2478
+ *
2479
+ * Done at runtime (not baked at build time) because release.yml builds
2480
+ * BEFORE `npm version patch` bumps the version — a build-time inline
2481
+ * would always ship the pre-bump value. The npm tarball ships package.json
2482
+ * alongside `dist/`, so a sibling-up lookup from import.meta.url resolves
2483
+ * cleanly in both dev (`src/lib/`) and bundled (`dist/`) layouts.
2484
+ *
2485
+ * Returns `"unknown"` if package.json can't be located or parsed —
2486
+ * never throws, so the CLI never fails to start over version reporting.
2487
+ */
2488
+ function getPackageVersion() {
2489
+ try {
2490
+ const here = dirname(fileURLToPath(import.meta.url));
2491
+ const candidates = [join(here, "..", "..", "package.json"), join(here, "..", "package.json")];
2492
+ for (const path$2 of candidates) try {
2493
+ const raw = readFileSync(path$2, "utf8");
2494
+ const parsed = JSON.parse(raw);
2495
+ if (typeof parsed.version === "string" && (parsed.name === "github-router" || parsed.name === "@animeshkundu/github-router")) return parsed.version;
2496
+ } catch {}
2497
+ } catch {}
2498
+ return "unknown";
2499
+ }
2500
+
2474
2501
  //#endregion
2475
2502
  //#region src/lib/browser-mcp/browser-detect.ts
2476
2503
  let cached;
@@ -2879,16 +2906,94 @@ function loadStableExtensionId() {
2879
2906
  } catch {}
2880
2907
  return "unknown";
2881
2908
  }
2882
- function buildInstallRequired(reason, autoInstalled) {
2909
+ /**
2910
+ * Reads the `version` field from the on-disk extension manifest in
2911
+ * extensionDir(). Returns undefined if the file is missing, unreadable,
2912
+ * or doesn't have a string version. Used to detect when the loaded
2913
+ * extension is stale relative to a freshly-updated package.
2914
+ */
2915
+ function loadExpectedExtensionVersion() {
2916
+ try {
2917
+ const raw = readFileSync(path.join(extensionDir(), "manifest.json"), "utf8");
2918
+ const parsed = JSON.parse(raw);
2919
+ if (typeof parsed.version === "string" && parsed.version.length > 0) return parsed.version;
2920
+ } catch {}
2921
+ }
2922
+ /**
2923
+ * Source-checkout dev sentinel — see scripts/copy-browser-ext.ts. When
2924
+ * extensionDir() resolves to src/browser-ext/ (dev iteration via
2925
+ * GH_ROUTER_BROWSER_EXT_DIR, or the dist fallback when the package
2926
+ * isn't built), the version is "0.0.0" and the auto-reload check is a
2927
+ * no-op: both sides agree, no mismatch, no reload triggered.
2928
+ */
2929
+ const DEV_VERSION_SENTINEL = "0.0.0";
2930
+ /**
2931
+ * Track which `(extensionId, expectedVersion)` pairs we've already
2932
+ * tried to auto-reload in this process. Prevents an infinite reload
2933
+ * loop if the on-disk version somehow stays ahead of what the browser
2934
+ * picks up (e.g. Chrome disabled the extension after reload because
2935
+ * a new permission was added — the loaded version stays stale).
2936
+ */
2937
+ const attemptedReloads = /* @__PURE__ */ new Set();
2938
+ /**
2939
+ * Send POST /reload to the bridge — triggers __reload__ control frame
2940
+ * over native messaging, which the extension's handler dispatches into
2941
+ * chrome.runtime.reload(). After this returns, the OLD bridge process
2942
+ * may still be running (its WS clients haven't dropped); the NEW
2943
+ * bridge spawned by Chrome on extension reconnect will overwrite the
2944
+ * discovery file.
2945
+ */
2946
+ async function postReload(port, token, timeoutMs = 1e3) {
2947
+ const controller = new AbortController();
2948
+ const timer = setTimeout(() => controller.abort(), timeoutMs);
2949
+ try {
2950
+ return (await fetch(`http://127.0.0.1:${port}/reload`, {
2951
+ method: "POST",
2952
+ headers: { authorization: `Bearer ${token}` },
2953
+ signal: controller.signal
2954
+ })).ok;
2955
+ } catch {
2956
+ return false;
2957
+ } finally {
2958
+ clearTimeout(timer);
2959
+ }
2960
+ }
2961
+ /**
2962
+ * After triggering a reload, poll the discovery file + /health until
2963
+ * we see the expected extension version (success) or run out of time
2964
+ * (caller falls back to install_required). Re-reads the discovery file
2965
+ * each cycle because the bridge process changes — old bridge exits
2966
+ * after its grace window, new bridge writes a new discovery file with
2967
+ * new port/token/pid.
2968
+ */
2969
+ async function pollUntilExtensionVersion(expectedVersion, maxWaitMs, intervalMs) {
2970
+ const deadline = Date.now() + maxWaitMs;
2971
+ while (Date.now() < deadline) {
2972
+ await new Promise((r) => setTimeout(r, intervalMs));
2973
+ const disc = readBridgeDiscovery();
2974
+ if (!disc) continue;
2975
+ const health = await probeHealth(disc.port, disc.token, 500);
2976
+ if (health && health.ok && health.extension_connected && health.extension_loaded_version === expectedVersion) return disc;
2977
+ }
2978
+ }
2979
+ function buildInstallRequired(reason, autoInstalled, versionMismatch) {
2980
+ const instructions = (() => {
2981
+ if (reason === "no_supported_browser") return "No Chrome or Edge installation was detected on this host. Install one and restart the github-router proxy.";
2982
+ if (reason === "bridge_bundle_missing") return "The bridge bundle is missing. Run `bun run build` from the github-router checkout to produce dist/browser-bridge/index.js, then retry.";
2983
+ if (reason === "extension_outdated" && versionMismatch) return `Your loaded github-router browser extension is version ${versionMismatch.loaded} but the github-router package shipped version ${versionMismatch.expected}. Auto-reload was attempted and did not converge — Chrome likely disabled the extension because the new manifest declares new permissions. Open chrome://extensions (or edge://extensions), find the github-router extension card, click "Enable" if it's disabled, then click the reload arrow. Retry this tool call afterwards.`;
2984
+ return "Open chrome://extensions (or edge://extensions), enable Developer Mode, click 'Load unpacked', and select the load_unpacked_dir above. Then retry this tool call. If you just updated the github-router package, an extension already loaded may need to be reloaded — click the reload arrow on its card.";
2985
+ })();
2883
2986
  return {
2884
2987
  install_required: true,
2885
2988
  reason,
2886
2989
  auto_installed: autoInstalled,
2990
+ proxy_version: getPackageVersion(),
2887
2991
  manual_steps: {
2888
2992
  load_unpacked_dir: extensionDir(),
2889
2993
  expected_extension_id: loadStableExtensionId(),
2890
- instructions: reason === "no_supported_browser" ? "No Chrome or Edge installation was detected on this host. Install one and restart the github-router proxy." : reason === "bridge_bundle_missing" ? "The bridge bundle is missing. Run `bun run build` from the github-router checkout to produce dist/browser-bridge/index.js, then retry." : "Open chrome://extensions (or edge://extensions), enable Developer Mode, click 'Load unpacked', and select the load_unpacked_dir above. Then retry this tool call."
2891
- }
2994
+ instructions
2995
+ },
2996
+ ...versionMismatch ? { version_mismatch: versionMismatch } : {}
2892
2997
  };
2893
2998
  }
2894
2999
  /**
@@ -2929,6 +3034,31 @@ async function _ensureBridgeReadyImpl() {
2929
3034
  const health = await probeHealth(discovery.port, discovery.token);
2930
3035
  if (!health || !health.ok) return buildInstallRequired("bridge_not_running", autoInstalled);
2931
3036
  if (!health.extension_connected) return buildInstallRequired("extension_not_loaded", autoInstalled);
3037
+ const expectedVersion = loadExpectedExtensionVersion();
3038
+ const loadedVersion = health.extension_loaded_version;
3039
+ if (typeof expectedVersion === "string" && typeof loadedVersion === "string" && expectedVersion !== DEV_VERSION_SENTINEL && loadedVersion !== DEV_VERSION_SENTINEL && expectedVersion !== loadedVersion) {
3040
+ const reloadKey = `${loadStableExtensionId()}::${expectedVersion}`;
3041
+ if (attemptedReloads.has(reloadKey)) return buildInstallRequired("extension_outdated", autoInstalled, {
3042
+ loaded: loadedVersion,
3043
+ expected: expectedVersion
3044
+ });
3045
+ attemptedReloads.add(reloadKey);
3046
+ if (!await postReload(discovery.port, discovery.token)) return buildInstallRequired("extension_outdated", autoInstalled, {
3047
+ loaded: loadedVersion,
3048
+ expected: expectedVersion
3049
+ });
3050
+ const newDiscovery = await pollUntilExtensionVersion(expectedVersion, 3e3, 150);
3051
+ if (!newDiscovery) return buildInstallRequired("extension_outdated", autoInstalled, {
3052
+ loaded: loadedVersion,
3053
+ expected: expectedVersion
3054
+ });
3055
+ return {
3056
+ install_required: false,
3057
+ port: newDiscovery.port,
3058
+ token: newDiscovery.token,
3059
+ pid: newDiscovery.pid
3060
+ };
3061
+ }
2932
3062
  return {
2933
3063
  install_required: false,
2934
3064
  port: discovery.port,
@@ -3226,89 +3356,698 @@ function logAudit$1(record) {
3226
3356
  }
3227
3357
 
3228
3358
  //#endregion
3229
- //#region src/lib/browser-mcp/index.ts
3359
+ //#region src/lib/mcp-inflight.ts
3230
3360
  /**
3231
- * Browser-control MCP tools (`browser_*`). All entries route through
3232
- * `dispatchBrowserTool()` which (1) runs the bridge-layer URL policy
3233
- * check, (2) runs the install-check pre-flight (returning structured
3234
- * install_required JSON when the bridge or extension isn't ready),
3235
- * and (3) opens a WS to the bridge, sends the tool call, awaits the
3236
- * response with a per-tool timeout.
3361
+ * Shared concurrency cap for MCP `tools/call` dispatches.
3237
3362
  *
3238
- * Each entry carries `capability: "browser"` so `browserToolsEnabled()`
3239
- * in `src/routes/mcp/handler.ts` drops them at both list-time and
3240
- * call-time when the operator hasn't opted in via `--browse` or
3241
- * `GH_ROUTER_ENABLE_BROWSE=1`.
3363
+ * Originally lived as a module-private counter inside
3364
+ * `src/routes/mcp/handler.ts`. Extracted because the worker-agent's
3365
+ * `peer_review` and `advisor` tools (which dispatch to peer-model
3366
+ * personas / the advisor responses endpoint from inside a worker
3367
+ * subagent loop) must participate in the same backpressure budget;
3368
+ * otherwise a single worker can fan out unboundedly to peers and
3369
+ * starve the operator's own `tools/list` callers.
3242
3370
  *
3243
- * v1 surface: 19 tools (Phases 3 + 4a + 4b + humanlike input v2).
3371
+ * The counter is a single process-wide integer — no per-route
3372
+ * partitioning. Persona calls at the MCP boundary (handler.ts),
3373
+ * peer/advisor calls nested inside a worker (tools.ts), and any
3374
+ * future MCP-adjacent dispatcher all increment the same number.
3375
+ *
3376
+ * Cap = `MAX_INFLIGHT_TOOLS_CALL = 8`. Justification lives at the
3377
+ * historical home (`src/routes/mcp/handler.ts` comment block); do not
3378
+ * change the value without re-reading
3379
+ * `docs/research/peer-mcp-investigation.md` § "Concurrency cap
3380
+ * investigation".
3244
3381
  */
3245
- const BROWSER_TOOLS = Object.freeze([
3246
- {
3247
- toolNameHttp: "browser_list_tabs",
3248
- description: "List all open tabs across all browser windows. Returns each tab's id (used by other browser_* tools), URL, title, active flag, and window id.",
3249
- inputSchema: {
3250
- type: "object",
3251
- additionalProperties: false,
3252
- properties: {}
3253
- },
3254
- capability: "browser",
3255
- async handler(args, signal) {
3256
- return dispatchBrowserTool("browser_list_tabs", args, signal);
3382
+ const MAX_INFLIGHT_TOOLS_CALL = 8;
3383
+ let inFlight$1 = 0;
3384
+ /**
3385
+ * Acquire a slot if one is available. Returns a release function the
3386
+ * caller MUST invoke exactly once (typically from a `finally` block);
3387
+ * returns `null` if the cap is saturated. The release fn is idempotent
3388
+ * — calling it twice is a no-op so callers can release defensively
3389
+ * without worrying about double-decrementing the counter under unusual
3390
+ * unwind paths.
3391
+ *
3392
+ * Synchronous on purpose. Async semaphore acquisition would let callers
3393
+ * queue indefinitely; we want immediate "queue full" feedback so the
3394
+ * MCP client (or the model holding the nested tool call) can choose to
3395
+ * back off or retry.
3396
+ */
3397
+ function acquireInFlightSlot() {
3398
+ if (inFlight$1 >= MAX_INFLIGHT_TOOLS_CALL) return null;
3399
+ inFlight$1++;
3400
+ let released = false;
3401
+ return () => {
3402
+ if (released) return;
3403
+ released = true;
3404
+ inFlight$1--;
3405
+ };
3406
+ }
3407
+
3408
+ //#endregion
3409
+ //#region src/lib/diagnose-response.ts
3410
+ const PREVIEW_LIMIT = 200;
3411
+ async function parseJsonOrDiagnose(response, routePath) {
3412
+ const cloned = response.clone();
3413
+ try {
3414
+ return await response.json();
3415
+ } catch (error) {
3416
+ const contentType = response.headers.get("content-type") ?? "(none)";
3417
+ const bodyText = await cloned.text().catch(() => "(unreadable)");
3418
+ const preview = bodyText.length > PREVIEW_LIMIT ? bodyText.slice(0, PREVIEW_LIMIT) + "...(truncated)" : bodyText;
3419
+ consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..${PREVIEW_LIMIT}]=${JSON.stringify(preview)}`);
3420
+ throw error;
3421
+ }
3422
+ }
3423
+
3424
+ //#endregion
3425
+ //#region src/lib/response-cap.ts
3426
+ /**
3427
+ * Hard byte cap for non-streaming upstream response bodies.
3428
+ *
3429
+ * Anthropic responses with large tool_use blocks can legitimately reach
3430
+ * several MB, but a multi-GB body is either a buggy upstream or a malicious
3431
+ * one. Buffering it would OOM the proxy and crash all in-flight requests.
3432
+ *
3433
+ * Applies to /v1/messages, /v1/chat/completions, and /v1/responses.
3434
+ */
3435
+ const MAX_RESPONSE_BODY_BYTES = 10 * 1024 * 1024;
3436
+ /**
3437
+ * Read a Response body with a hard byte cap, then parse as JSON.
3438
+ *
3439
+ * Takes the fast path (response.json()) when Content-Length is
3440
+ * present and within the cap, avoiding the streaming-reader overhead for
3441
+ * the vast majority of normal responses.
3442
+ *
3443
+ * When the cap is hit:
3444
+ * - the reader is cancelled to release the upstream socket
3445
+ * - a structured Anthropic-format error is returned to the caller
3446
+ * (the caller wraps it in c.json(), not throws — the client gets a
3447
+ * clean 502 error, not an unhandled-rejection crash)
3448
+ *
3449
+ * Returns `{ ok: true, value }` on success or `{ ok: false, errorResponse, status }`
3450
+ * on cap exceeded.
3451
+ */
3452
+ async function readResponseBodyCapped(response, routePath, capBytes = MAX_RESPONSE_BODY_BYTES) {
3453
+ const contentLengthHeader = response.headers.get("content-length");
3454
+ const contentLength = contentLengthHeader ? parseInt(contentLengthHeader, 10) : NaN;
3455
+ if (!isNaN(contentLength) && contentLength <= capBytes) return {
3456
+ ok: true,
3457
+ value: await parseJsonOrDiagnose(response, routePath)
3458
+ };
3459
+ const reader = response.body?.getReader();
3460
+ if (!reader) return {
3461
+ ok: true,
3462
+ value: await parseJsonOrDiagnose(response, routePath)
3463
+ };
3464
+ const chunks = [];
3465
+ let totalBytes = 0;
3466
+ let capped = false;
3467
+ try {
3468
+ while (true) {
3469
+ const { done, value } = await reader.read();
3470
+ if (done) break;
3471
+ if (!value) continue;
3472
+ totalBytes += value.byteLength;
3473
+ if (totalBytes > capBytes) {
3474
+ capped = true;
3475
+ try {
3476
+ await reader.cancel("size_cap");
3477
+ } catch {}
3478
+ break;
3479
+ }
3480
+ chunks.push(value);
3257
3481
  }
3258
- },
3259
- {
3260
- toolNameHttp: "browser_open_tab",
3261
- description: "Open a URL in a new browser tab and wait for the page to finish loading. Returns the new tab's id, final URL after redirects, and HTTP status. Refuses to navigate to browser-internal settings / preferences / extensions / flags pages (returns {blocked: true, reason}); devtools://* is allowed.",
3262
- inputSchema: {
3263
- type: "object",
3264
- required: ["url"],
3265
- additionalProperties: false,
3266
- properties: {
3267
- url: {
3268
- type: "string",
3269
- description: "The URL to load. Maximum 8 KB. Settings / preferences / extensions / flags pages are blocked."
3270
- },
3271
- reuseActive: {
3272
- type: "boolean",
3273
- description: "When true, navigate the currently active tab instead of opening a new one. Default false."
3482
+ } catch (err) {
3483
+ if (!capped) consola.warn(`readResponseBodyCapped: read error at ${routePath}:`, err);
3484
+ }
3485
+ if (capped) {
3486
+ consola.warn(`Non-streaming upstream response at ${routePath} exceeded ${capBytes} bytes (10 MiB cap); dropping body to prevent OOM. Check upstream health.`);
3487
+ return {
3488
+ ok: false,
3489
+ status: 502,
3490
+ errorResponse: {
3491
+ type: "error",
3492
+ error: {
3493
+ type: "api_error",
3494
+ message: `Upstream response body exceeded the 10 MiB size cap for non-streaming ${routePath}. The upstream may be misbehaving. Try enabling streaming (stream: true) which handles large responses chunk-by-chunk.`
3274
3495
  }
3275
3496
  }
3276
- },
3277
- capability: "browser",
3278
- async handler(args, signal) {
3279
- return dispatchBrowserTool("browser_open_tab", args, signal);
3280
- }
3281
- },
3282
- {
3283
- toolNameHttp: "browser_close_tab",
3284
- description: "Close one or more tabs by tab id.",
3285
- inputSchema: {
3286
- type: "object",
3287
- required: ["tabIds"],
3288
- additionalProperties: false,
3289
- properties: { tabIds: {
3290
- type: "array",
3291
- items: { type: "number" },
3292
- description: "Array of tab ids to close (from browser_list_tabs)."
3293
- } }
3294
- },
3295
- capability: "browser",
3296
- async handler(args, signal) {
3297
- return dispatchBrowserTool("browser_close_tab", args, signal);
3298
- }
3299
- },
3300
- {
3301
- toolNameHttp: "browser_navigate",
3302
- description: "Navigate an existing tab: goto a URL, go back, go forward, or reload. Same URL-blocking policy as browser_open_tab.",
3303
- inputSchema: {
3304
- type: "object",
3305
- required: ["tabId", "action"],
3306
- additionalProperties: false,
3307
- properties: {
3308
- tabId: {
3309
- type: "number",
3310
- description: "Tab id from browser_list_tabs / browser_open_tab."
3311
- },
3497
+ };
3498
+ }
3499
+ const merged = new Uint8Array(totalBytes);
3500
+ let offset = 0;
3501
+ for (const chunk of chunks) {
3502
+ merged.set(chunk, offset);
3503
+ offset += chunk.byteLength;
3504
+ }
3505
+ const text = new TextDecoder().decode(merged);
3506
+ try {
3507
+ return {
3508
+ ok: true,
3509
+ value: JSON.parse(text)
3510
+ };
3511
+ } catch (err) {
3512
+ const preview = text.slice(0, 200);
3513
+ const contentType = response.headers.get("content-type") ?? "(none)";
3514
+ consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..200]=${JSON.stringify(preview)}`);
3515
+ throw err;
3516
+ }
3517
+ }
3518
+
3519
+ //#endregion
3520
+ //#region src/services/copilot/create-chat-completions.ts
3521
+ const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
3522
+ if (!state.copilotToken) throw new Error("Copilot token not found");
3523
+ const enableVision = payload.messages.some((x) => typeof x.content !== "string" && x.content?.some((x$1) => x$1.type === "image_url"));
3524
+ const isAgentCall = payload.messages.some((msg) => ["assistant", "tool"].includes(msg.role));
3525
+ const url = `${copilotBaseUrl(state)}/chat/completions`;
3526
+ const doFetch = () => {
3527
+ const fetchInit = {
3528
+ method: "POST",
3529
+ headers: {
3530
+ ...copilotHeaders(state, enableVision),
3531
+ ...modelHeaders,
3532
+ "X-Initiator": isAgentCall ? "agent" : "user"
3533
+ },
3534
+ body: JSON.stringify(payload)
3535
+ };
3536
+ const signals = [];
3537
+ if (UPSTREAM_FETCH_TIMEOUT_MS > 0) signals.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
3538
+ if (callerSignal) signals.push(callerSignal);
3539
+ if (signals.length === 1) fetchInit.signal = signals[0];
3540
+ else if (signals.length > 1) fetchInit.signal = AbortSignal.any(signals);
3541
+ return fetch(url, fetchInit);
3542
+ };
3543
+ const response = await tryRefreshAndRetry(doFetch, "/chat/completions");
3544
+ if (!response.ok) {
3545
+ let errorBody = "";
3546
+ try {
3547
+ errorBody = await response.text();
3548
+ } catch {
3549
+ errorBody = "(could not read error body)";
3550
+ }
3551
+ const claudeModels = state.models?.data.filter((m) => m.id.startsWith("claude")).map((m) => m.id).join(", ") ?? "(models not loaded)";
3552
+ consola.error(`Copilot rejected model "${payload.model}": ${response.status} ${errorBody} (available Claude models: ${claudeModels})`);
3553
+ throw new HTTPError("Failed to create chat completions", new Response(errorBody, {
3554
+ status: response.status,
3555
+ statusText: response.statusText,
3556
+ headers: response.headers
3557
+ }));
3558
+ }
3559
+ if (payload.stream) return events(response);
3560
+ const cappedResult = await readResponseBodyCapped(response, "/v1/chat/completions", MAX_RESPONSE_BODY_BYTES);
3561
+ if (!cappedResult.ok) throw new HTTPError("Upstream /v1/chat/completions response exceeded 10 MiB size cap", new Response(JSON.stringify(cappedResult.errorResponse), {
3562
+ status: cappedResult.status,
3563
+ headers: { "content-type": "application/json" }
3564
+ }));
3565
+ return cappedResult.value;
3566
+ };
3567
+
3568
+ //#endregion
3569
+ //#region src/lib/browser-mcp/compressor.ts
3570
+ /**
3571
+ * Static fallback chain. Order is preference: faster + multimodal +
3572
+ * cheaper at the top. All three support `tool_calls` and image input
3573
+ * (the latter is required for Phase D visual fallback).
3574
+ */
3575
+ const COMPRESSOR_FALLBACK_CHAIN = [
3576
+ "gemini-3.5-flash",
3577
+ "gpt-5.4-mini",
3578
+ "claude-haiku-4-5"
3579
+ ];
3580
+ let selectedBackend;
3581
+ /**
3582
+ * Walk the fallback chain against the live Copilot catalog. Returns
3583
+ * the first id present AND advertising `tool_calls` support, or
3584
+ * undefined when none match. Cached after first successful selection
3585
+ * so all compressor calls in a session hit the same backend; clear
3586
+ * the cache by calling `__resetCompressorBackendForTests`.
3587
+ */
3588
+ function pickBackendFromCatalog() {
3589
+ if (selectedBackend) return selectedBackend;
3590
+ const models$1 = state.models?.data;
3591
+ if (!models$1) return void 0;
3592
+ for (const candidate of COMPRESSOR_FALLBACK_CHAIN) {
3593
+ const found = models$1.find((m) => m.id === candidate);
3594
+ if (!found) continue;
3595
+ if (found.capabilities?.supports?.tool_calls !== true) continue;
3596
+ selectedBackend = candidate;
3597
+ consola.info(`[browser-mcp] compressor backend: ${candidate}`);
3598
+ return candidate;
3599
+ }
3600
+ }
3601
+ /**
3602
+ * True iff any compressor backend is available. Mirrors
3603
+ * `workerToolsEnabled()` / `standInToolEnabled()` — used by the
3604
+ * compound-tool capability gate so `browser_find` / `browser_act
3605
+ * (intent mode)` / `browser_extract` are dropped from `tools/list`
3606
+ * AND fail `tools/call` with -32601 when no backend is reachable.
3607
+ */
3608
+ function compressorAvailable() {
3609
+ return pickBackendFromCatalog() !== void 0;
3610
+ }
3611
+ /**
3612
+ * One round-trip to the picked backend. Wraps slot acquisition, payload
3613
+ * assembly, and JSON parsing. Forces structured output via tool-calling:
3614
+ * each caller supplies a tool schema and we set `tool_choice` so the
3615
+ * model has to emit a tool call whose `arguments` field is a
3616
+ * shape-validated JSON string. This eliminates a whole class of bug
3617
+ * where models wrap their JSON in markdown code fences despite
3618
+ * `response_format: { type: "json_object" }`. As a belt-and-suspenders
3619
+ * fallback for backends that ignore `tool_choice`, we ALSO accept
3620
+ * free-form `message.content` and strip a leading / trailing ```` ``` ````
3621
+ * code fence before parsing.
3622
+ */
3623
+ async function callCompressor(systemPrompt, userMessage, tool, signal) {
3624
+ const model = pickBackendFromCatalog();
3625
+ if (!model) throw new Error(`browser-mcp compressor: no backend available in catalog. Checked: ${COMPRESSOR_FALLBACK_CHAIN.join(", ")}`);
3626
+ const release = acquireInFlightSlot();
3627
+ if (!release) throw new Error("browser-mcp compressor: inflight slot saturated (cap 8); try again shortly");
3628
+ try {
3629
+ const msg = ((await createChatCompletions({
3630
+ model,
3631
+ stream: false,
3632
+ messages: [{
3633
+ role: "system",
3634
+ content: systemPrompt
3635
+ }, {
3636
+ role: "user",
3637
+ content: userMessage
3638
+ }],
3639
+ tools: [{
3640
+ type: "function",
3641
+ function: {
3642
+ name: tool.name,
3643
+ description: tool.description,
3644
+ parameters: tool.parameters
3645
+ }
3646
+ }],
3647
+ tool_choice: {
3648
+ type: "function",
3649
+ function: { name: tool.name }
3650
+ }
3651
+ }, void 0, signal)).choices?.[0])?.message;
3652
+ const toolArgs = msg?.tool_calls?.[0]?.function?.arguments;
3653
+ if (typeof toolArgs === "string" && toolArgs.length > 0) return JSON.parse(toolArgs);
3654
+ const text = typeof msg?.content === "string" ? msg.content : "";
3655
+ if (text.length === 0) throw new Error("browser-mcp compressor: empty response from backend (no tool_calls and no content)");
3656
+ return JSON.parse(stripCodeFence(text));
3657
+ } finally {
3658
+ release();
3659
+ }
3660
+ }
3661
+ /**
3662
+ * Strip a single leading / trailing ``` (or ```json) code fence from a
3663
+ * model's free-form text reply so JSON.parse works. Idempotent on
3664
+ * fence-free input. Defensive against the failure mode caught in PR #55
3665
+ * smoke-test: some models wrap JSON output in ```json ... ``` even
3666
+ * with response_format: { type: "json_object" } set.
3667
+ */
3668
+ function stripCodeFence(text) {
3669
+ const t = text.trim();
3670
+ const fenced = /^```(?:json)?\s*\n?([\s\S]*?)\n?```$/.exec(t);
3671
+ if (fenced) return fenced[1].trim();
3672
+ return t;
3673
+ }
3674
+ /**
3675
+ * Pick a single element matching the natural-language intent. Used by
3676
+ * `browser_act` in intent mode. Internally delegates the matching step
3677
+ * to `pickMatchingElements` (the same picker `browser_find` uses) so
3678
+ * `find` and `act` can't disagree on the same intent, then infers the
3679
+ * action verb deterministically from the picked element's role and
3680
+ * whether the intent supplied a value. Single source of truth for
3681
+ * element matching.
3682
+ *
3683
+ * Returns ref="" + confidence=0 when no element matches — caller
3684
+ * should escalate to visual fallback (when `visualSurfaces` is
3685
+ * present) or surface the miss to the lead model.
3686
+ */
3687
+ async function pickElement(snapshot, intent, signal, value) {
3688
+ const matches = await pickMatchingElements(snapshot, intent, signal);
3689
+ if (matches.length === 0) return {
3690
+ ref: "",
3691
+ action: "click",
3692
+ confidence: 0
3693
+ };
3694
+ const top = matches[0];
3695
+ const el = snapshot.elements.find((e) => e.ref === top.ref);
3696
+ if (!el) return {
3697
+ ref: "",
3698
+ action: "click",
3699
+ confidence: 0
3700
+ };
3701
+ const action = inferAction(el.role, intent, value);
3702
+ const out = {
3703
+ ref: top.ref,
3704
+ action,
3705
+ confidence: .8
3706
+ };
3707
+ if (value !== void 0 && (action === "fill" || action === "type" || action === "select")) out.value = value;
3708
+ return out;
3709
+ }
/**
 * Deterministic action picker. Given an element role + the intent text
 * + an optional value, decide which primitive action to dispatch.
 * Kept separate from the compressor so the compressor only has to
 * match elements, and action selection stays a handful of readable
 * rules, evaluated in this order:
 *
 * 1. intent mentions "scroll" (or "scroll into view" in any spacing)
 *    -> "scroll_into_view"
 * 2. role is select / combobox -> "select"
 * 3. role is a text-entry role -> "type" when the intent contains the
 *    word "type" and a value was supplied, otherwise "fill"
 * 4. anything else -> "click"
 *
 * A missing role or intent is treated as an empty string (falling
 * through to "click") instead of throwing.
 *
 * @param role Element role from the snapshot; compared case-insensitively.
 * @param intent Natural-language intent; compared case-insensitively.
 * @param value Optional value supplied by the caller.
 * @returns One of "scroll_into_view" | "select" | "type" | "fill" | "click".
 */
function inferAction(role, intent, value) {
	const intentLower = String(intent ?? "").toLowerCase();
	const r = String(role ?? "").toLowerCase();
	if (/\bscroll\b/.test(intentLower) || /scroll[ -]?into[ -]?view/.test(intentLower)) return "scroll_into_view";
	if (r === "select" || r === "combobox") return "select";
	if (r === "textarea" || r === "input" || r === "textbox" || r === "searchbox" || r === "spinbutton") {
		if (/\btype\b/.test(intentLower) && value !== void 0) return "type";
		return "fill";
	}
	return "click";
}
/** System prompt for the element-matching compressor call. */
const FIND_ELEMENTS_SYSTEM = `You match a natural-language intent to elements from a browser page snapshot.

Snapshot elements look like: {ref: "e42", role: "button", name: "Sign in"}.

Call the find_elements tool with up to 5 best matches ordered by relevance.`;
/** Tool definition the compressor must call to report its ranked matches. */
const FIND_ELEMENTS_TOOL = {
	name: "find_elements",
	description: "Report ranked element matches for the intent.",
	parameters: {
		type: "object",
		required: ["matches"],
		additionalProperties: false,
		properties: { matches: {
			type: "array",
			maxItems: 5,
			items: {
				type: "object",
				required: ["ref", "reason"],
				additionalProperties: false,
				properties: {
					ref: { type: "string" },
					reason: { type: "string" }
				}
			}
		} }
	}
};
/**
 * Return up to 5 candidate matches for an intent. Used by
 * `browser_find` — the lead model gets a small ranked list rather than
 * a full element dump — and by `pickElement`.
 *
 * Only `ref`, `role` and `name` of each snapshot element are sent to
 * the compressor. The reply is sanitized: entries that are not objects
 * or lack a non-empty string `ref` are dropped, a non-string `reason`
 * becomes "", and a ref that appears more than once is kept only at
 * its first (highest-ranked) position.
 *
 * @param snapshot Page snapshot; a missing or empty `elements` list
 * yields `[]` without calling the compressor (nothing could match).
 * @param intent Natural-language description of what to find.
 * @param signal Abort signal forwarded to `callCompressor`.
 * @returns Array of `{ref, reason}` in ranked order; empty when nothing matches.
 */
async function pickMatchingElements(snapshot, intent, signal) {
	const maxMatches = 5;
	const elements = Array.isArray(snapshot?.elements) ? snapshot.elements : [];
	if (elements.length === 0) return [];
	const trimmed = elements.map((e) => ({
		ref: e.ref,
		role: e.role,
		name: e.name
	}));
	const raw = await callCompressor(FIND_ELEMENTS_SYSTEM, JSON.stringify({
		intent,
		elements: trimmed
	}), FIND_ELEMENTS_TOOL, signal);
	if (!raw || typeof raw !== "object") return [];
	const matches = raw.matches;
	if (!Array.isArray(matches)) return [];
	const out = [];
	const seen = new Set();
	for (const m of matches) {
		if (out.length >= maxMatches) break;
		if (!m || typeof m !== "object") continue;
		const ref = m.ref;
		if (typeof ref !== "string" || ref.length === 0 || seen.has(ref)) continue;
		seen.add(ref);
		out.push({
			ref,
			reason: typeof m.reason === "string" ? m.reason : ""
		});
	}
	return out;
}
/** System prompt for the structured-extraction compressor call. */
const EXTRACT_SYSTEM = `You extract structured data from a browser page snapshot into a JSON object matching the result schema you've been given.

Use the snapshot's text + element list as your source. Be faithful to what's visible; do not invent values.

Call the extract_result tool with your answer in the result field. The result field's schema is the caller's exact requested shape — fill it completely. If a field cannot be determined from the snapshot, omit it (when optional) or use a sensible empty value (when required).`;
/**
 * Lightweight sanity check on a caller-supplied JSON Schema: the
 * schema must be a non-null, non-array object AND declare at least one
 * of a recognized `type`, `properties`, `items`, `$ref`, or a compound
 * combinator (`oneOf` / `anyOf` / `allOf`). This catches empty `{}`
 * and structurally-malformed schemas like `{type: "nonsense"}`.
 *
 * `type` may be a single recognized type name or — as JSON Schema
 * allows — a non-empty array of recognized type names
 * (e.g. `["string", "null"]`). Only the top level is inspected.
 *
 * @param schema Caller-supplied schema value of any type.
 * @returns An error message string when the schema fails the check,
 * or undefined when the schema looks plausible.
 */
function validateExtractSchema(schema) {
	if (!schema || typeof schema !== "object" || Array.isArray(schema)) return "schema must be a non-null JSON object";
	const obj = schema;
	const validTypes = new Set([
		"object",
		"array",
		"string",
		"number",
		"integer",
		"boolean",
		"null"
	]);
	const isKnownType = (t) => typeof t === "string" && validTypes.has(t);
	const hasValidType = Array.isArray(obj.type) ? obj.type.length > 0 && obj.type.every(isKnownType) : isKnownType(obj.type);
	const hasShape = "properties" in obj || "items" in obj || "$ref" in obj || "oneOf" in obj || "anyOf" in obj || "allOf" in obj;
	const typeList = Array.from(validTypes).join(", ");
	if (!hasValidType && !hasShape) return `schema must declare a recognized type (one of ${typeList}) OR have properties / items / $ref / oneOf / anyOf / allOf`;
	// A shape keyword alone is fine, but a present-yet-unrecognized `type` is still an error.
	if ("type" in obj && !hasValidType) return `schema 'type' field must be one of: ${typeList}`;
}
/** Thrown by `extractStructured` when the caller's schema fails `validateExtractSchema`. */
var SchemaValidationError = class extends Error {
	constructor(message) {
		super(message);
		this.name = "SchemaValidationError";
	}
};
/** Thrown by `extractStructured` when the model's result does not have the declared object / array shape. */
var ResultShapeError = class extends Error {
	constructor(message) {
		super(message);
		this.name = "ResultShapeError";
	}
};
/**
 * Structured extraction. The caller's JSON schema becomes the schema
 * of the `result` parameter of the `extract_result` tool, so the
 * tool-call mechanism itself pushes the model toward the requested
 * shape.
 *
 * Before the call, the schema is checked with `validateExtractSchema`;
 * a failing schema raises `SchemaValidationError` carrying that
 * function's message. After the call, the `result` field is unwrapped
 * (a reply without one is used as-is) and, when the schema's `type`
 * is exactly "object" or "array", the value's shape is verified — a
 * mismatch raises `ResultShapeError` naming what was returned. Other
 * declared types are not checked.
 *
 * @param snapshot Page snapshot; only `text` and `elements` are sent.
 * @param schema Caller-supplied JSON schema for the desired result.
 * @param instruction Plain-language description of what to extract.
 * @param signal Abort signal forwarded to `callCompressor`.
 * @returns The extracted value.
 */
async function extractStructured(snapshot, schema, instruction, signal) {
	const problem = validateExtractSchema(schema);
	if (problem) throw new SchemaValidationError(problem);
	const userPayload = JSON.stringify({
		instruction,
		snapshot: {
			text: snapshot.text,
			elements: snapshot.elements
		}
	});
	const extractTool = {
		name: "extract_result",
		description: "Report the extracted object. The result field's schema is the caller's requested shape; fill it completely.",
		parameters: {
			type: "object",
			required: ["result"],
			additionalProperties: false,
			properties: { result: schema }
		}
	};
	const reply = await callCompressor(EXTRACT_SYSTEM, userPayload, extractTool, signal);
	let result = reply;
	if (reply && typeof reply === "object" && "result" in reply) result = reply.result;
	switch (schema.type) {
		case "object": {
			const isPlainObject = typeof result === "object" && result !== null && !Array.isArray(result);
			if (!isPlainObject) throw new ResultShapeError(`schema declared type "object" but model returned ${describeType(result)}`);
			break;
		}
		case "array":
			if (!Array.isArray(result)) throw new ResultShapeError(`schema declared type "array" but model returned ${describeType(result)}`);
			break;
		default: break;
	}
	return result;
}
/**
 * Name a value's type for error messages: "null" for null, "array"
 * for arrays, otherwise whatever `typeof` reports.
 */
function describeType(v) {
	return v === null ? "null" : Array.isArray(v) ? "array" : typeof v;
}
/** System prompt for the screenshot-based (visual) picker call. */
const PICK_VISUAL_SYSTEM = `You're given a browser screenshot, a natural-language intent, and a list of canvas / svg regions in CSS-pixel coordinates.

Find the pixel coordinates in the screenshot where the intent points. Coordinates are CSS pixels (origin top-left of viewport).

Call the pick_visual tool with the coordinates. If no clear target is visible, call with x=0, y=0, confidence=0.`;
/** Tool definition the model must call to report the picked coordinates. */
const PICK_VISUAL_TOOL = {
	name: "pick_visual",
	description: "Report the pixel coordinates the intent points at.",
	parameters: {
		type: "object",
		required: [
			"x",
			"y",
			"confidence",
			"reason"
		],
		additionalProperties: false,
		properties: {
			x: { type: "number" },
			y: { type: "number" },
			confidence: { type: "number" },
			reason: { type: "string" }
		}
	}
};
/**
 * Visual fallback — used by `browser_act` when text-based
 * `pickElement` misses AND the snapshot reported `visualSurfaces`
 * (canvas / svg regions). Sends the intent + surfaces list as text and
 * the screenshot as a data-URL image, and returns coordinates the
 * caller dispatches to `browser_mouse {x, y}`.
 *
 * The reply is sanitized so the caller's confidence gate can be
 * trusted:
 * - a non-object reply yields confidence 0 ("empty backend response");
 * - if `x` or `y` is missing or not a finite number, the result is
 *   `(0, 0)` with confidence forced to 0 — otherwise a confident reply
 *   with unusable coordinates would click the viewport origin;
 * - a non-finite confidence becomes 0 (NaN would slip past a `< 0.5`
 *   comparison); finite values are clamped to [0, 1];
 * - coordinates are rounded to integers.
 *
 * @param screenshotB64 Base64-encoded screenshot bytes.
 * @param contentType MIME type used in the data URL.
 * @param intent Natural-language description of the target.
 * @param visualSurfaces Surfaces list from the snapshot, passed through verbatim.
 * @param signal Abort signal forwarded to `callCompressor`.
 * @returns `{x, y, confidence, reason}`.
 */
async function pickElementVisual(screenshotB64, contentType, intent, visualSurfaces, signal) {
	const raw = await callCompressor(PICK_VISUAL_SYSTEM, [{
		type: "text",
		text: JSON.stringify({
			intent,
			visual_surfaces: visualSurfaces
		})
	}, {
		type: "image_url",
		image_url: { url: `data:${contentType};base64,${screenshotB64}` }
	}], PICK_VISUAL_TOOL, signal);
	if (!raw || typeof raw !== "object") return {
		x: 0,
		y: 0,
		confidence: 0,
		reason: "empty backend response"
	};
	const obj = raw;
	const reason = typeof obj.reason === "string" ? obj.reason : "";
	// Number.isFinite is false for non-numbers, NaN and +/-Infinity alike.
	if (!Number.isFinite(obj.x) || !Number.isFinite(obj.y)) return {
		x: 0,
		y: 0,
		confidence: 0,
		reason
	};
	return {
		x: Math.round(obj.x),
		y: Math.round(obj.y),
		confidence: Number.isFinite(obj.confidence) ? Math.max(0, Math.min(1, obj.confidence)) : 0,
		reason
	};
}
3936
+
3937
+ //#endregion
3938
+ //#region src/lib/browser-mcp/index.ts
3939
+ /**
3940
+ * Helper for compound tools (`browser_find` / `browser_act` /
3941
+ * `browser_extract`): fetch the page snapshot via the existing
3942
+ * primitive dispatcher and unwrap the JSON text envelope. Compound
3943
+ * tools all start from a snapshot, so a single helper keeps the
3944
+ * unwrap logic in one place.
3945
+ */
3946
+ async function fetchSnapshot(tabId, signal) {
3947
+ const env = await dispatchBrowserTool("browser_read_page", {
3948
+ tabId,
3949
+ mode: "summary"
3950
+ }, signal);
3951
+ if (env.isError) throw new Error("browser_read_page returned an error envelope; bridge / extension not ready");
3952
+ const text = env.content?.[0]?.text;
3953
+ if (typeof text !== "string") throw new Error("browser_read_page returned no text content");
3954
+ return JSON.parse(text);
3955
+ }
/**
 * Wrap a payload in a tool-result envelope with a single text content
 * item. Strings are passed through unchanged; any other value is
 * serialized with `JSON.stringify` using 2-space indentation. When
 * `isError` is truthy the envelope additionally carries
 * `isError: true`; otherwise the key is absent.
 */
function toolEnvelope(data, isError) {
	let text;
	if (typeof data === "string") text = data;
	else text = JSON.stringify(data, null, 2);
	const envelope = { content: [{
		type: "text",
		text
	}] };
	if (isError) envelope.isError = true;
	return envelope;
}
3969
+ /**
3970
+ * Browser-control MCP tools (`browser_*`). All entries route through
3971
+ * `dispatchBrowserTool()` which (1) runs the bridge-layer URL policy
3972
+ * check, (2) runs the install-check pre-flight (returning structured
3973
+ * install_required JSON when the bridge or extension isn't ready),
3974
+ * and (3) opens a WS to the bridge, sends the tool call, awaits the
3975
+ * response with a per-tool timeout.
3976
+ *
3977
+ * Each entry carries `capability: "browser"` so `browserToolsEnabled()`
3978
+ * in `src/routes/mcp/handler.ts` drops them at both list-time and
3979
+ * call-time when the operator hasn't opted in via `--browse` or
3980
+ * `GH_ROUTER_ENABLE_BROWSE=1`.
3981
+ *
3982
+ * v1 surface: 19 tools (Phases 3 + 4a + 4b + humanlike input v2).
3983
+ */
3984
+ const BROWSER_TOOLS = Object.freeze([
3985
+ {
3986
+ toolNameHttp: "browser_list_tabs",
3987
+ description: "List all open tabs across all browser windows. Returns each tab's id (used by other browser_* tools), URL, title, active flag, and window id.",
3988
+ inputSchema: {
3989
+ type: "object",
3990
+ additionalProperties: false,
3991
+ properties: {}
3992
+ },
3993
+ capability: "browser",
3994
+ async handler(args, signal) {
3995
+ return dispatchBrowserTool("browser_list_tabs", args, signal);
3996
+ }
3997
+ },
3998
+ {
3999
+ toolNameHttp: "browser_open_tab",
4000
+ description: "Open a URL in a new browser tab and wait for the page to finish loading. Returns the new tab's id, final URL after redirects, and HTTP status. Refuses to navigate to browser-internal settings / preferences / extensions / flags pages (returns {blocked: true, reason}); devtools://* is allowed.",
4001
+ inputSchema: {
4002
+ type: "object",
4003
+ required: ["url"],
4004
+ additionalProperties: false,
4005
+ properties: {
4006
+ url: {
4007
+ type: "string",
4008
+ description: "The URL to load. Maximum 8 KB. Settings / preferences / extensions / flags pages are blocked."
4009
+ },
4010
+ reuseActive: {
4011
+ type: "boolean",
4012
+ description: "When true, navigate the currently active tab instead of opening a new one. Default false."
4013
+ }
4014
+ }
4015
+ },
4016
+ capability: "browser",
4017
+ async handler(args, signal) {
4018
+ return dispatchBrowserTool("browser_open_tab", args, signal);
4019
+ }
4020
+ },
4021
+ {
4022
+ toolNameHttp: "browser_close_tab",
4023
+ description: "Close one or more tabs by tab id.",
4024
+ inputSchema: {
4025
+ type: "object",
4026
+ required: ["tabIds"],
4027
+ additionalProperties: false,
4028
+ properties: { tabIds: {
4029
+ type: "array",
4030
+ items: { type: "number" },
4031
+ description: "Array of tab ids to close (from browser_list_tabs)."
4032
+ } }
4033
+ },
4034
+ capability: "browser",
4035
+ async handler(args, signal) {
4036
+ return dispatchBrowserTool("browser_close_tab", args, signal);
4037
+ }
4038
+ },
4039
+ {
4040
+ toolNameHttp: "browser_navigate",
4041
+ description: "Navigate an existing tab: goto a URL, go back, go forward, or reload. Same URL-blocking policy as browser_open_tab.",
4042
+ inputSchema: {
4043
+ type: "object",
4044
+ required: ["tabId", "action"],
4045
+ additionalProperties: false,
4046
+ properties: {
4047
+ tabId: {
4048
+ type: "number",
4049
+ description: "Tab id from browser_list_tabs / browser_open_tab."
4050
+ },
3312
4051
  action: {
3313
4052
  type: "string",
3314
4053
  enum: [
@@ -3360,85 +4099,26 @@ const BROWSER_TOOLS = Object.freeze([
3360
4099
  },
3361
4100
  {
3362
4101
  toolNameHttp: "browser_read_page",
3363
- description: "Extract rendered page text plus interactive elements (refs, roles, names, bounding boxes) plus viewport metadata. Each element entry carries bbox: [x, y, w, h] in CSS viewport pixels — the same coordinate space used by browser_mouse / browser_drag / browser_scroll(at-pointer). Element refs returned here are intended as the primary input to follow-up tool calls preferred over CSS selectors because refs are stable across dynamic class names. The viewport block {width, height, devicePixelRatio, scrollX, scrollY} lets you map a CSS-px bbox to a device-px pixel in browser_screenshot (device_px = css_px * devicePixelRatio). Text is capped at 256 KiB; elements at the first 200 interactive nodes.",
3364
- inputSchema: {
3365
- type: "object",
3366
- required: ["tabId"],
3367
- additionalProperties: false,
3368
- properties: { tabId: {
3369
- type: "number",
3370
- description: "Tab id from browser_list_tabs / browser_open_tab."
3371
- } }
3372
- },
3373
- capability: "browser",
3374
- async handler(args, signal) {
3375
- return dispatchBrowserTool("browser_read_page", args, signal);
3376
- }
3377
- },
3378
- {
3379
- toolNameHttp: "browser_click",
3380
- description: "Click an element by ref (from a prior browser_read_page) or CSS selector. Returns {ok, navigated} where navigated=true if the URL changed within ~300ms of the click.",
4102
+ description: "Compressed page snapshot for the model: visible text, interactive elements with stable refs, viewport metadata, and (when present) `visualSurfaces` listing canvas / svg regions that need vision. Each element entry carries `bbox: [x, y, w, h]` in CSS viewport pixels (same coord space as browser_mouse / drag / scroll-at-pointer). Refs (e.g. `e42`) are stable for the lifetime of one read_page snapshot and are the preferred input to follow-up actions over brittle CSS selectors. The `viewport` block (`width`, `height`, `devicePixelRatio`, `scrollX`, `scrollY`) lets you map CSS-px bbox to device-px pixels for browser_screenshot. Mode controls what ships back: `summary` (default, ~5-15 KB) returns only viewport-visible elements/text and drops nameless non-interactive nodes; `full` returns up to 200 elements + 256 KiB of innerText (the legacy behavior — use only when you need off-screen content unscrolled). PREFER browser_act / browser_find for intent-driven interaction; read_page is the lower-level snapshot when you need to enumerate.",
3381
4103
  inputSchema: {
3382
4104
  type: "object",
3383
4105
  required: ["tabId"],
3384
4106
  additionalProperties: false,
3385
4107
  properties: {
3386
- tabId: { type: "number" },
3387
- ref: {
3388
- type: "string",
3389
- description: "Element ref from browser_read_page (preferred)."
3390
- },
3391
- selector: {
3392
- type: "string",
3393
- description: "CSS selector (fallback when no ref)."
3394
- },
3395
- button: {
3396
- type: "string",
3397
- enum: ["left", "right"],
3398
- description: "Mouse button. Default 'left'."
3399
- },
3400
- clickCount: {
4108
+ tabId: {
3401
4109
  type: "number",
3402
- description: "Number of times to click. Default 1."
3403
- }
3404
- }
3405
- },
3406
- capability: "browser",
3407
- async handler(args, signal) {
3408
- return dispatchBrowserTool("browser_click", args, signal);
3409
- }
3410
- },
3411
- {
3412
- toolNameHttp: "browser_fill",
3413
- description: "Type into an input / textarea, select from a dropdown, or toggle a checkbox / radio. Dispatches native input and change events so React-style controlled inputs see the value.",
3414
- inputSchema: {
3415
- type: "object",
3416
- required: ["tabId", "value"],
3417
- additionalProperties: false,
3418
- properties: {
3419
- tabId: { type: "number" },
3420
- ref: {
3421
- type: "string",
3422
- description: "Element ref from browser_read_page (preferred)."
4110
+ description: "Tab id from browser_list_tabs / browser_open_tab."
3423
4111
  },
3424
- selector: {
4112
+ mode: {
3425
4113
  type: "string",
3426
- description: "CSS selector (fallback when no ref)."
3427
- },
3428
- value: { description: "The value to set. String for inputs / textareas / select option value. Boolean for checkbox / radio. Max 1 MB." },
3429
- clearFirst: {
3430
- type: "boolean",
3431
- description: "Clear the input before typing (default true). No effect on select / checkbox."
3432
- },
3433
- pressEnter: {
3434
- type: "boolean",
3435
- description: "After typing, dispatch Enter keydown / keyup and call form.requestSubmit if available. Default false."
4114
+ enum: ["summary", "full"],
4115
+ description: "Snapshot scope. Default 'summary' returns viewport-visible elements + text capped at 20 KiB. 'full' returns up to 200 interactive elements page-wide + 256 KiB of innerText."
3436
4116
  }
3437
4117
  }
3438
4118
  },
3439
4119
  capability: "browser",
3440
4120
  async handler(args, signal) {
3441
- return dispatchBrowserTool("browser_fill", args, signal);
4121
+ return dispatchBrowserTool("browser_read_page", args, signal);
3442
4122
  }
3443
4123
  },
3444
4124
  {
@@ -3613,48 +4293,6 @@ const BROWSER_TOOLS = Object.freeze([
3613
4293
  return dispatchBrowserTool("browser_download", args, signal);
3614
4294
  }
3615
4295
  },
3616
- {
3617
- toolNameHttp: "browser_console_logs",
3618
- description: "Drain console messages a tab has emitted since the last call. The first call for a tab attaches chrome.debugger and starts capturing, so very-early-load messages from before the first call are missed; subsequent calls return everything since the previous drain. Buffer is capped at 1000 entries per tab.",
3619
- inputSchema: {
3620
- type: "object",
3621
- required: ["tabId"],
3622
- additionalProperties: false,
3623
- properties: {
3624
- tabId: { type: "number" },
3625
- level: {
3626
- type: "string",
3627
- enum: [
3628
- "log",
3629
- "info",
3630
- "warn",
3631
- "error",
3632
- "debug",
3633
- "all"
3634
- ],
3635
- description: "Filter by console level. Default 'all'."
3636
- }
3637
- }
3638
- },
3639
- capability: "browser",
3640
- async handler(args, signal) {
3641
- return dispatchBrowserTool("browser_console_logs", args, signal);
3642
- }
3643
- },
3644
- {
3645
- toolNameHttp: "browser_network_log",
3646
- description: "Drain network responses a tab has received since the last call. Same lazy-attach + cap-1000 behavior as browser_console_logs. Returns request URL, method, status, mime type, and timestamp per entry.",
3647
- inputSchema: {
3648
- type: "object",
3649
- required: ["tabId"],
3650
- additionalProperties: false,
3651
- properties: { tabId: { type: "number" } }
3652
- },
3653
- capability: "browser",
3654
- async handler(args, signal) {
3655
- return dispatchBrowserTool("browser_network_log", args, signal);
3656
- }
3657
- },
3658
4296
  {
3659
4297
  toolNameHttp: "browser_mouse",
3660
4298
  description: "Move / click / hover / press / release the mouse via real CDP input events (Input.dispatchMouseEvent). Use this when you need behavior that synthetic .click() can't trigger: hover-to-reveal menus, canvas / map / image-map clicks, sites that check event.isTrusted, or precise coordinate targeting. Target with ref (from browser_read_page), CSS selector, or (x, y) in CSS viewport pixels — exactly one. action='move' is the hover (single mouseMoved fires :hover and pointerover reliably). action='dblclick' sends two press/release cycles with incrementing clickCount (a real double-click, not one cycle with clickCount=2). By default the target is hit-tested with elementFromPoint and the call fails with `target_obscured` if the topmost element isn't the target or a descendant — pass force:true to bypass when you know an overlay forwards events.",
@@ -3806,42 +4444,340 @@ const BROWSER_TOOLS = Object.freeze([
3806
4444
  type: "string",
3807
4445
  description: "The text to type. Max 4096 chars. Iterates as Unicode code points (surrogate pairs handled correctly)."
3808
4446
  },
3809
- delayMs: {
3810
- type: "number",
3811
- description: "Pause between characters. Default 0. Clamped to [0, 50]. Set > 0 when typing into search-as-you-type inputs that debounce."
4447
+ delayMs: {
4448
+ type: "number",
4449
+ description: "Pause between characters. Default 0. Clamped to [0, 50]. Set > 0 when typing into search-as-you-type inputs that debounce."
4450
+ }
4451
+ }
4452
+ },
4453
+ capability: "browser",
4454
+ async handler(args, signal) {
4455
+ return dispatchBrowserTool("browser_type", args, signal);
4456
+ }
4457
+ },
4458
+ {
4459
+ toolNameHttp: "browser_diagnostics",
4460
+ description: "Drain console messages or network responses for a tab, with filtering. Replaces the prior browser_console_logs / browser_network_log primitives. `kind` selects the stream; remaining params filter the result before it ships to the model so the response carries only what the caller asked for instead of a raw 1000-entry array dump. Lazy-attach behavior: first call for a tab attaches chrome.debugger; very-early-load events from before the first call are missed.",
4461
+ inputSchema: {
4462
+ type: "object",
4463
+ required: ["tabId", "kind"],
4464
+ additionalProperties: false,
4465
+ properties: {
4466
+ tabId: { type: "number" },
4467
+ kind: {
4468
+ type: "string",
4469
+ enum: ["console", "network"],
4470
+ description: "Which stream to drain."
4471
+ },
4472
+ level: {
4473
+ type: "string",
4474
+ enum: [
4475
+ "log",
4476
+ "info",
4477
+ "warn",
4478
+ "error",
4479
+ "debug",
4480
+ "all"
4481
+ ],
4482
+ description: "Console only. Default 'all'. Ignored when kind=network."
4483
+ },
4484
+ regex: {
4485
+ type: "string",
4486
+ description: "Optional JS-regex string. Console: matches the message body. Network: matches the request URL."
4487
+ },
4488
+ limit: {
4489
+ type: "number",
4490
+ description: "Max entries to return after filtering. Default 100. Hard cap 1000."
4491
+ }
4492
+ }
4493
+ },
4494
+ capability: "browser",
4495
+ async handler(args, signal) {
4496
+ const kind = args.kind === "network" ? "network" : "console";
4497
+ const tool = kind === "network" ? "browser_network_log" : "browser_console_logs";
4498
+ const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
4499
+ const level = typeof args.level === "string" ? args.level : "all";
4500
+ const regexStr = typeof args.regex === "string" ? args.regex : void 0;
4501
+ const limit = typeof args.limit === "number" ? Math.min(1e3, Math.max(1, args.limit)) : 100;
4502
+ const env = await dispatchBrowserTool(tool, {
4503
+ tabId,
4504
+ level
4505
+ }, signal);
4506
+ if (env.isError) return env;
4507
+ const text = env.content?.[0]?.text;
4508
+ if (typeof text !== "string") return env;
4509
+ let entries;
4510
+ try {
4511
+ const parsed = JSON.parse(text);
4512
+ entries = (Array.isArray(parsed) ? parsed : Array.isArray(parsed?.entries) ? parsed.entries : []).filter((e) => typeof e === "object" && e !== null);
4513
+ } catch {
4514
+ return env;
4515
+ }
4516
+ let filtered = entries;
4517
+ if (regexStr) try {
4518
+ const re = new RegExp(regexStr);
4519
+ const field = kind === "network" ? "url" : "text";
4520
+ filtered = filtered.filter((e) => {
4521
+ const v = e[field];
4522
+ return typeof v === "string" && re.test(v);
4523
+ });
4524
+ } catch {
4525
+ return toolEnvelope({ error: `invalid regex: ${regexStr}` }, true);
4526
+ }
4527
+ const out = filtered.slice(0, limit);
4528
+ return toolEnvelope({
4529
+ kind,
4530
+ total: entries.length,
4531
+ returned: out.length,
4532
+ entries: out
4533
+ });
4534
+ }
4535
+ },
4536
+ {
4537
+ toolNameHttp: "browser_find",
4538
+ description: "Find up to 5 elements matching a natural-language intent ('the search box at the top', 'the Submit button at the bottom of the login form'). Returns ranked candidates with stable refs the model can pass to browser_act (ref mode) or browser_mouse. Cheaper than browser_read_page when you know what you're looking for — the inner compressor (Gemini Flash class) filters the snapshot for you instead of sending the full element list to the lead model.",
4539
+ inputSchema: {
4540
+ type: "object",
4541
+ required: ["tabId", "intent"],
4542
+ additionalProperties: false,
4543
+ properties: {
4544
+ tabId: { type: "number" },
4545
+ intent: {
4546
+ type: "string",
4547
+ description: "Natural-language description of what to find."
4548
+ }
4549
+ }
4550
+ },
4551
+ capability: "browser_compound",
4552
+ async handler(args, signal) {
4553
+ const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
4554
+ const intent = typeof args.intent === "string" ? args.intent : "";
4555
+ if (!tabId) return toolEnvelope({ error: "tabId required" }, true);
4556
+ if (!intent) return toolEnvelope({ error: "intent required" }, true);
4557
+ const snapshot = await fetchSnapshot(tabId, signal);
4558
+ const matches = await pickMatchingElements(snapshot, intent, signal);
4559
+ const indexed = new Map(snapshot.elements.map((e) => [e.ref, e]));
4560
+ return toolEnvelope({ matches: matches.map((m) => {
4561
+ const el = indexed.get(m.ref);
4562
+ return el ? {
4563
+ ref: m.ref,
4564
+ role: el.role,
4565
+ name: el.name,
4566
+ bbox: el.bbox,
4567
+ reason: m.reason
4568
+ } : {
4569
+ ref: m.ref,
4570
+ reason: m.reason
4571
+ };
4572
+ }) });
4573
+ }
4574
+ },
4575
+ {
4576
+ toolNameHttp: "browser_act",
4577
+ description: "Preferred for any click / fill / type / scroll-to action against a tab. Two modes: (1) INTENT mode — pass `intent` as natural language ('click the submit button'); the inner compressor (Gemini Flash class) maps it to an element + action. Auto-escalates to visual fallback (screenshot + multimodal model + pixel-coord click) when the intent points into a canvas / svg region the a11y tree can't see. (2) REF mode — pass `ref` (from a prior browser_find or browser_read_page) and optionally `value`; dispatches directly with zero compressor latency. This is the fold-in path for the now-removed browser_click and browser_fill. Returns {ok, action_taken, target_ref, navigated}.",
4578
+ inputSchema: {
4579
+ type: "object",
4580
+ required: ["tabId"],
4581
+ additionalProperties: false,
4582
+ properties: {
4583
+ tabId: { type: "number" },
4584
+ intent: {
4585
+ type: "string",
4586
+ description: "Natural-language description of the action. Triggers INTENT mode. Mutually exclusive with `ref`."
4587
+ },
4588
+ ref: {
4589
+ type: "string",
4590
+ description: "Element ref from browser_find / browser_read_page. Triggers REF mode (no compressor round-trip)."
4591
+ },
4592
+ action: {
4593
+ type: "string",
4594
+ enum: [
4595
+ "click",
4596
+ "fill",
4597
+ "type",
4598
+ "select",
4599
+ "scroll_into_view"
4600
+ ],
4601
+ description: "REF mode only. Defaults to 'click'. In INTENT mode, the compressor picks the action."
4602
+ },
4603
+ value: {
4604
+ type: "string",
4605
+ description: "For fill / type / select: the string value to set. In INTENT mode the compressor uses this when an action requires a value."
3812
4606
  }
3813
4607
  }
3814
4608
  },
3815
4609
  capability: "browser",
3816
4610
  async handler(args, signal) {
3817
- return dispatchBrowserTool("browser_type", args, signal);
4611
+ const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
4612
+ if (!tabId) return toolEnvelope({ error: "tabId required" }, true);
4613
+ const refIn = typeof args.ref === "string" ? args.ref : void 0;
4614
+ const intent = typeof args.intent === "string" ? args.intent : void 0;
4615
+ const value = typeof args.value === "string" ? args.value : void 0;
4616
+ if (!refIn && !intent) return toolEnvelope({ error: "either `ref` (REF mode) or `intent` (INTENT mode) is required" }, true);
4617
+ if (refIn) return dispatchActionByRef(tabId, refIn, typeof args.action === "string" ? args.action : "click", value, signal);
4618
+ const snapshot = await fetchSnapshot(tabId, signal);
4619
+ const picked = await pickElement(snapshot, intent, signal, value);
4620
+ if (!picked.ref || picked.confidence < .5) {
4621
+ const surfaces = snapshot.visualSurfaces;
4622
+ if (surfaces && surfaces.length > 0) {
4623
+ const shotEnv = await dispatchBrowserTool("browser_screenshot", {
4624
+ tabId,
4625
+ format: "png"
4626
+ }, signal);
4627
+ if (shotEnv.isError) return toolEnvelope({
4628
+ ok: false,
4629
+ error: "no text match; screenshot for visual fallback failed",
4630
+ picked
4631
+ }, true);
4632
+ const shotText = shotEnv.content?.[0]?.text;
4633
+ let shot = {};
4634
+ try {
4635
+ shot = shotText ? JSON.parse(shotText) : {};
4636
+ } catch {
4637
+ return toolEnvelope({
4638
+ ok: false,
4639
+ error: "no text match; screenshot envelope unparseable"
4640
+ }, true);
4641
+ }
4642
+ if (!shot.contentType || !shot.dataBase64) return toolEnvelope({
4643
+ ok: false,
4644
+ error: "no text match; screenshot envelope missing fields"
4645
+ }, true);
4646
+ const visual = await pickElementVisual(shot.dataBase64, shot.contentType, intent, surfaces, signal);
4647
+ if (visual.confidence < .5) return toolEnvelope({
4648
+ ok: false,
4649
+ error: "no element matched intent (text + visual)",
4650
+ picked,
4651
+ visual
4652
+ }, true);
4653
+ const clickEnv = await dispatchBrowserTool("browser_mouse", {
4654
+ tabId,
4655
+ action: "click",
4656
+ x: visual.x,
4657
+ y: visual.y,
4658
+ force: true
4659
+ }, signal);
4660
+ if (clickEnv.isError) return clickEnv;
4661
+ return toolEnvelope({
4662
+ ok: true,
4663
+ action_taken: "click_visual",
4664
+ x: visual.x,
4665
+ y: visual.y,
4666
+ confidence: visual.confidence,
4667
+ reason: visual.reason
4668
+ });
4669
+ }
4670
+ return toolEnvelope({
4671
+ ok: false,
4672
+ error: "no element matched intent",
4673
+ picked
4674
+ }, true);
4675
+ }
4676
+ return dispatchActionByRef(tabId, picked.ref, picked.action, picked.value ?? value, signal);
3818
4677
  }
3819
4678
  },
3820
4679
  {
3821
- toolNameHttp: "browser_locate",
3822
- description: "Resolve a single ref or selector to bounding box + hit-test metadata, without a full browser_read_page snapshot. Cheap one in-page script call. Returns bbox (CSS viewport px), center, inView (bbox intersects viewport), visible (display/visibility/opacity > 0 and bbox > 0), computed pointer-events, viewport metadata, and topmostAtCenter (is the element at the bbox center actually this target, or is it occluded by an overlay?). Use this before browser_mouse / browser_drag to detect overlay-occluded targets, or to check whether something scrolled out of view.",
4680
+ toolNameHttp: "browser_extract",
4681
+ description: "Structured extraction from the current page into a JSON object matching the provided schema. The inner compressor reads the page snapshot (text + elements) and synthesizes the typed object. Use this instead of browser_read_page + lead-model parsing when you know the shape you want (e.g. a list of {title, author, url} rows from a PR list).",
3823
4682
  inputSchema: {
3824
4683
  type: "object",
3825
- required: ["tabId"],
4684
+ required: [
4685
+ "tabId",
4686
+ "schema",
4687
+ "instruction"
4688
+ ],
3826
4689
  additionalProperties: false,
3827
4690
  properties: {
3828
4691
  tabId: { type: "number" },
3829
- ref: {
3830
- type: "string",
3831
- description: "Element ref from browser_read_page (preferred). Exactly one of ref / selector required."
3832
- },
3833
- selector: {
4692
+ schema: { description: "JSON schema (or schema-shaped descriptor) for the desired output shape." },
4693
+ instruction: {
3834
4694
  type: "string",
3835
- description: "CSS selector (fallback)."
4695
+ description: "What to extract, in plain language ('the visible PR list')."
3836
4696
  }
3837
4697
  }
3838
4698
  },
3839
- capability: "browser",
4699
+ capability: "browser_compound",
3840
4700
  async handler(args, signal) {
3841
- return dispatchBrowserTool("browser_locate", args, signal);
4701
+ const tabId = typeof args.tabId === "number" ? args.tabId : void 0;
4702
+ const instruction = typeof args.instruction === "string" ? args.instruction : "";
4703
+ const schema = args.schema;
4704
+ if (!tabId) return toolEnvelope({ error: "tabId required" }, true);
4705
+ if (!instruction) return toolEnvelope({ error: "instruction required" }, true);
4706
+ if (!schema) return toolEnvelope({ error: "schema required" }, true);
4707
+ const snapshot = await fetchSnapshot(tabId, signal);
4708
+ try {
4709
+ return toolEnvelope(await extractStructured(snapshot, schema, instruction, signal));
4710
+ } catch (err) {
4711
+ if (err instanceof SchemaValidationError) return toolEnvelope({ error: `invalid schema: ${err.message}` }, true);
4712
+ if (err instanceof ResultShapeError) return toolEnvelope({ error: `extraction produced wrong shape: ${err.message}` }, true);
4713
+ throw err;
4714
+ }
3842
4715
  }
3843
4716
  }
3844
4717
  ]);
4718
/**
 * Dispatch an action against a known element ref via the matching browser primitive.
 * Shared between REF mode and INTENT-mode text matches in `browser_act`.
 *
 * @param tabId  Tab identifier, forwarded unchanged to every primitive call.
 * @param ref    Element ref to act on.
 * @param action One of "click", "fill", "type", "select", "scroll_into_view".
 *               Anything else yields an `unknown action` error envelope.
 * @param value  Payload for "fill" / "select"; for "type" it is sent as `text`
 *               (an empty string when nullish). Ignored by the other actions.
 * @param signal Forwarded unchanged to every `dispatchBrowserTool` call.
 * @returns An MCP envelope (text content + optional isError). When a primitive
 *          reports `isError`, its envelope is returned as-is; otherwise a fresh
 *          envelope `{ ok, action_taken, target_ref, navigated }` is built.
 */
async function dispatchActionByRef(tabId, ref, action, value, signal) {
  let env;
  switch (action) {
    case "click":
      env = await dispatchBrowserTool("browser_click", { tabId, ref }, signal);
      break;
    case "fill":
    case "select":
      // Both actions are served by browser_fill with the same arguments.
      env = await dispatchBrowserTool("browser_fill", { tabId, ref, value }, signal);
      break;
    case "type": {
      // browser_type is called without a ref, so the target is clicked first
      // (presumably to give it focus). If that click fails, stop here instead
      // of typing into whatever element currently has focus.
      const focusEnv = await dispatchBrowserTool("browser_click", { tabId, ref }, signal);
      if (focusEnv.isError) return focusEnv;
      env = await dispatchBrowserTool("browser_type", { tabId, text: value ?? "" }, signal);
      break;
    }
    case "scroll_into_view":
      env = await dispatchBrowserTool("browser_scroll", { tabId, target: "element", ref }, signal);
      break;
    default:
      return toolEnvelope({
        ok: false,
        error: `unknown action: ${action}`
      }, true);
  }
  if (env.isError) return env;

  // The primitive's first content item is expected to carry JSON text. A
  // missing or unparseable payload is tolerated on purpose: it only means
  // `navigated` is reported as undefined.
  const innerText = env.content?.[0]?.text;
  let parsed = {};
  if (typeof innerText === "string") {
    try {
      parsed = JSON.parse(innerText);
    } catch {
      parsed = {};
    }
  }
  return toolEnvelope({
    ok: true,
    action_taken: action,
    target_ref: ref,
    navigated: typeof parsed.navigated === "boolean" ? parsed.navigated : void 0
  });
}
3845
4781
 
3846
4782
  //#endregion
3847
4783
  //#region src/vendor/pi/ai/api-registry.ts
@@ -5416,7 +6352,7 @@ const MAX_INFLIGHT_WORKER_CALLS = (() => {
5416
6352
  if (!Number.isFinite(n) || n <= 0 || !Number.isInteger(n)) return 8;
5417
6353
  return n;
5418
6354
  })();
5419
- let inFlight$1 = 0;
6355
+ let inFlight = 0;
5420
6356
  /**
5421
6357
  * Acquire a worker slot.
5422
6358
  *
@@ -5434,176 +6370,16 @@ let inFlight$1 = 0;
5434
6370
  */
5435
6371
  async function acquireWorkerSlot(signal) {
5436
6372
  if (signal?.aborted) return null;
5437
- if (inFlight$1 >= MAX_INFLIGHT_WORKER_CALLS) return null;
5438
- inFlight$1 += 1;
6373
+ if (inFlight >= MAX_INFLIGHT_WORKER_CALLS) return null;
6374
+ inFlight += 1;
5439
6375
  let released = false;
5440
6376
  return () => {
5441
6377
  if (released) return;
5442
6378
  released = true;
5443
- inFlight$1 = Math.max(0, inFlight$1 - 1);
5444
- };
5445
- }
5446
-
5447
- //#endregion
5448
- //#region src/lib/diagnose-response.ts
5449
/** Maximum number of body characters quoted in the diagnostic log line. */
const PREVIEW_LIMIT = 200;

/**
 * Parse a response body as JSON; on failure, log a diagnostic line and rethrow.
 *
 * The response is cloned up front because a failed `json()` call has already
 * consumed the body, and the raw text is still needed for the log preview.
 *
 * @param response  Fetch `Response` whose body should be JSON.
 * @param routePath Route label used only in the log message.
 * @returns The parsed JSON value.
 * @throws The original parse error, after it has been logged via `consola.error`.
 */
async function parseJsonOrDiagnose(response, routePath) {
  const spare = response.clone();
  let parsed;
  try {
    parsed = await response.json();
  } catch (parseFailure) {
    const contentType = response.headers.get("content-type") ?? "(none)";
    const raw = await spare.text().catch(() => "(unreadable)");
    let preview = raw;
    if (raw.length > PREVIEW_LIMIT) {
      preview = raw.slice(0, PREVIEW_LIMIT) + "...(truncated)";
    }
    consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..${PREVIEW_LIMIT}]=${JSON.stringify(preview)}`);
    throw parseFailure;
  }
  return parsed;
}
5462
-
5463
- //#endregion
5464
- //#region src/lib/response-cap.ts
5465
- /**
5466
- * Hard byte cap for non-streaming upstream response bodies.
5467
- *
5468
- * Anthropic responses with large tool_use blocks can legitimately reach
5469
- * several MB, but a multi-GB body is either a buggy upstream or a malicious
5470
- * one. Buffering it would OOM the proxy and crash all in-flight requests.
5471
- *
5472
- * Applies to /v1/messages, /v1/chat/completions, and /v1/responses.
5473
- */
5474
- const MAX_RESPONSE_BODY_BYTES = 10 * 1024 * 1024;
5475
- /**
5476
- * Read a Response body with a hard byte cap, then parse as JSON.
5477
- *
5478
- * Falls back to the fast path (response.json()) when Content-Length is
5479
- * present and within the cap, avoiding the streaming-reader overhead for
5480
- * the vast majority of normal responses.
5481
- *
5482
- * When the cap is hit:
5483
- * - the reader is cancelled to release the upstream socket
5484
- * - a structured Anthropic-format error is returned to the caller
5485
- * (the caller wraps it in c.json(), not throws — the client gets a
5486
- * clean 413 error, not an unhandled-rejection crash)
5487
- *
5488
- * Returns `{ ok: true, value }` on success or `{ ok: false, errorResponse, status }`
5489
- * on cap exceeded.
5490
- */
5491
- async function readResponseBodyCapped(response, routePath, capBytes = MAX_RESPONSE_BODY_BYTES) {
5492
- const contentLengthHeader = response.headers.get("content-length");
5493
- const contentLength = contentLengthHeader ? parseInt(contentLengthHeader, 10) : NaN;
5494
- if (!isNaN(contentLength) && contentLength <= capBytes) return {
5495
- ok: true,
5496
- value: await parseJsonOrDiagnose(response, routePath)
5497
- };
5498
- const reader = response.body?.getReader();
5499
- if (!reader) return {
5500
- ok: true,
5501
- value: await parseJsonOrDiagnose(response, routePath)
6379
+ inFlight = Math.max(0, inFlight - 1);
5502
6380
  };
5503
- const chunks = [];
5504
- let totalBytes = 0;
5505
- let capped = false;
5506
- try {
5507
- while (true) {
5508
- const { done, value } = await reader.read();
5509
- if (done) break;
5510
- if (!value) continue;
5511
- totalBytes += value.byteLength;
5512
- if (totalBytes > capBytes) {
5513
- capped = true;
5514
- try {
5515
- await reader.cancel("size_cap");
5516
- } catch {}
5517
- break;
5518
- }
5519
- chunks.push(value);
5520
- }
5521
- } catch (err) {
5522
- if (!capped) consola.warn(`readResponseBodyCapped: read error at ${routePath}:`, err);
5523
- }
5524
- if (capped) {
5525
- consola.warn(`Non-streaming upstream response at ${routePath} exceeded ${capBytes} bytes (10 MiB cap); dropping body to prevent OOM. Check upstream health.`);
5526
- return {
5527
- ok: false,
5528
- status: 502,
5529
- errorResponse: {
5530
- type: "error",
5531
- error: {
5532
- type: "api_error",
5533
- message: `Upstream response body exceeded the 10 MiB size cap for non-streaming ${routePath}. The upstream may be misbehaving. Try enabling streaming (stream: true) which handles large responses chunk-by-chunk.`
5534
- }
5535
- }
5536
- };
5537
- }
5538
- const merged = new Uint8Array(totalBytes);
5539
- let offset = 0;
5540
- for (const chunk of chunks) {
5541
- merged.set(chunk, offset);
5542
- offset += chunk.byteLength;
5543
- }
5544
- const text = new TextDecoder().decode(merged);
5545
- try {
5546
- return {
5547
- ok: true,
5548
- value: JSON.parse(text)
5549
- };
5550
- } catch (err) {
5551
- const preview = text.slice(0, 200);
5552
- const contentType = response.headers.get("content-type") ?? "(none)";
5553
- consola.error(`Upstream JSON parse failed at ${routePath}: status=${response.status} content-type="${contentType}" body[0..200]=${JSON.stringify(preview)}`);
5554
- throw err;
5555
- }
5556
6381
  }
5557
6382
 
5558
- //#endregion
5559
- //#region src/services/copilot/create-chat-completions.ts
5560
/**
 * POST a chat-completions payload to the Copilot API.
 *
 * @param payload      Request body; `payload.messages` decides the vision flag
 *                     and the `X-Initiator` header, `payload.stream` decides the
 *                     return shape.
 * @param modelHeaders Extra headers spread over the Copilot defaults.
 * @param callerSignal Optional abort signal, combined with the upstream timeout.
 * @returns `events(response)` when `payload.stream` is truthy; otherwise the
 *          parsed JSON body as read by `readResponseBodyCapped`.
 * @throws Error when no Copilot token is present in `state`.
 * @throws HTTPError when the upstream answers non-OK, or when the non-streaming
 *         body exceeds the size cap.
 */
const createChatCompletions = async (payload, modelHeaders, callerSignal) => {
  if (!state.copilotToken) throw new Error("Copilot token not found");

  const hasImagePart = (message) => typeof message.content !== "string" && message.content?.some((part) => part.type === "image_url");
  const enableVision = payload.messages.some(hasImagePart);
  const agentRoles = ["assistant", "tool"];
  const isAgentCall = payload.messages.some((message) => agentRoles.includes(message.role));
  const endpoint = `${copilotBaseUrl(state)}/chat/completions`;

  // Invoked through tryRefreshAndRetry, so headers are rebuilt from `state`
  // on every call rather than captured once.
  const sendRequest = () => {
    const init = {
      method: "POST",
      headers: {
        ...copilotHeaders(state, enableVision),
        ...modelHeaders,
        "X-Initiator": isAgentCall ? "agent" : "user"
      },
      body: JSON.stringify(payload)
    };
    const abortSources = [];
    if (UPSTREAM_FETCH_TIMEOUT_MS > 0) abortSources.push(AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS));
    if (callerSignal) abortSources.push(callerSignal);
    // Leave `signal` unset when there is nothing to abort on.
    if (abortSources.length === 1) {
      init.signal = abortSources[0];
    } else if (abortSources.length > 1) {
      init.signal = AbortSignal.any(abortSources);
    }
    return fetch(endpoint, init);
  };

  const response = await tryRefreshAndRetry(sendRequest, "/chat/completions");

  if (!response.ok) {
    let errorBody = "";
    try {
      errorBody = await response.text();
    } catch {
      errorBody = "(could not read error body)";
    }
    const claudeModels = state.models?.data
      .filter((model) => model.id.startsWith("claude"))
      .map((model) => model.id)
      .join(", ") ?? "(models not loaded)";
    consola.error(`Copilot rejected model "${payload.model}": ${response.status} ${errorBody} (available Claude models: ${claudeModels})`);
    // The body was consumed above, so hand HTTPError a fresh Response with the same text.
    const replay = new Response(errorBody, {
      status: response.status,
      statusText: response.statusText,
      headers: response.headers
    });
    throw new HTTPError("Failed to create chat completions", replay);
  }

  if (payload.stream) return events(response);

  const cappedResult = await readResponseBodyCapped(response, "/v1/chat/completions", MAX_RESPONSE_BODY_BYTES);
  if (cappedResult.ok) return cappedResult.value;
  throw new HTTPError("Upstream /v1/chat/completions response exceeded 10 MiB size cap", new Response(JSON.stringify(cappedResult.errorResponse), {
    status: cappedResult.status,
    headers: { "content-type": "application/json" }
  }));
};
5606
-
5607
6383
  //#endregion
5608
6384
  //#region src/lib/worker-agent/stream-fn.ts
5609
6385
  function createCopilotStreamFn(opts) {
@@ -6057,56 +6833,6 @@ function isAbortError(err) {
6057
6833
  return false;
6058
6834
  }
6059
6835
 
6060
- //#endregion
6061
- //#region src/lib/mcp-inflight.ts
6062
/**
 * Shared concurrency cap for MCP `tools/call` dispatches.
 *
 * One process-wide counter with no per-route partitioning. Per the original
 * module notes, the counter was extracted from `src/routes/mcp/handler.ts` so
 * that the worker-agent's nested `peer_review` / `advisor` dispatches draw on
 * the same budget as persona calls at the MCP boundary; otherwise a single
 * worker could fan out unboundedly to peers.
 *
 * The value 8 is justified in `docs/research/peer-mcp-investigation.md`
 * § "Concurrency cap investigation"; re-read that before changing it.
 */
const MAX_INFLIGHT_TOOLS_CALL = 8;

/** Number of slots currently held. */
let inFlight = 0;

/**
 * Try to take one slot.
 *
 * Synchronous on purpose: a saturated cap is reported immediately as `null`
 * rather than queued, so the caller can decide to back off or retry.
 *
 * @returns A release function to call once the work is done (typically from a
 *          `finally` block), or `null` when all slots are taken. The release
 *          function is idempotent: only its first call decrements the counter.
 */
function acquireInFlightSlot() {
  const saturated = inFlight >= MAX_INFLIGHT_TOOLS_CALL;
  if (saturated) return null;
  inFlight += 1;
  let holding = true;
  return () => {
    if (!holding) return;
    holding = false;
    inFlight -= 1;
  };
}
6109
-
6110
6836
  //#endregion
6111
6837
  //#region src/lib/tokenizer.ts
6112
6838
  const ENCODING_MAP = {
@@ -6563,6 +7289,26 @@ function workerToolsEnabled() {
6563
7289
  if (!found) return false;
6564
7290
  return found.capabilities?.supports?.tool_calls === true;
6565
7291
  }
7292
/**
 * Gate for the compound L2 browser tools: `browser_find`, `browser_act` in
 * intent mode, and `browser_extract`.
 *
 * The answer is exactly `compressorAvailable()`: true when at least one model
 * of the compressor fallback chain (`gemini-3.5-flash` → `gpt-5.4-mini` →
 * `claude-haiku-4-5`) is in the live catalog with `tool_calls` support. When
 * it is false, the compound tools are dropped from `tools/list` and
 * `tools/call` for them fails with -32601.
 *
 * The `browser` opt-in is deliberately not re-checked here. `handler.ts`
 * evaluates the `browser` and `browser_compound` capability tags separately,
 * and the compound tools live under the browser MCP surface, which the route
 * mounts only when `state.browseEnabled` (the `--browse` enablement).
 *
 * @returns {boolean} Whether the compound browser tools may be offered.
 */
function browserCompoundToolsEnabled() {
  const compressorReachable = compressorAvailable();
  return compressorReachable;
}
6566
7312
 
6567
7313
  //#endregion
6568
7314
  //#region src/routes/mcp/handler.ts
@@ -6739,6 +7485,7 @@ function toolEntries() {
6739
7485
  if (t.capability === "worker") return workerToolsEnabled();
6740
7486
  if (t.capability === "stand_in") return standInToolEnabled();
6741
7487
  if (t.capability === "browser") return browserToolsEnabled();
7488
+ if (t.capability === "browser_compound") return browserToolsEnabled() && browserCompoundToolsEnabled();
6742
7489
  return true;
6743
7490
  }).map((t) => ({
6744
7491
  name: t.toolNameHttp,
@@ -7030,6 +7777,7 @@ async function handleToolsCall(body) {
7030
7777
  if (nonPersonaTool && nonPersonaTool.capability === "worker" && !workerToolsEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
7031
7778
  if (nonPersonaTool && nonPersonaTool.capability === "stand_in" && !standInToolEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
7032
7779
  if (nonPersonaTool && nonPersonaTool.capability === "browser" && !browserToolsEnabled()) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
7780
+ if (nonPersonaTool && nonPersonaTool.capability === "browser_compound" && !(browserToolsEnabled() && browserCompoundToolsEnabled())) return rpcError(body.id, RPC_METHOD_NOT_FOUND, `tools/call: unknown tool "${name$1}"`);
7033
7781
  let personaPrompt;
7034
7782
  let personaContext;
7035
7783
  let personaEffort;
@@ -10583,6 +11331,7 @@ function buildPeerAwarenessSnippet(opts) {
10583
11331
  if (opts.workerToolsAvailable) para2Parts.push("`worker_explore` runs a Gemini-backed read-only worker that returns a summary, using its own context rather than yours; concurrent launches share the `MAX_INFLIGHT_TOOLS_CALL=8` cap with operator traffic.", "`worker_implement` is the same worker with edit/write/bash; `worktree: true` runs it in an isolated git worktree and returns the diff.", "Workers themselves have `code_search` in their toolset.");
10584
11332
  para2Parts.push("`web_search` surfaces citable sources for docs, errors, and upstream issues.");
10585
11333
  if (opts.standInAvailable) para2Parts.push("`stand_in` provides three-lab consensus for decision tiebreak when the user is unavailable.");
11334
+ if (opts.browseAvailable) para2Parts.push("`browser_*` tools (under `mcp__gh-router-peers__browser_*`) drive a real Chrome / Edge browser via a local extension; prefer the L2 compound tools `browser_act(intent | ref, value?)` / `browser_find(intent)` / `browser_extract(schema, instruction)` over the L0/L1 primitives.");
10586
11335
  return [
10587
11336
  "## Peer review and advisor",
10588
11337
  "",
@@ -12045,7 +12794,7 @@ function initProxyFromEnv() {
12045
12794
  //#endregion
12046
12795
  //#region package.json
12047
12796
  var name = "github-router";
12048
- var version = "0.3.45";
12797
+ var version$1 = "0.3.52";
12049
12798
 
12050
12799
  //#endregion
12051
12800
  //#region src/lib/approval.ts
@@ -13716,7 +14465,7 @@ server.use(cors());
13716
14465
  server.get("/", (c) => c.text("Server running"));
13717
14466
  server.get("/version", (c) => c.json({
13718
14467
  name,
13719
- version,
14468
+ version: version$1,
13720
14469
  gitSha: process.env.GITHUB_SHA ?? "unknown"
13721
14470
  }));
13722
14471
  server.on("HEAD", ["/"], (c) => c.body(null, 200));
@@ -14150,7 +14899,8 @@ const claude = defineCommand({
14150
14899
  codexCli: backend === "cli",
14151
14900
  geminiAvailable: geminiAvailable$1,
14152
14901
  workerToolsAvailable: workerToolsEnabled(),
14153
- standInAvailable: standInToolEnabled()
14902
+ standInAvailable: standInToolEnabled(),
14903
+ browseAvailable: state.browseEnabled
14154
14904
  });
14155
14905
  extraArgs.push("--append-system-prompt", peerSnippet);
14156
14906
  try {
@@ -14247,7 +14997,7 @@ const codex = defineCommand({
14247
14997
 
14248
14998
  //#endregion
14249
14999
  //#region src/debug.ts
14250
- async function getPackageVersion() {
15000
+ async function getPackageVersion$1() {
14251
15001
  try {
14252
15002
  const packageJsonPath = new URL("../package.json", import.meta.url).pathname;
14253
15003
  return JSON.parse(await fs.readFile(packageJsonPath)).version;
@@ -14273,9 +15023,9 @@ async function checkTokenExists() {
14273
15023
  }
14274
15024
  }
14275
15025
  async function getDebugInfo() {
14276
- const [version$1, tokenExists] = await Promise.all([getPackageVersion(), checkTokenExists()]);
15026
+ const [version$2, tokenExists] = await Promise.all([getPackageVersion$1(), checkTokenExists()]);
14277
15027
  return {
14278
- version: version$1,
15028
+ version: version$2,
14279
15029
  runtime: getRuntimeInfo(),
14280
15030
  paths: {
14281
15031
  APP_DIR: PATHS.APP_DIR,
@@ -14597,9 +15347,12 @@ process.on("uncaughtException", (error) => {
14597
15347
  consola.error("Uncaught exception:", error);
14598
15348
  process.exit(1);
14599
15349
  });
15350
+ const version = getPackageVersion();
15351
+ if (!process.argv.slice(2).includes("--version")) consola.info(`github-router v${version}`);
14600
15352
  await runMain(defineCommand({
14601
15353
  meta: {
14602
15354
  name: "github-router",
15355
+ version,
14603
15356
  description: "A reverse proxy that exposes GitHub Copilot as OpenAI and Anthropic compatible API endpoints."
14604
15357
  },
14605
15358
  subCommands: {