github-router 0.3.42 → 0.3.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -3015,7 +3015,7 @@ const PER_TOOL_TIMEOUTS = {
3015
3015
  },
3016
3016
  browser_scroll: {
3017
3017
  defaultMs: 5e3,
3018
- maxMs: 1e4
3018
+ maxMs: 15e3
3019
3019
  },
3020
3020
  browser_keyboard: {
3021
3021
  defaultMs: 5e3,
@@ -3040,6 +3040,22 @@ const PER_TOOL_TIMEOUTS = {
3040
3040
  browser_network_log: {
3041
3041
  defaultMs: 5e3,
3042
3042
  maxMs: 1e4
3043
+ },
3044
+ browser_mouse: {
3045
+ defaultMs: 1e4,
3046
+ maxMs: 3e4
3047
+ },
3048
+ browser_drag: {
3049
+ defaultMs: 15e3,
3050
+ maxMs: 3e4
3051
+ },
3052
+ browser_type: {
3053
+ defaultMs: 15e3,
3054
+ maxMs: 21e4
3055
+ },
3056
+ browser_locate: {
3057
+ defaultMs: 5e3,
3058
+ maxMs: 1e4
3043
3059
  }
3044
3060
  };
3045
3061
  function pickTimeout(tool) {
@@ -3224,7 +3240,7 @@ function logAudit$1(record) {
3224
3240
  * call-time when the operator hasn't opted in via `--browse` or
3225
3241
  * `GH_ROUTER_ENABLE_BROWSE=1`.
3226
3242
  *
3227
- * v1 surface: 15 tools (Phases 3 + 4a + 4b).
3243
+ * v1 surface: 19 tools (Phases 3 + 4a + 4b + humanlike input v2).
3228
3244
  */
3229
3245
  const BROWSER_TOOLS = Object.freeze([
3230
3246
  {
@@ -3344,7 +3360,7 @@ const BROWSER_TOOLS = Object.freeze([
3344
3360
  },
3345
3361
  {
3346
3362
  toolNameHttp: "browser_read_page",
3347
- description: "Extract rendered page text plus the list of interactive elements (refs, roles, names, bounding boxes). Element refs returned here are intended as the input to a follow-up browser_click / browser_fill / browser_scroll — preferred over CSS selectors because refs are stable across dynamic class names. Text is capped at 256 KiB.",
3363
+ description: "Extract rendered page text plus interactive elements (refs, roles, names, bounding boxes) plus viewport metadata. Each element entry carries bbox: [x, y, w, h] in CSS viewport pixels — the same coordinate space used by browser_mouse / browser_drag / browser_scroll(at-pointer). Element refs returned here are intended as the primary input to follow-up tool calls — preferred over CSS selectors because refs are stable across dynamic class names. The viewport block {width, height, devicePixelRatio, scrollX, scrollY} lets you map a CSS-px bbox to a device-px pixel in browser_screenshot (device_px = css_px * devicePixelRatio). Text is capped at 256 KiB; elements at the first 200 interactive nodes.",
3348
3364
  inputSchema: {
3349
3365
  type: "object",
3350
3366
  required: ["tabId"],
@@ -3427,7 +3443,7 @@ const BROWSER_TOOLS = Object.freeze([
3427
3443
  },
3428
3444
  {
3429
3445
  toolNameHttp: "browser_scroll",
3430
- description: "Scroll a tab to the top, to the bottom, by a pixel amount, or to a specific element by ref.",
3446
+ description: "Scroll a tab. Five modes: top / bottom of the page, by an absolute pixel delta, to a specific element (by ref), or wheel-scroll a sub-region at a pointer location ('at-pointer' — the path that works for chat windows / infinite-scroll lists / modal bodies that don't respond to window.scrollTo because they have their own scroll container).",
3431
3447
  inputSchema: {
3432
3448
  type: "object",
3433
3449
  required: ["tabId", "target"],
@@ -3440,7 +3456,8 @@ const BROWSER_TOOLS = Object.freeze([
3440
3456
  "top",
3441
3457
  "bottom",
3442
3458
  "pixels",
3443
- "element"
3459
+ "element",
3460
+ "at-pointer"
3444
3461
  ],
3445
3462
  description: "Scroll target type."
3446
3463
  },
@@ -3450,7 +3467,31 @@ const BROWSER_TOOLS = Object.freeze([
3450
3467
  },
3451
3468
  ref: {
3452
3469
  type: "string",
3453
- description: "Element ref when target=element. Scrolls so the element is centered in the viewport."
3470
+ description: "Element ref. For target=element, scrolls so the element is centered. For target=at-pointer, resolves to the bbox center as the wheel position."
3471
+ },
3472
+ selector: {
3473
+ type: "string",
3474
+ description: "CSS selector. For target=at-pointer, fallback when no ref. Resolves to bbox center."
3475
+ },
3476
+ x: {
3477
+ type: "number",
3478
+ description: "Pointer x (CSS viewport px) for target=at-pointer. Pair with y. Exactly one of (ref, selector, or x+y) is required for at-pointer."
3479
+ },
3480
+ y: {
3481
+ type: "number",
3482
+ description: "Pointer y (CSS viewport px) for target=at-pointer. Pair with x."
3483
+ },
3484
+ deltaX: {
3485
+ type: "number",
3486
+ description: "Wheel delta x (CSS px) for target=at-pointer. Default 0. Clamped to |10000|."
3487
+ },
3488
+ deltaY: {
3489
+ type: "number",
3490
+ description: "Wheel delta y (CSS px) for target=at-pointer. Positive scrolls down. Default 0. Clamped to |10000|. At least one of deltaX/deltaY must be non-zero."
3491
+ },
3492
+ force: {
3493
+ type: "boolean",
3494
+ description: "Skip the pre-wheel elementFromPoint hit-test for target=at-pointer. Default false. Set true when an overlay covers the target but forwards wheel events."
3454
3495
  }
3455
3496
  }
3456
3497
  },
@@ -3613,6 +3654,192 @@ const BROWSER_TOOLS = Object.freeze([
3613
3654
  async handler(args, signal) {
3614
3655
  return dispatchBrowserTool("browser_network_log", args, signal);
3615
3656
  }
3657
+ },
3658
+ {
3659
+ toolNameHttp: "browser_mouse",
3660
+ description: "Move / click / hover / press / release the mouse via real CDP input events (Input.dispatchMouseEvent). Use this when you need behavior that synthetic .click() can't trigger: hover-to-reveal menus, canvas / map / image-map clicks, sites that check event.isTrusted, or precise coordinate targeting. Target with ref (from browser_read_page), CSS selector, or (x, y) in CSS viewport pixels — exactly one. action='move' is the hover (single mouseMoved fires :hover and pointerover reliably). action='dblclick' sends two press/release cycles with incrementing clickCount (a real double-click, not one cycle with clickCount=2). By default the target is hit-tested with elementFromPoint and the call fails with `target_obscured` if the topmost element isn't the target or a descendant — pass force:true to bypass when you know an overlay forwards events.",
3661
+ inputSchema: {
3662
+ type: "object",
3663
+ required: ["tabId", "action"],
3664
+ additionalProperties: false,
3665
+ properties: {
3666
+ tabId: { type: "number" },
3667
+ action: {
3668
+ type: "string",
3669
+ enum: [
3670
+ "move",
3671
+ "click",
3672
+ "dblclick",
3673
+ "down",
3674
+ "up"
3675
+ ],
3676
+ description: "What to do. move=position cursor (hover). click=press+release. dblclick=two press+release with clickCount 1 then 2. down=press only. up=release only."
3677
+ },
3678
+ ref: {
3679
+ type: "string",
3680
+ description: "Element ref from browser_read_page (preferred). Resolves to bbox center. Exactly one of ref / selector / (x+y) required."
3681
+ },
3682
+ selector: {
3683
+ type: "string",
3684
+ description: "CSS selector (fallback). Resolves to bbox center."
3685
+ },
3686
+ x: {
3687
+ type: "number",
3688
+ description: "Target x in CSS viewport pixels. Pair with y. Use when working from a screenshot or eval_js output."
3689
+ },
3690
+ y: {
3691
+ type: "number",
3692
+ description: "Target y in CSS viewport pixels. Pair with x."
3693
+ },
3694
+ button: {
3695
+ type: "string",
3696
+ enum: [
3697
+ "left",
3698
+ "right",
3699
+ "middle"
3700
+ ],
3701
+ description: "Mouse button for click / dblclick / down / up. Default 'left'. Ignored for action=move."
3702
+ },
3703
+ steps: {
3704
+ type: "number",
3705
+ description: "Humanlike trajectory. >1 interpolates the cursor approach over N mouseMoved events. Default 1 (teleport). Clamped to [1, 100]."
3706
+ },
3707
+ stepDelayMs: {
3708
+ type: "number",
3709
+ description: "Pause between interpolated mouseMoved events when steps > 1. Default 8. Clamped to [0, 50]."
3710
+ },
3711
+ force: {
3712
+ type: "boolean",
3713
+ description: "Skip the pre-click elementFromPoint hit-test (ref/selector mode only). Default false."
3714
+ }
3715
+ }
3716
+ },
3717
+ capability: "browser",
3718
+ async handler(args, signal) {
3719
+ return dispatchBrowserTool("browser_mouse", args, signal);
3720
+ }
3721
+ },
3722
+ {
3723
+ toolNameHttp: "browser_drag",
3724
+ description: "Drag from a source to a destination. Auto-detects whether to use HTML5 native DnD (for elements with draggable='true', via CDP Input.setInterceptDrags + Input.dispatchDragEvent — the only path that triggers Chromium's native dragstart pipeline) or pointer-based DnD (for react-dnd / Sortable.js / mouse-event-based drag handlers — via CDP mouse events with buttons:1 held throughout). Each of from/to can be a ref (preferred), a CSS selector, or x+y coordinates. Returns { ok: true, mode_used: 'pointer'|'html5' } so you can verify which path ran.",
3725
+ inputSchema: {
3726
+ type: "object",
3727
+ required: ["tabId"],
3728
+ additionalProperties: false,
3729
+ properties: {
3730
+ tabId: { type: "number" },
3731
+ fromRef: {
3732
+ type: "string",
3733
+ description: "Source ref from browser_read_page (preferred)."
3734
+ },
3735
+ fromSelector: {
3736
+ type: "string",
3737
+ description: "Source CSS selector (fallback)."
3738
+ },
3739
+ fromX: {
3740
+ type: "number",
3741
+ description: "Source x in CSS viewport pixels. Pair with fromY."
3742
+ },
3743
+ fromY: {
3744
+ type: "number",
3745
+ description: "Source y in CSS viewport pixels. Pair with fromX."
3746
+ },
3747
+ toRef: {
3748
+ type: "string",
3749
+ description: "Destination ref from browser_read_page (preferred)."
3750
+ },
3751
+ toSelector: {
3752
+ type: "string",
3753
+ description: "Destination CSS selector (fallback)."
3754
+ },
3755
+ toX: {
3756
+ type: "number",
3757
+ description: "Destination x in CSS viewport pixels. Pair with toY."
3758
+ },
3759
+ toY: {
3760
+ type: "number",
3761
+ description: "Destination y in CSS viewport pixels. Pair with toX."
3762
+ },
3763
+ button: {
3764
+ type: "string",
3765
+ enum: ["left", "middle"],
3766
+ description: "Mouse button held during drag. Default 'left'."
3767
+ },
3768
+ steps: {
3769
+ type: "number",
3770
+ description: "Intermediate mouseMoved events from→to with the button held. Drag-detect libraries need a trajectory to fire. Default 15. Clamped to [1, 100]."
3771
+ },
3772
+ stepDelayMs: {
3773
+ type: "number",
3774
+ description: "Pause between intermediate moves. Default 12. Clamped to [0, 50]."
3775
+ },
3776
+ mode: {
3777
+ type: "string",
3778
+ enum: [
3779
+ "auto",
3780
+ "pointer",
3781
+ "html5"
3782
+ ],
3783
+ description: "Drag mode. 'auto' (default) picks html5 if the source has draggable='true', else pointer. Override only when auto detection misses."
3784
+ },
3785
+ force: {
3786
+ type: "boolean",
3787
+ description: "Skip the pre-press elementFromPoint hit-test on the source. Default false."
3788
+ }
3789
+ }
3790
+ },
3791
+ capability: "browser",
3792
+ async handler(args, signal) {
3793
+ return dispatchBrowserTool("browser_drag", args, signal);
3794
+ }
3795
+ },
3796
+ {
3797
+ toolNameHttp: "browser_type",
3798
+ description: "Type a string into the currently-focused element per-keystroke via CDP Input.dispatchKeyEvent. Each character fires keydown + keypress + input — this is the tool for keystroke-driven autocomplete, chips, search-as-you-type, and any site whose handlers listen on keydown rather than just reading element.value. For plain form-value entry use browser_fill (faster, sets value directly). For chord shortcuts (Control+L, etc) use browser_keyboard. Special characters in text: \\n→Enter, \\t→Tab, \\b→Backspace (dispatched as the named key, not as a literal control char). Other control chars (< 0x20) are rejected with an actionable error. Uppercase letters come from the natural code point — event.shiftKey is false but the typed value is correct.",
3799
+ inputSchema: {
3800
+ type: "object",
3801
+ required: ["tabId", "text"],
3802
+ additionalProperties: false,
3803
+ properties: {
3804
+ tabId: { type: "number" },
3805
+ text: {
3806
+ type: "string",
3807
+ description: "The text to type. Max 4096 chars. Iterates as Unicode code points (surrogate pairs handled correctly)."
3808
+ },
3809
+ delayMs: {
3810
+ type: "number",
3811
+ description: "Pause between characters. Default 0. Clamped to [0, 50]. Set > 0 when typing into search-as-you-type inputs that debounce."
3812
+ }
3813
+ }
3814
+ },
3815
+ capability: "browser",
3816
+ async handler(args, signal) {
3817
+ return dispatchBrowserTool("browser_type", args, signal);
3818
+ }
3819
+ },
3820
+ {
3821
+ toolNameHttp: "browser_locate",
3822
+ description: "Resolve a single ref or selector to bounding box + hit-test metadata, without a full browser_read_page snapshot. Cheap — one in-page script call. Returns bbox (CSS viewport px), center, inView (bbox intersects viewport), visible (display/visibility/opacity > 0 and bbox > 0), computed pointer-events, viewport metadata, and topmostAtCenter (is the element at the bbox center actually this target, or is it occluded by an overlay?). Use this before browser_mouse / browser_drag to detect overlay-occluded targets, or to check whether something scrolled out of view.",
3823
+ inputSchema: {
3824
+ type: "object",
3825
+ required: ["tabId"],
3826
+ additionalProperties: false,
3827
+ properties: {
3828
+ tabId: { type: "number" },
3829
+ ref: {
3830
+ type: "string",
3831
+ description: "Element ref from browser_read_page (preferred). Exactly one of ref / selector required."
3832
+ },
3833
+ selector: {
3834
+ type: "string",
3835
+ description: "CSS selector (fallback)."
3836
+ }
3837
+ }
3838
+ },
3839
+ capability: "browser",
3840
+ async handler(args, signal) {
3841
+ return dispatchBrowserTool("browser_locate", args, signal);
3842
+ }
3616
3843
  }
3617
3844
  ]);
3618
3845
 
@@ -10578,6 +10805,10 @@ const NON_PERSONA_MCP_TOOLS = Object.freeze([
10578
10805
  "xhigh"
10579
10806
  ],
10580
10807
  description: "Optional reasoning depth (default high). Silently clamped to the model's allowed range; \"off\" drops the parameter entirely."
10808
+ },
10809
+ workspace: {
10810
+ type: "string",
10811
+ description: "Optional absolute path to the workspace the worker operates in. Defaults to the proxy's launch cwd. Use this when the parent agent has multiple workspaces open and the worker must operate in a specific one. Must be absolute (relative paths rejected)."
10581
10812
  }
10582
10813
  }
10583
10814
  },
@@ -10621,6 +10852,10 @@ const NON_PERSONA_MCP_TOOLS = Object.freeze([
10621
10852
  "xhigh"
10622
10853
  ],
10623
10854
  description: "Optional reasoning depth (default high). Silently clamped to the model's allowed range; \"off\" drops the parameter entirely."
10855
+ },
10856
+ workspace: {
10857
+ type: "string",
10858
+ description: "Optional absolute path to the workspace the worker operates in. Defaults to the proxy's launch cwd. Use this when the parent agent has multiple workspaces open and the worker must operate in a specific one. Must be absolute (relative paths rejected). For worktree:true, must be inside a git repo."
10624
10859
  }
10625
10860
  }
10626
10861
  },
@@ -10685,11 +10920,13 @@ const NON_PERSONA_MCP_TOOLS = Object.freeze([
10685
10920
  /**
10686
10921
  * Shared closure body for the two worker MCP tools. Validates the
10687
10922
  * minimal arg shape (prompt required + optional knobs typed), then
10688
- * forwards to `runWorkerAgent` with `workspace = process.cwd()`. The
10689
- * engine performs every deeper validation (model existence, thinking
10690
- * clamp, worktree provisioning, semaphore acquisition) and never
10691
- * throws its `{text, isError?}` envelope is forwarded verbatim into
10692
- * the MCP `tool result` shape.
10923
+ * forwards to `runWorkerAgent`. `workspace` defaults to the proxy's
10924
+ * launch cwd; callers can override via the optional `workspace` arg
10925
+ * (absolute paths only enforced here). The engine performs every
10926
+ * deeper validation (model existence, thinking clamp, worktree
10927
+ * provisioning, semaphore acquisition, workspace realpath +
10928
+ * accessibility) and never throws — its `{text, isError?}` envelope
10929
+ * is forwarded verbatim into the MCP `tool result` shape.
10693
10930
  *
10694
10931
  * Arg-validation policy mirrors `web_search`'s pattern: shape errors
10695
10932
  * surface as `isError: true` tool-result envelopes (NOT JSON-RPC -32602
@@ -10746,10 +10983,28 @@ async function runWorkerToolCall(call) {
10746
10983
  };
10747
10984
  worktree = args.worktree;
10748
10985
  }
10986
+ let workspace = process.cwd();
10987
+ if (args.workspace !== void 0) {
10988
+ if (typeof args.workspace !== "string" || args.workspace.length === 0) return {
10989
+ content: [{
10990
+ type: "text",
10991
+ text: `worker_${mode}: arguments.workspace must be a non-empty string when provided`
10992
+ }],
10993
+ isError: true
10994
+ };
10995
+ if (!path.isAbsolute(args.workspace)) return {
10996
+ content: [{
10997
+ type: "text",
10998
+ text: `worker_${mode}: arguments.workspace must be an absolute path (got "${args.workspace}")`
10999
+ }],
11000
+ isError: true
11001
+ };
11002
+ workspace = args.workspace;
11003
+ }
10749
11004
  const result = await runWorkerAgent({
10750
11005
  mode,
10751
11006
  prompt,
10752
- workspace: process.cwd(),
11007
+ workspace,
10753
11008
  model,
10754
11009
  thinking,
10755
11010
  worktree,
@@ -11459,7 +11714,7 @@ function initProxyFromEnv() {
11459
11714
  //#endregion
11460
11715
  //#region package.json
11461
11716
  var name = "github-router";
11462
- var version = "0.3.42";
11717
+ var version = "0.3.43";
11463
11718
 
11464
11719
  //#endregion
11465
11720
  //#region src/lib/approval.ts