agex 0.2.7 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,85 @@
#!/bin/bash
# ab-click: animate the visual cursor to an element, click it, fade the cursor out.
#
# Steps:
#   1. Scroll the element into view and give the scroll 300 ms to settle.
#   2. Read the element's bounding box without interacting with it.
#   3. Animate the visual cursor to the element's centre and wait for it.
#   4. Show the click ripple and perform the real click via agent-browser.
#   5. Sync the effects' last cursor position, then fade the cursor out.
#
# If the element's position cannot be parsed, the click is still performed,
# just without the animation.
#
# Usage: ab-click <selector|@ref>
# Exit status: 0 on success; 1 on usage error, element not found, or click failure.

SELECTOR="$1"
if [ -z "$SELECTOR" ]; then
  echo "Usage: ab-click <selector|@ref>"
  exit 1
fi

# True when $1 is a plain decimal number (optional sign, optional fraction).
is_number() {
  [[ "$1" =~ ^-?[0-9]+(\.[0-9]+)?$ ]]
}

# Print the first numeric value stored under JSON key $2 in the text $1.
json_number() {
  printf '%s\n' "$1" | grep -o "\"$2\":[0-9.-]*" | head -1 | cut -d: -f2
}

# Perform the real click on $SELECTOR. On failure, print agent-browser's
# output to stderr and exit 1 so a failed click is never reported as "clicked".
native_click() {
  local output
  if ! output=$(agent-browser click "$SELECTOR" {{SESSION_ARG}} 2>&1); then
    echo "click failed: $SELECTOR" >&2
    [ -n "$output" ] && echo "$output" >&2
    exit 1
  fi
}

# Escape the selector for use inside a single-quoted JS string literal.
# Backslashes must be doubled first (CSS escapes such as `.md\:flex` would
# otherwise be swallowed by the JS parser), then single quotes are escaped.
ESCAPED_SELECTOR=$(printf '%s' "$SELECTOR" | sed "s/\\\\/\\\\\\\\/g; s/'/\\\\'/g")

# Scroll element into center of its container before measuring position
if [[ "$SELECTOR" == @* ]]; then
  agent-browser scrollintoview "$SELECTOR" {{SESSION_ARG}} >/dev/null 2>&1
else
  agent-browser eval "window.__agexEffects.proof.scrollElementToCenter('${ESCAPED_SELECTOR}')" {{SESSION_ARG}} >/dev/null 2>&1
fi
agent-browser wait 300 {{SESSION_ARG}} >/dev/null 2>&1

# Get element bounding box without interacting with it
if [[ "$SELECTOR" == @* ]]; then
  # For @ref: `get styles --json` output includes the bounding box fields
  BOX=$(agent-browser get styles "$SELECTOR" --json {{SESSION_ARG}} 2>&1) || true
else
  BOX=$(agent-browser eval "JSON.stringify(window.__agexEffects.proof.getBoundingBox('${ESCAPED_SELECTOR}'))" {{SESSION_ARG}} 2>&1) || true
  if [ -z "$BOX" ] || [[ "$BOX" == "null" ]] || [[ "$BOX" == *"error"* ]]; then
    echo "element not found: $SELECTOR"
    exit 1
  fi
fi

EL_X=$(json_number "$BOX" x)
EL_Y=$(json_number "$BOX" y)
EL_WIDTH=$(json_number "$BOX" width)
EL_HEIGHT=$(json_number "$BOX" height)

if ! is_number "$EL_X" || ! is_number "$EL_Y" || ! is_number "$EL_WIDTH" || ! is_number "$EL_HEIGHT"; then
  # Last resort: just click without animation
  native_click
  echo "clicked $SELECTOR (no animation - could not get position)"
  exit 0
fi

# Click target (and cursor target): element center.
# awk is used instead of bc because bc is not part of many minimal installs;
# LC_ALL=C keeps the decimal separator a dot so the value is valid JS.
CLICK_X=$(LC_ALL=C awk -v pos="$EL_X" -v size="$EL_WIDTH" 'BEGIN { printf "%.3f", pos + size / 2 }')
CLICK_Y=$(LC_ALL=C awk -v pos="$EL_Y" -v size="$EL_HEIGHT" 'BEGIN { printf "%.3f", pos + size / 2 }')

# Start animation (fire-and-forget via requestAnimationFrame) and get duration synchronously
ANIM_DURATION=$(agent-browser eval "'' + window.__agexEffects.cursor.animateToPosition(${CLICK_X}, ${CLICK_Y})" {{SESSION_ARG}} 2>/dev/null)

# Parse duration: strip whitespace/quotes and any fractional part, then make
# sure only digits remain before using it in shell arithmetic.
ANIM_DURATION=$(printf '%s' "$ANIM_DURATION" | tr -d '[:space:]"')
ANIM_DURATION=${ANIM_DURATION%%.*}
if ! [[ "$ANIM_DURATION" =~ ^[0-9]+$ ]] || [ "$((10#$ANIM_DURATION))" -eq 0 ]; then
  ANIM_DURATION=2000
fi

# Wait for animation to complete + pause so viewer can see the target.
# `10#` forces base 10 so a value with leading zeros is not read as octal.
WAIT_MS=$((10#$ANIM_DURATION + 300))
agent-browser wait "$WAIT_MS" {{SESSION_ARG}} >/dev/null 2>&1
agent-browser wait 1000 {{SESSION_ARG}} >/dev/null 2>&1

# Show click ripple and perform the actual click
agent-browser eval "window.__agexEffects.cursor.showClick(${CLICK_X}, ${CLICK_Y})" {{SESSION_ARG}} >/dev/null 2>&1
native_click

# Sync _lastPos to the actual native click position (element center)
# so the next animateToPosition starts from where the native mouse really is
agent-browser eval "window.__agexEffects.cursor._lastPos = { x: ${CLICK_X}, y: ${CLICK_Y} }" {{SESSION_ARG}} >/dev/null 2>&1

# Fade cursor out
agent-browser eval "window.__agexEffects.cursor.fadeOut()" {{SESSION_ARG}} >/dev/null 2>&1

agent-browser wait 450 {{SESSION_ARG}} >/dev/null 2>&1

echo "clicked $SELECTOR"
@@ -13,11 +13,11 @@ deduplicate_video() {
13
13
 
14
14
  echo "Removing duplicate frames (output fps: $fps)..."
15
15
  # Suppress ffmpeg/ffprobe progress output (extremely verbose, confuses agent)
16
- if ffmpeg -y -i "$input" -vf "mpdecimate,setpts=N/FRAME_RATE/TB" -r "$fps" -c:v libx264 "$output" 2>/dev/null; then
16
+ if ffmpeg -y -i "$input" -vf "mpdecimate=hi=200:lo=100:frac=0.1:max=8,setpts=N/FRAME_RATE/TB" -r "$fps" -c:v libx264 -crf 18 -preset slow -pix_fmt yuv420p -minrate 500k -maxrate 5M -bufsize 2M "$output" 2>/dev/null; then
17
17
  local orig_frames=$(ffprobe -v error -count_frames -select_streams v:0 -show_entries stream=nb_read_frames -of default=noprint_wrappers=1:nokey=1 "$input" 2>/dev/null)
18
18
  local new_frames=$(ffprobe -v error -count_frames -select_streams v:0 -show_entries stream=nb_read_frames -of default=noprint_wrappers=1:nokey=1 "$output" 2>/dev/null)
19
19
  echo "Deduplicated: $orig_frames -> $new_frames frames"
20
- rm -f "$input"
20
+ # Keep raw video for debugging (don't delete $input)
21
21
  return 0
22
22
  else
23
23
  echo "Warning: deduplication failed, keeping original"
@@ -97,7 +97,7 @@ else
97
97
  fi
98
98
 
99
99
  # Run ffmpeg with crossfade
100
- FFMPEG_CMD="ffmpeg $INPUTS -filter_complex \"$FILTER\" $OUTPUT_MAP -y \"{{RAW_VIDEO_PATH}}\""
100
+ FFMPEG_CMD="ffmpeg $INPUTS -filter_complex \"$FILTER\" $OUTPUT_MAP -c:v libx264 -crf 18 -preset slow -pix_fmt yuv420p -minrate 500k -maxrate 5M -bufsize 2M -y \"{{RAW_VIDEO_PATH}}\""
101
101
 
102
102
  # Suppress ffmpeg progress output (extremely verbose, confuses agent)
103
103
  if eval "$FFMPEG_CMD" 2>/dev/null; then
@@ -8,8 +8,8 @@ SEGMENT_PATH="{{SEGMENTS_DIR}}/$SEGMENT_NAME"
8
8
 
9
9
  agent-browser record start "$SEGMENT_PATH" {{SESSION_ARG}}
10
10
 
11
- # RE-INJECT init script after recording starts (recording causes page context change)
12
- # stdout suppressed to avoid echoing JS source back to agent
11
+ # Recording creates a fresh browser context — restore viewport + zoom + effects
12
+ agent-browser set viewport {{VIEWPORT_WIDTH}} {{VIEWPORT_HEIGHT}} {{SESSION_ARG}}
13
13
  agent-browser eval --stdin {{SESSION_ARG}} < "{{INIT_SCRIPT_PATH}}" >/dev/null
14
14
 
15
15
  # Wait to capture initial frame
@@ -19,8 +19,8 @@ SEGMENT_PATH="{{SEGMENTS_DIR}}/$SEGMENT_NAME"
19
19
  agent-browser record start "$SEGMENT_PATH" {{SESSION_ARG}}
20
20
  echo "Recording segment $SEGMENT_NAME: $TITLE"
21
21
 
22
- # RE-INJECT init script after recording starts (recording causes page context change)
23
- # stdout suppressed to avoid echoing JS source back to agent
22
+ # Recording creates a fresh browser context — restore viewport + zoom + effects
23
+ agent-browser set viewport {{VIEWPORT_WIDTH}} {{VIEWPORT_HEIGHT}} {{SESSION_ARG}}
24
24
  agent-browser eval --stdin {{SESSION_ARG}} < "{{INIT_SCRIPT_PATH}}" >/dev/null
25
25
 
26
26
  # Inject title overlay
@@ -0,0 +1,47 @@
#!/bin/bash
# ab-type: move the cursor to an input, click to focus it, then type text in-browser.
#
# Uses ab-click for the animated cursor movement + click. Typing happens
# entirely in JS (one eval call) instead of per-character shell commands.
#
# Usage: ab-type <selector|@ref> "text"
# Exit status: 0 on success; 1 on usage error, when ab-click fails, or when
# the effects library is not loaded (nothing would be typed).

SELECTOR="$1"
TEXT="$2"
if [ -z "$SELECTOR" ] || [ -z "$TEXT" ]; then
  echo "Usage: ab-type <selector|@ref> \"text\""
  exit 1
fi

# Escape $1 for use inside a single-quoted JS string literal: double the
# backslashes, escape single quotes, and turn interior newlines into \n
# (a raw newline inside a JS string literal is a syntax error).
js_escape() {
  printf '%s' "$1" \
    | sed "s/\\\\/\\\\\\\\/g; s/'/\\\\'/g" \
    | awk 'NR > 1 { printf "\\n" } { printf "%s", $0 }'
}

# Move cursor to input and click to focus (animated).
# Stop here if the click failed: typing into an unfocused or missing
# element would otherwise be reported as success.
ab-click "$SELECTOR" || exit 1

ESCAPED_SEL=$(js_escape "$SELECTOR")
ESCAPED_TEXT=$(js_escape "$TEXT")

# Show glow focus effect, lock cursor, type text — all in one eval.
# typeText returns total duration in ms; we wait for it in shell.
# 'nofx' is returned when the effects library is missing so the shell can
# tell "nothing was typed" apart from an unknown duration.
TYPE_DURATION=$(agent-browser eval "(function(){
  var fx = window.__agexEffects;
  if (!fx) return 'nofx';
  fx.cursor.showInputFocus('${ESCAPED_SEL}');
  if (window.__agexCursor) window.__agexCursor.setAnimationLock(true);
  var dur = fx.cursor.typeText('${ESCAPED_SEL}', '${ESCAPED_TEXT}', 80);
  return '' + dur;
})()" {{SESSION_ARG}} 2>/dev/null)

TYPE_DURATION=$(printf '%s' "$TYPE_DURATION" | tr -d '[:space:]"')
if [ "$TYPE_DURATION" = "nofx" ]; then
  echo "Error: effects not loaded, nothing was typed into $SELECTOR" >&2
  exit 1
fi

# Keep only the integer part and fall back to 1000 ms when the result is
# missing, zero, or not a number, so the arithmetic below cannot fail.
TYPE_DURATION=${TYPE_DURATION%%.*}
if ! [[ "$TYPE_DURATION" =~ ^[0-9]+$ ]] || [ "$((10#$TYPE_DURATION))" -eq 0 ]; then
  TYPE_DURATION=1000
fi

# Wait for typing animation to finish + brief pause for viewer to read.
# `10#` forces base 10 so a value with leading zeros is not read as octal.
WAIT_MS=$((10#$TYPE_DURATION + 400))
agent-browser wait "$WAIT_MS" {{SESSION_ARG}} >/dev/null 2>&1

# Clear glow focus effect and unlock cursor
agent-browser eval "(function(){
  var fx = window.__agexEffects;
  if (!fx) return;
  fx.cursor.clearInputFocus();
  if (window.__agexCursor) window.__agexCursor.setAnimationLock(false);
})()" {{SESSION_ARG}} >/dev/null 2>&1

echo "typed \"$TEXT\" into $SELECTOR"
@@ -1,5 +1,6 @@
1
1
  #!/bin/bash
2
- # Auto-inject --session for all agent-browser commands
2
+ # Auto-inject --session and ensure daemon discovery works
3
+ export AGENT_BROWSER_HOME="{{AGENT_BROWSER_HOME}}"
3
4
  for arg in "$@"; do
4
5
  [ "$arg" = "--session" ] && exec "{{REAL_AGENT_BROWSER}}" "$@"
5
6
  done
@@ -13,14 +13,22 @@ fi
13
13
  # Escape label for JS (handle quotes and backslashes)
14
14
  ESCAPED_LABEL=$(printf '%s' "$LABEL" | sed "s/\\\\/\\\\\\\\/g; s/'/\\\\'/g")
15
15
 
16
+ {{FX_INJECT_HELPER}}
17
+
16
18
  if [[ "$SELECTOR" == @* ]]; then
17
- # @ref format - use agent-browser get box to get element position
18
- BOX=$(agent-browser get box "$SELECTOR" {{SESSION_ARG}} 2>&1) || true
19
- if [ -z "$BOX" ] || [[ "$BOX" == *"error"* ]]; then
19
+ # @ref format - use get styles --json (get box does NOT support @ref)
20
+ STYLES=$(agent-browser get styles "$SELECTOR" --json {{SESSION_ARG}} 2>&1) || true
21
+ EL_X=$(echo "$STYLES" | grep -o '"x":[0-9.-]*' | head -1 | cut -d: -f2)
22
+ EL_Y=$(echo "$STYLES" | grep -o '"y":[0-9.-]*' | head -1 | cut -d: -f2)
23
+ EL_WIDTH=$(echo "$STYLES" | grep -o '"width":[0-9.-]*' | head -1 | cut -d: -f2)
24
+ EL_HEIGHT=$(echo "$STYLES" | grep -o '"height":[0-9.-]*' | head -1 | cut -d: -f2)
25
+
26
+ if [ -z "$EL_X" ] || [ -z "$EL_Y" ] || [ -z "$EL_WIDTH" ] || [ -z "$EL_HEIGHT" ]; then
20
27
  echo "Element not found: $SELECTOR"
21
28
  exit 1
22
29
  fi
23
30
 
31
+ BOX="{\"x\":$EL_X,\"y\":$EL_Y,\"width\":$EL_WIDTH,\"height\":$EL_HEIGHT}"
24
32
  agent-browser eval {{SESSION_ARG}} "window.__agexEffects.proof.highlightBox($BOX,'${ESCAPED_LABEL}','$COLOR')" 2>&1 || true
25
33
  else
26
34
  # CSS selector - use effects.js
@@ -0,0 +1,14 @@
#!/bin/bash
# fx-subtitle: replace any current narration with a subtitle overlay.
#
# Usage: fx-subtitle <text> [duration_ms] [position] [fg_color] [bg_color]
#   text         subtitle text (required)
#   duration_ms  non-negative number passed to showSubtitle (default: 0)
#   position     passed to showSubtitle as a string (default: bottom)
#   fg_color     optional, passed through as a string
#   bg_color     optional, passed through as a string
# Exit status: 1 on usage error.

TEXT="$1"
DURATION="${2:-0}"
POSITION="${3:-bottom}"
FG="${4:-}"
BG="${5:-}"
if [ -z "$TEXT" ]; then
  echo "Usage: fx-subtitle <text> [duration_ms] [position] [fg_color] [bg_color]"
  exit 1
fi

# DURATION is interpolated into JS as a bare number, so reject anything that
# is not one instead of sending a broken (or injected) expression.
if ! [[ "$DURATION" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
  echo "Error: duration_ms must be a non-negative number (got: $DURATION)" >&2
  echo "Usage: fx-subtitle <text> [duration_ms] [position] [fg_color] [bg_color]"
  exit 1
fi

# Escape $1 for use inside a single-quoted JS string literal: double the
# backslashes, escape single quotes, and turn interior newlines into \n.
js_escape() {
  printf '%s' "$1" \
    | sed "s/\\\\/\\\\\\\\/g; s/'/\\\\'/g" \
    | awk 'NR > 1 { printf "\\n" } { printf "%s", $0 }'
}

{{FX_INJECT_HELPER}}
agent-browser eval {{SESSION_ARG}} "window.__agexEffects.narration.clear()"
ESCAPED_TEXT=$(js_escape "$TEXT")
ESCAPED_POSITION=$(js_escape "$POSITION")
ESCAPED_FG=$(js_escape "$FG")
ESCAPED_BG=$(js_escape "$BG")
agent-browser eval {{SESSION_ARG}} "window.__agexEffects.narration.showSubtitle('${ESCAPED_TEXT}',${DURATION},'${ESCAPED_POSITION}','${ESCAPED_FG}','${ESCAPED_BG}')"
@@ -49,8 +49,10 @@ var ValidationError = class extends AgexError {
49
49
  };
50
50
  var SkillInstallError = class extends AgexError {
51
51
  skill;
52
- constructor(skill, cause) {
53
- super(`Failed to install skill: ${skill}`, "SKILL_INSTALL_ERROR", cause);
52
+ constructor(skill, detail, cause) {
53
+ const msg = detail ? `Failed to install skill: ${skill}
54
+ ${detail}` : `Failed to install skill: ${skill}`;
55
+ super(msg, "SKILL_INSTALL_ERROR", cause);
54
56
  this.name = "SkillInstallError";
55
57
  this.skill = skill;
56
58
  }
@@ -140,7 +142,7 @@ var DEFAULTS = {
140
142
  // Browser / video
141
143
  VIEWPORT_WIDTH: 1920,
142
144
  VIEWPORT_HEIGHT: 1080,
143
- DEDUP_FPS: 5,
145
+ DEDUP_FPS: 24,
144
146
  XFADE_DURATION_SEC: 0.3,
145
147
  // Git truncation limits
146
148
  GIT_DIFF_MAX_LINES: 100,
@@ -4713,6 +4715,14 @@ function extractToolResult(toolCall) {
4713
4715
  stderr: error.stderr ?? error.message
4714
4716
  };
4715
4717
  }
4718
+ const failure = result.failure;
4719
+ if (failure) {
4720
+ return {
4721
+ exitCode: failure.exitCode ?? 1,
4722
+ stdout: failure.stdout,
4723
+ stderr: failure.stderr ?? failure.message
4724
+ };
4725
+ }
4716
4726
  }
4717
4727
  }
4718
4728
  return null;
@@ -5457,11 +5467,14 @@ function getScriptEntries(videoEnabled) {
5457
5467
  { outputName: "fx-spotlight", scriptFile: "fx-spotlight.sh" },
5458
5468
  { outputName: "fx-circle", scriptFile: "fx-circle.sh" },
5459
5469
  { outputName: "fx-arrow", scriptFile: "fx-arrow.sh" },
5470
+ { outputName: "fx-subtitle", scriptFile: "fx-subtitle.sh" },
5460
5471
  { outputName: "fx-clear", scriptFile: "fx-clear.sh" },
5461
5472
  { outputName: "fx-wait", scriptFile: "fx-wait.sh" },
5462
5473
  { outputName: "ab-proof", scriptFile: "ab-proof.sh" },
5463
5474
  { outputName: "ab-not-found", scriptFile: "ab-not-found.sh" },
5464
5475
  { outputName: "ab-move", scriptFile: "ab-move.sh" },
5476
+ { outputName: "ab-click", scriptFile: "ab-click.sh" },
5477
+ { outputName: "ab-type", scriptFile: "ab-type.sh" },
5465
5478
  { outputName: "agent-browser", scriptFile: "agent-browser-wrapper.sh" }
5466
5479
  ];
5467
5480
  }
@@ -5474,19 +5487,40 @@ function loadAndRenderScript(scriptFile, vars) {
5474
5487
  var log8 = createLogger("browse");
/**
 * Locate the agent-browser executable and, when possible, its package root.
 *
 * The binary is looked up with `which`. The package root ("home") is then
 * searched for in two places:
 *   1. `<binDir>/../lib/node_modules/agent-browser` (npm global layout);
 *   2. the nearest directory containing a package.json, walking up at most
 *      five levels from the symlink-resolved binary.
 *
 * @returns {{ bin: string, home?: string }} `bin` is the resolved path, or
 *   the bare command name "agent-browser" when lookup fails; `home` is
 *   undefined when no package root was found.
 */
function resolveAgentBrowser() {
  let binPath = "";
  try {
    // stderr is ignored so that `which` implementations that print a
    // "not found" message do not leak it to the parent's stderr; the
    // warning below is the single report of that condition.
    binPath = execSync2("which agent-browser", {
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "ignore"]
    }).trim();
  } catch {
    binPath = "";
  }
  if (!binPath) {
    log8.warn("agent-browser not found in PATH, using bare command name");
    return { bin: "agent-browser" };
  }
  log8.debug(`resolved agent-browser: ${binPath}`);

  let home;
  const binDir = path4.dirname(binPath);
  const npmGlobalCandidate = path4.join(binDir, "..", "lib", "node_modules", "agent-browser");
  if (fs3.existsSync(path4.join(npmGlobalCandidate, "package.json"))) {
    home = path4.resolve(npmGlobalCandidate);
  }
  if (!home) {
    try {
      // Follow symlinks (e.g. a bin shim) and walk up to the package root.
      const realBin = fs3.realpathSync(binPath);
      let dir = path4.dirname(realBin);
      for (let i = 0; i < 5; i++) {
        if (fs3.existsSync(path4.join(dir, "package.json"))) {
          home = dir;
          break;
        }
        dir = path4.dirname(dir);
      }
    } catch (err) {
      // Best effort: a missing home is tolerated, but record why.
      log8.debug(`could not resolve agent-browser real path: ${err?.message ?? err}`);
    }
  }
  log8.debug(`agent-browser home: ${home ?? "not found"}`);
  return { bin: binPath, home };
}
/**
 * Build the shell snippet that helper scripts embed to make sure the
 * effects library is present in the page before they use it.
 *
 * The snippet evaluates `!!window.__agexEffects`; when the output does not
 * contain "true" it feeds the init script to `agent-browser eval --stdin`,
 * and exits the calling script with status 1 if that injection fails.
 *
 * @param {string} sessionArg - Session argument inserted verbatim into each agent-browser call.
 * @param {string} initScriptPath - Path of the init script to inject.
 * @returns {string} Shell snippet, starting with a newline.
 */
function buildFxInjectHelper(sessionArg, initScriptPath) {
  const probe = `agent-browser eval ${sessionArg} "!!window.__agexEffects" 2>/dev/null | grep -q "true"`;
  const inject = `agent-browser eval --stdin ${sessionArg} < "${initScriptPath}" >/dev/null`;
  const onFailure = `{ echo "Error: failed to inject effects (is browser session running?)" >&2; exit 1; }`;
  const snippet = [
    "",
    "# Ensure effects are loaded (re-inject if not present)",
    `if ! ${probe}; then`,
    `  ${inject} || ${onFailure}`,
    "fi"
  ];
  return snippet.join("\n");
}
5492
5526
  function buildTemplateVars(config, tempDir, sessionId, initScriptPath) {
@@ -5498,7 +5532,7 @@ function buildTemplateVars(config, tempDir, sessionId, initScriptPath) {
5498
5532
  const rawVideoPath = path4.join(outputDir, "recording-raw.webm");
5499
5533
  const finalVideoPath = path4.join(outputDir, "recording.mp4");
5500
5534
  const sessionArg = `--session ${sessionId}`;
5501
- const realAgentBrowser = resolveAgentBrowser();
5535
+ const agentBrowser = resolveAgentBrowser();
5502
5536
  return {
5503
5537
  SESSION_ID: sessionId,
5504
5538
  SESSION_ARG: sessionArg,
@@ -5512,7 +5546,8 @@ function buildTemplateVars(config, tempDir, sessionId, initScriptPath) {
5512
5546
  RAW_VIDEO_PATH: rawVideoPath,
5513
5547
  FINAL_VIDEO_PATH: finalVideoPath,
5514
5548
  DEDUP_FPS: String(DEFAULTS.DEDUP_FPS),
5515
- REAL_AGENT_BROWSER: realAgentBrowser,
5549
+ REAL_AGENT_BROWSER: agentBrowser.bin,
5550
+ AGENT_BROWSER_HOME: agentBrowser.home ?? "",
5516
5551
  FX_INJECT_HELPER: buildFxInjectHelper(sessionArg, initScriptPath),
5517
5552
  VIDEO_MKDIR: videoEnabled ? `mkdir -p "${segmentsDir}"` : "# video recording disabled",
5518
5553
  VIDEO_INIT_COUNTER: videoEnabled ? `echo "0" > "${segmentCounterPath}"` : "",
@@ -5542,13 +5577,17 @@ function createWorkspaceFiles(config) {
5542
5577
  const initScriptPath = writeInitScript(tempDir, config);
5543
5578
  const vars = buildTemplateVars(config, tempDir, sessionId, initScriptPath);
5544
5579
  writeScripts(tempDir, vars, videoEnabled);
5580
+ const cursorDir = path4.join(tempDir, ".cursor");
5581
+ fs3.mkdirSync(cursorDir, { recursive: true });
5582
+ fs3.writeFileSync(path4.join(cursorDir, "mcp.json"), JSON.stringify({ mcpServers: {} }));
5545
5583
  const wrapperPath = path4.join(tempDir, "ab-fx");
5546
5584
  return {
5547
5585
  workspacePath: tempDir,
5548
5586
  initScriptPath,
5549
5587
  injectEffectsCommand: wrapperPath,
5550
5588
  env: {
5551
- AGENT_BROWSER_SESSION: sessionId
5589
+ AGENT_BROWSER_SESSION: sessionId,
5590
+ ...vars.AGENT_BROWSER_HOME ? { AGENT_BROWSER_HOME: vars.AGENT_BROWSER_HOME } : {}
5552
5591
  },
5553
5592
  cleanup: () => {
5554
5593
  try {
@@ -5659,7 +5698,7 @@ EXAMPLE - FAIL case: Verifying "dropdown has Options A, B, and C" but B is missi
5659
5698
  # === PHASE 1: EXPLORE ===
5660
5699
  ab-open "https://example.com"
5661
5700
  agent-browser snapshot -i
5662
- agent-browser click @dropdownButton
5701
+ ab-click @dropdownButton
5663
5702
  agent-browser wait 500
5664
5703
  agent-browser snapshot -i
5665
5704
  # See: link "Option A" [ref=e5], link "Option C" [ref=e7]
@@ -5670,7 +5709,7 @@ agent-browser eval 'document.body.textContent.includes("Option B") ? "found" : "
5670
5709
 
5671
5710
  # === PHASE 2: RECORD (document the FAILURE - RED highlight is PRIMARY!) ===
5672
5711
  ab-record-start
5673
- agent-browser click @dropdownButton
5712
+ ab-click @dropdownButton
5674
5713
  agent-browser wait 500
5675
5714
  agent-browser snapshot -i
5676
5715
  # CRITICAL: For FAIL, highlight the MISSING element FIRST (this is your evidence!)
@@ -5689,6 +5728,8 @@ COMMANDS:
5689
5728
  ab-record-start # Start recording
5690
5729
  ab-close [1 2 ...] # Stop recording and merge segments
5691
5730
  ab-screenshot <name>.png # Capture screenshot
5731
+ ab-click <selector|@ref> # Smooth cursor animation + click
5732
+ ab-type <selector|@ref> "text" # Click input + type text slowly (triggers React events)
5692
5733
 
5693
5734
  EFFECTS (use AFTER ab-record-start):
5694
5735
  fx-highlight <selector> "label" [color] # Highlight element with label
@@ -5747,6 +5788,11 @@ For elements WITH @ref:
5747
5788
  agent-browser get attr @e5 aria-label # Get aria-label
5748
5789
  agent-browser get styles @e5 --json # Get tag name from "tag" field
5749
5790
 
5791
+ TYPING INTO INPUTS:
5792
+ ALWAYS use ab-type or agent-browser fill to type into inputs.
5793
+ NEVER set .value via eval \u2014 it does NOT trigger React/framework events and buttons stay disabled.
5794
+ ab-type @e1 "text" # Best: animated cursor + slow typing (triggers all events)
5795
+
5750
5796
  BROWSER:
5751
5797
  agent-browser snapshot # Get ALL elements with refs
5752
5798
  agent-browser snapshot -i # Interactive only (buttons, links, inputs)
@@ -5755,8 +5801,8 @@ BROWSER:
5755
5801
  agent-browser snapshot -s "#id" # Scope to CSS selector
5756
5802
  agent-browser scrollintoview @e1 # Smooth scroll to element
5757
5803
  agent-browser wait <ms> # Wait milliseconds
5758
- agent-browser click @e1 # Click element
5759
- agent-browser eval '<js>' # Run JavaScript
5804
+ ab-click @e1 # Click element (smooth cursor animation)
5805
+ agent-browser eval '<js>' # Run JavaScript (NOT for typing into inputs!)
5760
5806
 
5761
5807
  SNAPSHOT TIPS:
5762
5808
  - Use snapshot (no flags) when looking for VISUAL elements (images, logos, text)
@@ -5809,7 +5855,8 @@ function parseVerdict(output) {
5809
5855
  if (matches.length === 0) {
5810
5856
  return { verdict: "fail", reason: "No verdict found in output" };
5811
5857
  }
5812
- const lastMatch = matches[matches.length - 1];
5858
+ const real = matches.filter((m) => !(m[2] ?? "").includes("PROVE_VERDICT"));
5859
+ const lastMatch = real.length > 0 ? real[real.length - 1] : matches[matches.length - 1];
5813
5860
  return {
5814
5861
  verdict: lastMatch[1].toLowerCase(),
5815
5862
  reason: lastMatch[2]?.trim() || ""
@@ -5832,14 +5879,14 @@ function installSkills(workspacePath, agent, skills) {
5832
5879
  });
5833
5880
  log9.debug(`installed skill: ${skill}`);
5834
5881
  } catch (err) {
5835
- const error = new SkillInstallError(skill, err instanceof Error ? err : void 0);
5882
+ const detail = (err?.stderr ?? err?.stdout)?.toString().trim() || err?.message || "";
5883
+ const error = new SkillInstallError(skill, detail, err instanceof Error ? err : void 0);
5836
5884
  log9.warn(error.message);
5837
5885
  }
5838
5886
  }
5839
5887
  }
5840
5888
  var DEFAULT_SKILLS = ["vercel-labs/agent-browser@agent-browser"];
5841
5889
  var DEFAULT_OPTIONS = {
5842
- agent: DEFAULTS.AGENT,
5843
5890
  outputDir: "./prove-results",
5844
5891
  video: true,
5845
5892
  screenshots: true,
@@ -5858,7 +5905,8 @@ async function prove(assertion, options = {}) {
5858
5905
  throw new ValidationError("Assertion cannot be empty", "assertion");
5859
5906
  }
5860
5907
  const opts = { ...DEFAULT_OPTIONS, ...options };
5861
- const agent = opts.agent;
5908
+ const adapter = opts.agent ? getAdapter(opts.agent) : await findAvailableAgent();
5909
+ const agent = adapter.name;
5862
5910
  const outputDir = createOutputDir(opts.outputDir, agent, assertion);
5863
5911
  log9.info(`proving: "${assertion}" (agent=${agent})`);
5864
5912
  const browserConfig = {
@@ -6098,12 +6146,14 @@ function normalizeReviewOutput(output) {
6098
6146
  async function runReview(request) {
6099
6147
  const hypothesesCount = request.hypotheses ?? DEFAULTS.HYPOTHESES_COUNT;
6100
6148
  const includeWorktree = request.includeWorktree ?? true;
6101
- log11.info(`reviewing: base=${request.baseRef} agent=${request.agent} hypotheses=${hypothesesCount}`);
6149
+ const adapter = request.agent ? getAdapter(request.agent) : await findAvailableAgent();
6150
+ const agent = adapter.name;
6151
+ log11.info(`reviewing: base=${request.baseRef} agent=${agent} hypotheses=${hypothesesCount}`);
6102
6152
  const gitContext = await buildGitContext(request.baseRef, includeWorktree, request.cwd);
6103
6153
  const planPrompt = buildPlanPrompt(gitContext, hypothesesCount, request.promptHint);
6104
6154
  log11.debug("generating review plan...");
6105
6155
  const plan = await runAgent({
6106
- agent: request.agent,
6156
+ agent,
6107
6157
  model: request.model,
6108
6158
  prompt: planPrompt,
6109
6159
  cwd: request.cwd,
@@ -6112,7 +6162,7 @@ async function runReview(request) {
6112
6162
  });
6113
6163
  log11.debug("executing review...");
6114
6164
  const execution = await runAgent({
6115
- agent: request.agent,
6165
+ agent,
6116
6166
  model: request.model,
6117
6167
  prompt: plan.json.stdout,
6118
6168
  cwd: request.cwd,
@@ -6121,7 +6171,7 @@ async function runReview(request) {
6121
6171
  });
6122
6172
  const outputJson = {
6123
6173
  baseRef: request.baseRef,
6124
- agent: request.agent,
6174
+ agent,
6125
6175
  model: request.model,
6126
6176
  plan: plan.json,
6127
6177
  execution: execution.json
@@ -6170,7 +6220,7 @@ HELPER SCRIPTS:
6170
6220
 
6171
6221
  AGENT-BROWSER COMMANDS:
6172
6222
  - agent-browser snapshot -i # Get interactive elements with refs (@e1, @e2)
6173
- - agent-browser click @e1 # Click element
6223
+ - ab-click @e1 # Click element (smooth cursor animation)
6174
6224
  - agent-browser fill @e2 "text" # Fill input
6175
6225
  - agent-browser get text @e1 # Get element text
6176
6226
  - agent-browser eval "<js>" # Run JavaScript
@@ -6248,13 +6298,15 @@ async function provePr(request) {
6248
6298
  const baseRef = request.baseRef || detectDefaultBranch();
6249
6299
  const outputDir = createOutputDir2(request.outputDir);
6250
6300
  const { width, height } = request.viewport;
6251
- log12.info(`proving PR: base=${baseRef} agent=${request.agent} hypotheses=${request.hypothesesCount}`);
6301
+ const adapter = request.agent ? getAdapter(request.agent) : await findAvailableAgent();
6302
+ const agent = adapter.name;
6303
+ log12.info(`proving PR: base=${baseRef} agent=${agent} hypotheses=${request.hypothesesCount}`);
6252
6304
  const startTime = Date.now();
6253
6305
  const gitContext = await buildGitContext(baseRef, true);
6254
6306
  log12.debug("generating hypotheses...");
6255
6307
  const planPrompt = buildPlanPrompt(gitContext, request.hypothesesCount, request.hint);
6256
6308
  const planResult = await runAgent({
6257
- agent: request.agent,
6309
+ agent,
6258
6310
  model: request.model,
6259
6311
  prompt: planPrompt,
6260
6312
  streamOutput: true,
@@ -6276,7 +6328,7 @@ async function provePr(request) {
6276
6328
  const provePrompt = buildProvePrPrompt(gitContext, hypothesesPlan, outputDir, request.url);
6277
6329
  try {
6278
6330
  const proveResult = await runAgent({
6279
- agent: request.agent,
6331
+ agent,
6280
6332
  model: request.model,
6281
6333
  prompt: provePrompt,
6282
6334
  approveMcps: true,
@@ -6319,7 +6371,7 @@ var viewportSchema2 = external_exports.object({
6319
6371
  });
6320
6372
  var provePrRequestSchema = external_exports.object({
6321
6373
  baseRef: external_exports.string().optional(),
6322
- agent: external_exports.enum(AGENT_NAMES),
6374
+ agent: external_exports.enum(AGENT_NAMES).optional(),
6323
6375
  url: external_exports.string().url("Invalid URL").optional(),
6324
6376
  outputDir: external_exports.string().min(1, "Output directory cannot be empty"),
6325
6377
  hypothesesCount: external_exports.number().int().positive("Hypotheses count must be a positive integer"),
@@ -6331,14 +6383,25 @@ var provePrRequestSchema = external_exports.object({
6331
6383
 
6332
6384
  export {
6333
6385
  AgexError,
6386
+ ValidationError,
6387
+ SkillInstallError,
6334
6388
  isAgexError,
6389
+ createLogger,
6335
6390
  AGENT_NAMES,
6336
6391
  OUTPUT_MODES,
6337
6392
  APPROVAL_POLICIES,
6393
+ DEFAULTS,
6394
+ external_exports,
6338
6395
  viewportStringSchema,
6339
6396
  validate,
6397
+ slugify,
6398
+ timestamp,
6399
+ buildPathEnv,
6340
6400
  detectDefaultBranch,
6401
+ getAdapter,
6402
+ findAvailableAgent,
6341
6403
  runAgent,
6404
+ createWorkspaceFiles,
6342
6405
  prove,
6343
6406
  proveOptionsSchema,
6344
6407
  runReview,