agex 0.2.8 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,137 @@
1
+ #!/bin/bash
2
+ # Smooth click: animates visual cursor to element, clicks, then fades cursor out
3
+ # 1. If cursor not visible, starts from left-center of viewport
4
+ # 2. Smoothly moves visual cursor to element's center
5
+ # 3. Performs the actual click via agent-browser
6
+ # 4. Fades the cursor out
7
+ # Usage: ab-click <selector|@ref>
8
+ SELECTOR="$1"
9
+ if [ -z "$SELECTOR" ]; then
10
+ echo "Usage: ab-click <selector|@ref>"
11
+ exit 1
12
+ fi
13
+
14
+ # Smooth-scroll element into center of viewport so the viewer can follow
15
+ if [[ "$SELECTOR" == @* ]]; then
16
+ # For @ref: get bounding box first, then smooth-scroll by coordinates
17
+ PRE_STYLES=$(agent-browser get styles "$SELECTOR" --json {{SESSION_ARG}} 2>&1) || true
18
+ PRE_Y=$(echo "$PRE_STYLES" | grep -o '"y":[0-9.-]*' | head -1 | cut -d: -f2)
19
+ PRE_H=$(echo "$PRE_STYLES" | grep -o '"height":[0-9.-]*' | head -1 | cut -d: -f2)
20
+ if [ -n "$PRE_Y" ] && [ -n "$PRE_H" ]; then
21
+ SCROLL_DUR=$(agent-browser eval "(function(){ var viewH=window.innerHeight; var centerY=${PRE_Y}+${PRE_H}/2; if(centerY>viewH*0.2 && centerY<viewH*0.8) return '0'; var dist=Math.abs(centerY-viewH/2); var dur=Math.max(600,Math.min(2000,Math.round(dist*1.5))); window.scrollBy({top:centerY-viewH/2,behavior:'smooth'}); return ''+dur; })()" {{SESSION_ARG}} 2>/dev/null || echo "0")
22
+ SCROLL_DUR=$(echo "$SCROLL_DUR" | tr -d '[:space:]"')
23
+ if [[ "$SCROLL_DUR" =~ ^[1-9][0-9]*$ ]]; then # only a positive integer is safe inside $(( ))
24
+ SCROLL_WAIT=$((SCROLL_DUR + 200))
25
+ agent-browser wait "$SCROLL_WAIT" {{SESSION_ARG}} >/dev/null 2>&1
26
+ else
27
+ agent-browser wait 300 {{SESSION_ARG}} >/dev/null 2>&1
28
+ fi
29
+ else
30
+ # Fallback: use instant scrollintoview
31
+ agent-browser scrollintoview "$SELECTOR" {{SESSION_ARG}} >/dev/null 2>&1
32
+ agent-browser wait 300 {{SESSION_ARG}} >/dev/null 2>&1
33
+ fi
34
+ else
35
+ ESCAPED_SELECTOR=$(printf '%s' "$SELECTOR" | sed "s/\\\\/\\\\\\\\/g; s/'/\\\\'/g") # JS-escape backslashes first, then single quotes
36
+ SCROLL_DUR=$(agent-browser eval "'' + (window.__agexEffects ? window.__agexEffects.proof.scrollElementToCenterSmooth('${ESCAPED_SELECTOR}') : 0)" {{SESSION_ARG}} 2>/dev/null || echo "0")
37
+ SCROLL_DUR=$(echo "$SCROLL_DUR" | tr -d '[:space:]"')
38
+ if ! [[ "$SCROLL_DUR" =~ ^[1-9][0-9]*$ ]]; then # empty, "0" or non-numeric output: fall back to the instant scroll
39
+ agent-browser eval "window.__agexEffects.proof.scrollElementToCenter('${ESCAPED_SELECTOR}')" {{SESSION_ARG}} >/dev/null 2>&1
40
+ agent-browser wait 300 {{SESSION_ARG}} >/dev/null 2>&1
41
+ else
42
+ SCROLL_WAIT=$((SCROLL_DUR + 200))
43
+ agent-browser wait "$SCROLL_WAIT" {{SESSION_ARG}} >/dev/null 2>&1
44
+ fi
45
+ fi
46
+
47
+ # Get element bounding box without interacting with it
48
+ if [[ "$SELECTOR" == @* ]]; then
49
+ # For @ref: use agent-browser get styles --json which returns bounding box
50
+ STYLES=$(agent-browser get styles "$SELECTOR" --json {{SESSION_ARG}} 2>&1) || true
51
+ EL_X=$(echo "$STYLES" | grep -o '"x":[0-9.-]*' | head -1 | cut -d: -f2)
52
+ EL_Y=$(echo "$STYLES" | grep -o '"y":[0-9.-]*' | head -1 | cut -d: -f2)
53
+ EL_WIDTH=$(echo "$STYLES" | grep -o '"width":[0-9.-]*' | head -1 | cut -d: -f2)
54
+ EL_HEIGHT=$(echo "$STYLES" | grep -o '"height":[0-9.-]*' | head -1 | cut -d: -f2)
55
+
56
+ if [ -z "$EL_X" ] || [ -z "$EL_Y" ] || [ -z "$EL_WIDTH" ] || [ -z "$EL_HEIGHT" ]; then
57
+ # Last resort: just click without animation
58
+ agent-browser click "$SELECTOR" {{SESSION_ARG}} >/dev/null 2>&1
59
+ echo "clicked $SELECTOR (no animation - could not get position)"
60
+ exit 0
61
+ fi
62
+ else
63
+ BOX=$(agent-browser eval "JSON.stringify(window.__agexEffects.proof.getBoundingBox('${ESCAPED_SELECTOR}'))" {{SESSION_ARG}} 2>&1) || true
64
+ if [ -z "$BOX" ] || [[ "$BOX" == "null" ]] || [[ "$BOX" == *"error"* ]]; then
65
+ echo "element not found: $SELECTOR"
66
+ exit 1
67
+ fi
68
+ EL_X=$(echo "$BOX" | grep -o '"x":[0-9.-]*' | cut -d: -f2)
69
+ EL_Y=$(echo "$BOX" | grep -o '"y":[0-9.-]*' | cut -d: -f2)
70
+ EL_WIDTH=$(echo "$BOX" | grep -o '"width":[0-9.-]*' | cut -d: -f2)
71
+ EL_HEIGHT=$(echo "$BOX" | grep -o '"height":[0-9.-]*' | cut -d: -f2)
72
+ fi
73
+
74
+ # Click target: element center
75
+ CLICK_X=$(echo "$EL_X + $EL_WIDTH / 2" | bc -l)
76
+ CLICK_Y=$(echo "$EL_Y + $EL_HEIGHT / 2" | bc -l)
77
+
78
+ # Cursor target: center of element
79
+ TARGET_X=$CLICK_X
80
+ TARGET_Y=$CLICK_Y
81
+
82
+ # Start animation (fire-and-forget via requestAnimationFrame) and get duration synchronously
83
+ ANIM_DURATION=$(agent-browser eval "'' + window.__agexEffects.cursor.animateToPosition(${TARGET_X}, ${TARGET_Y})" {{SESSION_ARG}} 2>/dev/null)
84
+
85
+ # Parse duration: strip whitespace/quotes and any fractional part (shell arithmetic below is integer-only)
86
+ ANIM_DURATION=$(echo "$ANIM_DURATION" | tr -d '[:space:]"' | cut -d. -f1)
87
+ if ! [[ "$ANIM_DURATION" =~ ^[1-9][0-9]*$ ]]; then # empty, zero or non-numeric output: use the default
88
+ ANIM_DURATION=2000
89
+ fi
90
+
91
+ # Wait for animation to complete + pause so viewer can see the target
92
+ WAIT_MS=$((ANIM_DURATION + 300))
93
+ agent-browser wait "$WAIT_MS" {{SESSION_ARG}} >/dev/null 2>&1
94
+ agent-browser wait 1000 {{SESSION_ARG}} >/dev/null 2>&1
95
+
96
+ # Re-measure element position right before clicking (it may have moved during animation)
97
+ if [[ "$SELECTOR" == @* ]]; then
98
+ STYLES2=$(agent-browser get styles "$SELECTOR" --json {{SESSION_ARG}} 2>&1) || true
99
+ EL_X2=$(echo "$STYLES2" | grep -o '"x":[0-9.-]*' | head -1 | cut -d: -f2)
100
+ EL_Y2=$(echo "$STYLES2" | grep -o '"y":[0-9.-]*' | head -1 | cut -d: -f2)
101
+ EL_W2=$(echo "$STYLES2" | grep -o '"width":[0-9.-]*' | head -1 | cut -d: -f2)
102
+ EL_H2=$(echo "$STYLES2" | grep -o '"height":[0-9.-]*' | head -1 | cut -d: -f2)
103
+ if [ -n "$EL_X2" ] && [ -n "$EL_Y2" ] && [ -n "$EL_W2" ] && [ -n "$EL_H2" ]; then
104
+ CLICK_X=$(echo "$EL_X2 + $EL_W2 / 2" | bc -l)
105
+ CLICK_Y=$(echo "$EL_Y2 + $EL_H2 / 2" | bc -l)
106
+ fi
107
+ else
108
+ BOX2=$(agent-browser eval "JSON.stringify(window.__agexEffects.proof.getBoundingBox('${ESCAPED_SELECTOR}'))" {{SESSION_ARG}} 2>&1) || true
109
+ if [ -n "$BOX2" ] && [[ "$BOX2" != "null" ]] && [[ "$BOX2" != *"error"* ]]; then
110
+ EL_X2=$(echo "$BOX2" | grep -o '"x":[0-9.-]*' | cut -d: -f2)
111
+ EL_Y2=$(echo "$BOX2" | grep -o '"y":[0-9.-]*' | cut -d: -f2)
112
+ EL_W2=$(echo "$BOX2" | grep -o '"width":[0-9.-]*' | cut -d: -f2)
113
+ EL_H2=$(echo "$BOX2" | grep -o '"height":[0-9.-]*' | cut -d: -f2)
114
+ if [ -n "$EL_X2" ] && [ -n "$EL_Y2" ] && [ -n "$EL_W2" ] && [ -n "$EL_H2" ]; then
115
+ CLICK_X=$(echo "$EL_X2 + $EL_W2 / 2" | bc -l)
116
+ CLICK_Y=$(echo "$EL_Y2 + $EL_H2 / 2" | bc -l)
117
+ fi
118
+ fi
119
+ fi
120
+
121
+ # Snap cursor to fresh position so ripple lands on the actual element
122
+ agent-browser eval "window.__agexEffects.cursor.setPosition(${CLICK_X}, ${CLICK_Y})" {{SESSION_ARG}} >/dev/null 2>&1
123
+
124
+ # Show click ripple and perform the actual click (keep its exit status; the cursor cleanup below still runs)
125
+ agent-browser eval "window.__agexEffects.cursor.showClick(${CLICK_X}, ${CLICK_Y})" {{SESSION_ARG}} >/dev/null 2>&1
126
+ agent-browser click "$SELECTOR" {{SESSION_ARG}} >/dev/null 2>&1; CLICK_STATUS=$?
127
+
128
+ # Sync _lastPos to the actual click position
129
+ # so the next animateToPosition starts from where the native mouse really is
130
+ agent-browser eval "window.__agexEffects.cursor._lastPos = { x: ${CLICK_X}, y: ${CLICK_Y} }" {{SESSION_ARG}} >/dev/null 2>&1
131
+
132
+ # Fade cursor out
133
+ agent-browser eval "window.__agexEffects.cursor.fadeOut()" {{SESSION_ARG}} >/dev/null 2>&1
134
+
135
+ agent-browser wait 450 {{SESSION_ARG}} >/dev/null 2>&1
136
+ [ "$CLICK_STATUS" -eq 0 ] || { echo "click failed: $SELECTOR"; exit 1; } # do not report a click that did not happen
137
+ echo "clicked $SELECTOR"
@@ -13,11 +13,11 @@ deduplicate_video() {
13
13
 
14
14
  echo "Removing duplicate frames (output fps: $fps)..."
15
15
  # Suppress ffmpeg/ffprobe progress output (extremely verbose, confuses agent)
16
- if ffmpeg -y -i "$input" -vf "mpdecimate,setpts=N/FRAME_RATE/TB" -r "$fps" -c:v libx264 "$output" 2>/dev/null; then
16
+ if ffmpeg -y -i "$input" -vf "mpdecimate=hi=200:lo=100:frac=0.1:max=8,setpts=N/FRAME_RATE/TB" -r "$fps" -c:v libx264 -crf 18 -preset slow -pix_fmt yuv420p -minrate 500k -maxrate 5M -bufsize 2M "$output" 2>/dev/null; then
17
17
  local orig_frames=$(ffprobe -v error -count_frames -select_streams v:0 -show_entries stream=nb_read_frames -of default=noprint_wrappers=1:nokey=1 "$input" 2>/dev/null)
18
18
  local new_frames=$(ffprobe -v error -count_frames -select_streams v:0 -show_entries stream=nb_read_frames -of default=noprint_wrappers=1:nokey=1 "$output" 2>/dev/null)
19
19
  echo "Deduplicated: $orig_frames -> $new_frames frames"
20
- rm -f "$input"
20
+ # Keep raw video for debugging (don't delete $input)
21
21
  return 0
22
22
  else
23
23
  echo "Warning: deduplication failed, keeping original"
@@ -97,7 +97,7 @@ else
97
97
  fi
98
98
 
99
99
  # Run ffmpeg with crossfade
100
- FFMPEG_CMD="ffmpeg $INPUTS -filter_complex \"$FILTER\" $OUTPUT_MAP -y \"{{RAW_VIDEO_PATH}}\""
100
+ FFMPEG_CMD="ffmpeg $INPUTS -filter_complex \"$FILTER\" $OUTPUT_MAP -c:v libx264 -crf 18 -preset slow -pix_fmt yuv420p -minrate 500k -maxrate 5M -bufsize 2M -y \"{{RAW_VIDEO_PATH}}\""
101
101
 
102
102
  # Suppress ffmpeg progress output (extremely verbose, confuses agent)
103
103
  if eval "$FFMPEG_CMD" 2>/dev/null; then
@@ -8,8 +8,8 @@ SEGMENT_PATH="{{SEGMENTS_DIR}}/$SEGMENT_NAME"
8
8
 
9
9
  agent-browser record start "$SEGMENT_PATH" {{SESSION_ARG}}
10
10
 
11
- # RE-INJECT init script after recording starts (recording causes page context change)
12
- # stdout suppressed to avoid echoing JS source back to agent
11
+ # Recording creates a fresh browser context; restore viewport + zoom + effects
12
+ agent-browser set viewport {{VIEWPORT_WIDTH}} {{VIEWPORT_HEIGHT}} {{SESSION_ARG}}
13
13
  agent-browser eval --stdin {{SESSION_ARG}} < "{{INIT_SCRIPT_PATH}}" >/dev/null
14
14
 
15
15
  # Wait to capture initial frame
@@ -19,8 +19,8 @@ SEGMENT_PATH="{{SEGMENTS_DIR}}/$SEGMENT_NAME"
19
19
  agent-browser record start "$SEGMENT_PATH" {{SESSION_ARG}}
20
20
  echo "Recording segment $SEGMENT_NAME: $TITLE"
21
21
 
22
- # RE-INJECT init script after recording starts (recording causes page context change)
23
- # stdout suppressed to avoid echoing JS source back to agent
22
+ # Recording creates a fresh browser context; restore viewport + zoom + effects
23
+ agent-browser set viewport {{VIEWPORT_WIDTH}} {{VIEWPORT_HEIGHT}} {{SESSION_ARG}}
24
24
  agent-browser eval --stdin {{SESSION_ARG}} < "{{INIT_SCRIPT_PATH}}" >/dev/null
25
25
 
26
26
  # Inject title overlay
@@ -0,0 +1,15 @@
1
+ #!/bin/bash
2
+ # Enable or disable CSS animations and transitions on the page.
3
+ # Preserves agex visual effects.
4
+ # Usage: ab-toggle-animations true|false
5
+ # true = enable animations (default browser behavior)
6
+ # false = disable animations
7
+ ENABLED="${1:-true}"
8
+ {{FX_INJECT_HELPER}}
9
+ if [ "$ENABLED" = "false" ]; then
10
+ agent-browser eval "window.__agexEffects.setPageAnimations(false)" {{SESSION_ARG}} >/dev/null 2>&1
11
+ echo "page animations disabled"
12
+ else
13
+ agent-browser eval "window.__agexEffects.setPageAnimations(true)" {{SESSION_ARG}} >/dev/null 2>&1
14
+ echo "page animations enabled"
15
+ fi
@@ -0,0 +1,39 @@
1
+ #!/bin/bash
2
+ # Smooth type: moves cursor to input, clicks to focus, then types text slowly
3
+ # Uses ab-click for the animated cursor movement + click
4
+ # Usage: ab-type <selector|@ref> "text"
5
+ SELECTOR="$1"
6
+ TEXT="$2"
7
+ if [ -z "$SELECTOR" ] || [ -z "$TEXT" ]; then
8
+ echo "Usage: ab-type <selector|@ref> \"text\""
9
+ exit 1
10
+ fi
11
+
12
+ # Move cursor to input and click to focus (animated)
13
+ ab-click "$SELECTOR"
14
+
15
+ # Show glow focus effect on the input (selector is JS-escaped: backslashes first, then single quotes)
16
+ ESCAPED_SEL=$(printf '%s' "$SELECTOR" | sed "s/\\\\/\\\\\\\\/g; s/'/\\\\'/g")
17
+ agent-browser eval "window.__agexEffects.cursor.showInputFocus('${ESCAPED_SEL}')" {{SESSION_ARG}} >/dev/null 2>&1
18
+
19
+ # Lock demo cursor so native mouse moves during typing don't cause jumps
20
+ agent-browser eval "if(window.__agexCursor) window.__agexCursor.setAnimationLock(true)" {{SESSION_ARG}} >/dev/null 2>&1
21
+
22
+ # Type text one character at a time with a human-like delay
23
+ LENGTH=${#TEXT}
24
+ for (( i=0; i<LENGTH; i++ )); do
25
+ CHAR="${TEXT:$i:1}"
26
+ agent-browser type "$SELECTOR" "$CHAR" {{SESSION_ARG}} >/dev/null 2>&1
27
+ agent-browser wait 70 {{SESSION_ARG}} >/dev/null 2>&1
28
+ done
29
+
30
+ # Brief pause so viewer can read the typed text
31
+ agent-browser wait 400 {{SESSION_ARG}} >/dev/null 2>&1
32
+
33
+ # Clear glow focus effect
34
+ agent-browser eval "window.__agexEffects.cursor.clearInputFocus()" {{SESSION_ARG}} >/dev/null 2>&1
35
+
36
+ # Unlock demo cursor
37
+ agent-browser eval "if(window.__agexCursor) window.__agexCursor.setAnimationLock(false)" {{SESSION_ARG}} >/dev/null 2>&1
38
+
39
+ echo "typed \"$TEXT\" into $SELECTOR"
@@ -1,5 +1,6 @@
1
1
  #!/bin/bash
2
- # Auto-inject --session for all agent-browser commands
2
+ # Auto-inject --session and ensure daemon discovery works
3
+ export AGENT_BROWSER_HOME="{{AGENT_BROWSER_HOME}}"
3
4
  for arg in "$@"; do
4
5
  [ "$arg" = "--session" ] && exec "{{REAL_AGENT_BROWSER}}" "$@"
5
6
  done
@@ -13,14 +13,22 @@ fi
13
13
  # Escape label for JS (handle quotes and backslashes)
14
14
  ESCAPED_LABEL=$(printf '%s' "$LABEL" | sed "s/\\\\/\\\\\\\\/g; s/'/\\\\'/g")
15
15
 
16
+ {{FX_INJECT_HELPER}}
17
+
16
18
  if [[ "$SELECTOR" == @* ]]; then
17
- # @ref format - use agent-browser get box to get element position
18
- BOX=$(agent-browser get box "$SELECTOR" {{SESSION_ARG}} 2>&1) || true
19
- if [ -z "$BOX" ] || [[ "$BOX" == *"error"* ]]; then
19
+ # @ref format - use get styles --json (get box does NOT support @ref)
20
+ STYLES=$(agent-browser get styles "$SELECTOR" --json {{SESSION_ARG}} 2>&1) || true
21
+ EL_X=$(echo "$STYLES" | grep -o '"x":[0-9.-]*' | head -1 | cut -d: -f2)
22
+ EL_Y=$(echo "$STYLES" | grep -o '"y":[0-9.-]*' | head -1 | cut -d: -f2)
23
+ EL_WIDTH=$(echo "$STYLES" | grep -o '"width":[0-9.-]*' | head -1 | cut -d: -f2)
24
+ EL_HEIGHT=$(echo "$STYLES" | grep -o '"height":[0-9.-]*' | head -1 | cut -d: -f2)
25
+
26
+ if [ -z "$EL_X" ] || [ -z "$EL_Y" ] || [ -z "$EL_WIDTH" ] || [ -z "$EL_HEIGHT" ]; then
20
27
  echo "Element not found: $SELECTOR"
21
28
  exit 1
22
29
  fi
23
30
 
31
+ BOX="{\"x\":$EL_X,\"y\":$EL_Y,\"width\":$EL_WIDTH,\"height\":$EL_HEIGHT}"
24
32
  agent-browser eval {{SESSION_ARG}} "window.__agexEffects.proof.highlightBox($BOX,'${ESCAPED_LABEL}','$COLOR')" 2>&1 || true
25
33
  else
26
34
  # CSS selector - use effects.js
@@ -0,0 +1,24 @@
1
+ #!/bin/bash
2
+ # Show a narration subtitle on the page, clearing any previous narration first.
3
+ # Usage: fx-subtitle <text> [duration_ms] [position] [fg_color] [bg_color]
4
+ # duration_ms defaults to 0 and position to "bottom"; colors default to empty strings.
5
+ TEXT="$1"
6
+ DURATION="${2:-0}"
7
+ POSITION="${3:-bottom}"
8
+ FG="${4:-}"
9
+ BG="${5:-}"
10
+ if [ -z "$TEXT" ]; then
11
+ echo "Usage: fx-subtitle <text> [duration_ms] [position] [fg_color] [bg_color]"
12
+ exit 1
13
+ fi
14
+ # DURATION is spliced into the JS call unquoted, so accept only a plain non-negative number
15
+ if ! [[ "$DURATION" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
16
+ echo "fx-subtitle: duration_ms must be a number, got: $DURATION"
17
+ exit 1
18
+ fi
19
+ # Escape a value for use inside a single-quoted JS string literal (backslashes first, then quotes)
20
+ js_escape() { printf '%s' "$1" | sed "s/\\\\/\\\\\\\\/g; s/'/\\\\'/g"; }
21
+ {{FX_INJECT_HELPER}}
22
+ agent-browser eval {{SESSION_ARG}} "window.__agexEffects.narration.clear()"
23
+ ESCAPED_TEXT=$(js_escape "$TEXT")
24
+ agent-browser eval {{SESSION_ARG}} "window.__agexEffects.narration.showSubtitle('${ESCAPED_TEXT}',${DURATION},'$(js_escape "$POSITION")','$(js_escape "$FG")','$(js_escape "$BG")')"
@@ -49,8 +49,10 @@ var ValidationError = class extends AgexError {
49
49
  };
50
50
  var SkillInstallError = class extends AgexError {
51
51
  skill;
52
- constructor(skill, cause) {
53
- super(`Failed to install skill: ${skill}`, "SKILL_INSTALL_ERROR", cause);
52
+ constructor(skill, detail, cause) {
53
+ const msg = detail ? `Failed to install skill: ${skill}
54
+ ${detail}` : `Failed to install skill: ${skill}`;
55
+ super(msg, "SKILL_INSTALL_ERROR", cause);
54
56
  this.name = "SkillInstallError";
55
57
  this.skill = skill;
56
58
  }
@@ -140,7 +142,7 @@ var DEFAULTS = {
140
142
  // Browser / video
141
143
  VIEWPORT_WIDTH: 1920,
142
144
  VIEWPORT_HEIGHT: 1080,
143
- DEDUP_FPS: 5,
145
+ DEDUP_FPS: 24,
144
146
  XFADE_DURATION_SEC: 0.3,
145
147
  // Git truncation limits
146
148
  GIT_DIFF_MAX_LINES: 100,
@@ -4713,6 +4715,14 @@ function extractToolResult(toolCall) {
4713
4715
  stderr: error.stderr ?? error.message
4714
4716
  };
4715
4717
  }
4718
+ const failure = result.failure;
4719
+ if (failure) {
4720
+ return {
4721
+ exitCode: failure.exitCode ?? 1,
4722
+ stdout: failure.stdout,
4723
+ stderr: failure.stderr ?? failure.message
4724
+ };
4725
+ }
4716
4726
  }
4717
4727
  }
4718
4728
  return null;
@@ -5457,11 +5467,15 @@ function getScriptEntries(videoEnabled) {
5457
5467
  { outputName: "fx-spotlight", scriptFile: "fx-spotlight.sh" },
5458
5468
  { outputName: "fx-circle", scriptFile: "fx-circle.sh" },
5459
5469
  { outputName: "fx-arrow", scriptFile: "fx-arrow.sh" },
5470
+ { outputName: "fx-subtitle", scriptFile: "fx-subtitle.sh" },
5460
5471
  { outputName: "fx-clear", scriptFile: "fx-clear.sh" },
5461
5472
  { outputName: "fx-wait", scriptFile: "fx-wait.sh" },
5462
5473
  { outputName: "ab-proof", scriptFile: "ab-proof.sh" },
5463
5474
  { outputName: "ab-not-found", scriptFile: "ab-not-found.sh" },
5464
5475
  { outputName: "ab-move", scriptFile: "ab-move.sh" },
5476
+ { outputName: "ab-click", scriptFile: "ab-click.sh" },
5477
+ { outputName: "ab-type", scriptFile: "ab-type.sh" },
5478
+ { outputName: "ab-toggle-animations", scriptFile: "ab-toggle-animations.sh" },
5465
5479
  { outputName: "agent-browser", scriptFile: "agent-browser-wrapper.sh" }
5466
5480
  ];
5467
5481
  }
@@ -5474,19 +5488,40 @@ function loadAndRenderScript(scriptFile, vars) {
5474
5488
  var log8 = createLogger("browse");
5475
5489
  function resolveAgentBrowser() {
5476
5490
  try {
5477
- const resolved = execSync2("which agent-browser", { encoding: "utf-8" }).trim();
5478
- log8.debug(`resolved agent-browser: ${resolved}`);
5479
- return resolved;
5491
+ const binPath = execSync2("which agent-browser", { encoding: "utf-8" }).trim();
5492
+ log8.debug(`resolved agent-browser: ${binPath}`);
5493
+ let home;
5494
+ const binDir = path4.dirname(binPath);
5495
+ const npmGlobalCandidate = path4.join(binDir, "..", "lib", "node_modules", "agent-browser");
5496
+ if (fs3.existsSync(path4.join(npmGlobalCandidate, "package.json"))) {
5497
+ home = path4.resolve(npmGlobalCandidate);
5498
+ }
5499
+ if (!home) {
5500
+ try {
5501
+ const realBin = fs3.realpathSync(binPath);
5502
+ let dir = path4.dirname(realBin);
5503
+ for (let i = 0; i < 5; i++) {
5504
+ if (fs3.existsSync(path4.join(dir, "package.json"))) {
5505
+ home = dir;
5506
+ break;
5507
+ }
5508
+ dir = path4.dirname(dir);
5509
+ }
5510
+ } catch {
5511
+ }
5512
+ }
5513
+ log8.debug(`agent-browser home: ${home ?? "not found"}`);
5514
+ return { bin: binPath, home };
5480
5515
  } catch {
5481
5516
  log8.warn("agent-browser not found in PATH, using bare command name");
5482
- return "agent-browser";
5517
+ return { bin: "agent-browser" };
5483
5518
  }
5484
5519
  }
5485
5520
  function buildFxInjectHelper(sessionArg, initScriptPath) {
5486
5521
  return `
5487
- # Ensure effects are loaded (check may fail if page not ready \u2014 that's ok)
5488
- if ! agent-browser eval ${sessionArg} "!!window.__agexEffects" 2>&1 | grep -q "true"; then
5489
- agent-browser eval --stdin ${sessionArg} < "${initScriptPath}" >/dev/null
5522
+ # Ensure effects are loaded (re-inject if not present)
5523
+ if ! agent-browser eval ${sessionArg} "!!window.__agexEffects" 2>/dev/null | grep -q "true"; then
5524
+ agent-browser eval --stdin ${sessionArg} < "${initScriptPath}" >/dev/null || { echo "Error: failed to inject effects (is browser session running?)" >&2; exit 1; }
5490
5525
  fi`;
5491
5526
  }
5492
5527
  function buildTemplateVars(config, tempDir, sessionId, initScriptPath) {
@@ -5498,7 +5533,7 @@ function buildTemplateVars(config, tempDir, sessionId, initScriptPath) {
5498
5533
  const rawVideoPath = path4.join(outputDir, "recording-raw.webm");
5499
5534
  const finalVideoPath = path4.join(outputDir, "recording.mp4");
5500
5535
  const sessionArg = `--session ${sessionId}`;
5501
- const realAgentBrowser = resolveAgentBrowser();
5536
+ const agentBrowser = resolveAgentBrowser();
5502
5537
  return {
5503
5538
  SESSION_ID: sessionId,
5504
5539
  SESSION_ARG: sessionArg,
@@ -5512,7 +5547,8 @@ function buildTemplateVars(config, tempDir, sessionId, initScriptPath) {
5512
5547
  RAW_VIDEO_PATH: rawVideoPath,
5513
5548
  FINAL_VIDEO_PATH: finalVideoPath,
5514
5549
  DEDUP_FPS: String(DEFAULTS.DEDUP_FPS),
5515
- REAL_AGENT_BROWSER: realAgentBrowser,
5550
+ REAL_AGENT_BROWSER: agentBrowser.bin,
5551
+ AGENT_BROWSER_HOME: agentBrowser.home ?? "",
5516
5552
  FX_INJECT_HELPER: buildFxInjectHelper(sessionArg, initScriptPath),
5517
5553
  VIDEO_MKDIR: videoEnabled ? `mkdir -p "${segmentsDir}"` : "# video recording disabled",
5518
5554
  VIDEO_INIT_COUNTER: videoEnabled ? `echo "0" > "${segmentCounterPath}"` : "",
@@ -5542,13 +5578,17 @@ function createWorkspaceFiles(config) {
5542
5578
  const initScriptPath = writeInitScript(tempDir, config);
5543
5579
  const vars = buildTemplateVars(config, tempDir, sessionId, initScriptPath);
5544
5580
  writeScripts(tempDir, vars, videoEnabled);
5581
+ const cursorDir = path4.join(tempDir, ".cursor");
5582
+ fs3.mkdirSync(cursorDir, { recursive: true });
5583
+ fs3.writeFileSync(path4.join(cursorDir, "mcp.json"), JSON.stringify({ mcpServers: {} }));
5545
5584
  const wrapperPath = path4.join(tempDir, "ab-fx");
5546
5585
  return {
5547
5586
  workspacePath: tempDir,
5548
5587
  initScriptPath,
5549
5588
  injectEffectsCommand: wrapperPath,
5550
5589
  env: {
5551
- AGENT_BROWSER_SESSION: sessionId
5590
+ AGENT_BROWSER_SESSION: sessionId,
5591
+ ...vars.AGENT_BROWSER_HOME ? { AGENT_BROWSER_HOME: vars.AGENT_BROWSER_HOME } : {}
5552
5592
  },
5553
5593
  cleanup: () => {
5554
5594
  try {
@@ -5659,7 +5699,7 @@ EXAMPLE - FAIL case: Verifying "dropdown has Options A, B, and C" but B is missi
5659
5699
  # === PHASE 1: EXPLORE ===
5660
5700
  ab-open "https://example.com"
5661
5701
  agent-browser snapshot -i
5662
- agent-browser click @dropdownButton
5702
+ ab-click @dropdownButton
5663
5703
  agent-browser wait 500
5664
5704
  agent-browser snapshot -i
5665
5705
  # See: link "Option A" [ref=e5], link "Option C" [ref=e7]
@@ -5670,7 +5710,7 @@ agent-browser eval 'document.body.textContent.includes("Option B") ? "found" : "
5670
5710
 
5671
5711
  # === PHASE 2: RECORD (document the FAILURE - RED highlight is PRIMARY!) ===
5672
5712
  ab-record-start
5673
- agent-browser click @dropdownButton
5713
+ ab-click @dropdownButton
5674
5714
  agent-browser wait 500
5675
5715
  agent-browser snapshot -i
5676
5716
  # CRITICAL: For FAIL, highlight the MISSING element FIRST (this is your evidence!)
@@ -5689,6 +5729,8 @@ COMMANDS:
5689
5729
  ab-record-start # Start recording
5690
5730
  ab-close [1 2 ...] # Stop recording and merge segments
5691
5731
  ab-screenshot <name>.png # Capture screenshot
5732
+ ab-click <selector|@ref> # Smooth cursor animation + click
5733
+ ab-type <selector|@ref> "text" # Click input + type text slowly (triggers React events)
5692
5734
 
5693
5735
  EFFECTS (use AFTER ab-record-start):
5694
5736
  fx-highlight <selector> "label" [color] # Highlight element with label
@@ -5747,6 +5789,20 @@ For elements WITH @ref:
5747
5789
  agent-browser get attr @e5 aria-label # Get aria-label
5748
5790
  agent-browser get styles @e5 --json # Get tag name from "tag" field
5749
5791
 
5792
+ TYPING INTO INPUTS:
5793
+ ALWAYS use ab-type or agent-browser fill to type into inputs.
5794
+ NEVER set .value via eval \u2014 it does NOT trigger React/framework events and buttons stay disabled.
5795
+ ab-type @e1 "text" # Best: animated cursor + slow typing (triggers all events)
5796
+ agent-browser fill @e1 "text" # Instant replace (clears existing text)
5797
+
5798
+ IMPORTANT: ab-type APPENDS text \u2014 it does NOT clear existing content.
5799
+ If the input already has text and you want to REPLACE it, clear first:
5800
+ agent-browser fill @e1 "text" # Easiest: instant replace
5801
+ # OR for animated typing with clear:
5802
+ ab-click @e1 # Focus the input
5803
+ agent-browser key @e1 Meta+a # Select all existing text
5804
+ ab-type @e1 "new text" # First char replaces selection, rest appends
5805
+
5750
5806
  BROWSER:
5751
5807
  agent-browser snapshot # Get ALL elements with refs
5752
5808
  agent-browser snapshot -i # Interactive only (buttons, links, inputs)
@@ -5755,8 +5811,8 @@ BROWSER:
5755
5811
  agent-browser snapshot -s "#id" # Scope to CSS selector
5756
5812
  agent-browser scrollintoview @e1 # Smooth scroll to element
5757
5813
  agent-browser wait <ms> # Wait milliseconds
5758
- agent-browser click @e1 # Click element
5759
- agent-browser eval '<js>' # Run JavaScript
5814
+ ab-click @e1 # Click element (smooth cursor animation)
5815
+ agent-browser eval '<js>' # Run JavaScript (NOT for typing into inputs!)
5760
5816
 
5761
5817
  SNAPSHOT TIPS:
5762
5818
  - Use snapshot (no flags) when looking for VISUAL elements (images, logos, text)
@@ -5809,7 +5865,8 @@ function parseVerdict(output) {
5809
5865
  if (matches.length === 0) {
5810
5866
  return { verdict: "fail", reason: "No verdict found in output" };
5811
5867
  }
5812
- const lastMatch = matches[matches.length - 1];
5868
+ const real = matches.filter((m) => !(m[2] ?? "").includes("PROVE_VERDICT"));
5869
+ const lastMatch = real.length > 0 ? real[real.length - 1] : matches[matches.length - 1];
5813
5870
  return {
5814
5871
  verdict: lastMatch[1].toLowerCase(),
5815
5872
  reason: lastMatch[2]?.trim() || ""
@@ -5832,14 +5889,14 @@ function installSkills(workspacePath, agent, skills) {
5832
5889
  });
5833
5890
  log9.debug(`installed skill: ${skill}`);
5834
5891
  } catch (err) {
5835
- const error = new SkillInstallError(skill, err instanceof Error ? err : void 0);
5892
+ const detail = (err?.stderr ?? err?.stdout)?.toString().trim() || err?.message || "";
5893
+ const error = new SkillInstallError(skill, detail, err instanceof Error ? err : void 0);
5836
5894
  log9.warn(error.message);
5837
5895
  }
5838
5896
  }
5839
5897
  }
5840
5898
  var DEFAULT_SKILLS = ["vercel-labs/agent-browser@agent-browser"];
5841
5899
  var DEFAULT_OPTIONS = {
5842
- agent: DEFAULTS.AGENT,
5843
5900
  outputDir: "./prove-results",
5844
5901
  video: true,
5845
5902
  screenshots: true,
@@ -5858,7 +5915,8 @@ async function prove(assertion, options = {}) {
5858
5915
  throw new ValidationError("Assertion cannot be empty", "assertion");
5859
5916
  }
5860
5917
  const opts = { ...DEFAULT_OPTIONS, ...options };
5861
- const agent = opts.agent;
5918
+ const adapter = opts.agent ? getAdapter(opts.agent) : await findAvailableAgent();
5919
+ const agent = adapter.name;
5862
5920
  const outputDir = createOutputDir(opts.outputDir, agent, assertion);
5863
5921
  log9.info(`proving: "${assertion}" (agent=${agent})`);
5864
5922
  const browserConfig = {
@@ -6098,12 +6156,14 @@ function normalizeReviewOutput(output) {
6098
6156
  async function runReview(request) {
6099
6157
  const hypothesesCount = request.hypotheses ?? DEFAULTS.HYPOTHESES_COUNT;
6100
6158
  const includeWorktree = request.includeWorktree ?? true;
6101
- log11.info(`reviewing: base=${request.baseRef} agent=${request.agent} hypotheses=${hypothesesCount}`);
6159
+ const adapter = request.agent ? getAdapter(request.agent) : await findAvailableAgent();
6160
+ const agent = adapter.name;
6161
+ log11.info(`reviewing: base=${request.baseRef} agent=${agent} hypotheses=${hypothesesCount}`);
6102
6162
  const gitContext = await buildGitContext(request.baseRef, includeWorktree, request.cwd);
6103
6163
  const planPrompt = buildPlanPrompt(gitContext, hypothesesCount, request.promptHint);
6104
6164
  log11.debug("generating review plan...");
6105
6165
  const plan = await runAgent({
6106
- agent: request.agent,
6166
+ agent,
6107
6167
  model: request.model,
6108
6168
  prompt: planPrompt,
6109
6169
  cwd: request.cwd,
@@ -6112,7 +6172,7 @@ async function runReview(request) {
6112
6172
  });
6113
6173
  log11.debug("executing review...");
6114
6174
  const execution = await runAgent({
6115
- agent: request.agent,
6175
+ agent,
6116
6176
  model: request.model,
6117
6177
  prompt: plan.json.stdout,
6118
6178
  cwd: request.cwd,
@@ -6121,7 +6181,7 @@ async function runReview(request) {
6121
6181
  });
6122
6182
  const outputJson = {
6123
6183
  baseRef: request.baseRef,
6124
- agent: request.agent,
6184
+ agent,
6125
6185
  model: request.model,
6126
6186
  plan: plan.json,
6127
6187
  execution: execution.json
@@ -6170,7 +6230,7 @@ HELPER SCRIPTS:
6170
6230
 
6171
6231
  AGENT-BROWSER COMMANDS:
6172
6232
  - agent-browser snapshot -i # Get interactive elements with refs (@e1, @e2)
6173
- - agent-browser click @e1 # Click element
6233
+ - ab-click @e1 # Click element (smooth cursor animation)
6174
6234
  - agent-browser fill @e2 "text" # Fill input
6175
6235
  - agent-browser get text @e1 # Get element text
6176
6236
  - agent-browser eval "<js>" # Run JavaScript
@@ -6248,13 +6308,15 @@ async function provePr(request) {
6248
6308
  const baseRef = request.baseRef || detectDefaultBranch();
6249
6309
  const outputDir = createOutputDir2(request.outputDir);
6250
6310
  const { width, height } = request.viewport;
6251
- log12.info(`proving PR: base=${baseRef} agent=${request.agent} hypotheses=${request.hypothesesCount}`);
6311
+ const adapter = request.agent ? getAdapter(request.agent) : await findAvailableAgent();
6312
+ const agent = adapter.name;
6313
+ log12.info(`proving PR: base=${baseRef} agent=${agent} hypotheses=${request.hypothesesCount}`);
6252
6314
  const startTime = Date.now();
6253
6315
  const gitContext = await buildGitContext(baseRef, true);
6254
6316
  log12.debug("generating hypotheses...");
6255
6317
  const planPrompt = buildPlanPrompt(gitContext, request.hypothesesCount, request.hint);
6256
6318
  const planResult = await runAgent({
6257
- agent: request.agent,
6319
+ agent,
6258
6320
  model: request.model,
6259
6321
  prompt: planPrompt,
6260
6322
  streamOutput: true,
@@ -6276,7 +6338,7 @@ async function provePr(request) {
6276
6338
  const provePrompt = buildProvePrPrompt(gitContext, hypothesesPlan, outputDir, request.url);
6277
6339
  try {
6278
6340
  const proveResult = await runAgent({
6279
- agent: request.agent,
6341
+ agent,
6280
6342
  model: request.model,
6281
6343
  prompt: provePrompt,
6282
6344
  approveMcps: true,
@@ -6319,7 +6381,7 @@ var viewportSchema2 = external_exports.object({
6319
6381
  });
6320
6382
  var provePrRequestSchema = external_exports.object({
6321
6383
  baseRef: external_exports.string().optional(),
6322
- agent: external_exports.enum(AGENT_NAMES),
6384
+ agent: external_exports.enum(AGENT_NAMES).optional(),
6323
6385
  url: external_exports.string().url("Invalid URL").optional(),
6324
6386
  outputDir: external_exports.string().min(1, "Output directory cannot be empty"),
6325
6387
  hypothesesCount: external_exports.number().int().positive("Hypotheses count must be a positive integer"),
@@ -6331,14 +6393,25 @@ var provePrRequestSchema = external_exports.object({
6331
6393
 
6332
6394
  export {
6333
6395
  AgexError,
6396
+ ValidationError,
6397
+ SkillInstallError,
6334
6398
  isAgexError,
6399
+ createLogger,
6335
6400
  AGENT_NAMES,
6336
6401
  OUTPUT_MODES,
6337
6402
  APPROVAL_POLICIES,
6403
+ DEFAULTS,
6404
+ external_exports,
6338
6405
  viewportStringSchema,
6339
6406
  validate,
6407
+ slugify,
6408
+ timestamp,
6409
+ buildPathEnv,
6340
6410
  detectDefaultBranch,
6411
+ getAdapter,
6412
+ findAvailableAgent,
6341
6413
  runAgent,
6414
+ createWorkspaceFiles,
6342
6415
  prove,
6343
6416
  proveOptionsSchema,
6344
6417
  runReview,