browser-automation-skill 0.71.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +144 -0
  3. package/SECURITY.md +39 -0
  4. package/SKILL.md +206 -0
  5. package/bin/cli.mjs +55 -0
  6. package/install.sh +143 -0
  7. package/package.json +54 -0
  8. package/references/adapter-candidates.md +40 -0
  9. package/references/browser-mcp-cheatsheet.md +132 -0
  10. package/references/browser-stats-cheatsheet.md +155 -0
  11. package/references/chrome-devtools-mcp-cheatsheet.md +232 -0
  12. package/references/midscene-integration.md +359 -0
  13. package/references/obscura-cheatsheet.md +103 -0
  14. package/references/playwright-cli-cheatsheet.md +64 -0
  15. package/references/playwright-lib-cheatsheet.md +90 -0
  16. package/references/recipes/add-a-tool-adapter.md +134 -0
  17. package/references/recipes/agent-workflows/README.md +37 -0
  18. package/references/recipes/agent-workflows/cache-driven-bulk-operation.md +110 -0
  19. package/references/recipes/agent-workflows/flow-record-and-replay.md +102 -0
  20. package/references/recipes/agent-workflows/incremental-pattern-discovery.md +125 -0
  21. package/references/recipes/agent-workflows/login-then-scrape.md +100 -0
  22. package/references/recipes/anti-patterns-tool-extension.md +182 -0
  23. package/references/recipes/body-bytes-not-body.md +139 -0
  24. package/references/recipes/cache-write-security.md +210 -0
  25. package/references/recipes/fingerprint-rescue.md +154 -0
  26. package/references/recipes/model-routing.md +143 -0
  27. package/references/recipes/path-security.md +138 -0
  28. package/references/recipes/privacy-canary.md +96 -0
  29. package/references/recipes/visual-rescue-hook.md +182 -0
  30. package/references/stats-prices.json +42 -0
  31. package/references/stats-schema.json +77 -0
  32. package/references/tool-versions.md +8 -0
  33. package/scripts/browser-add-site.sh +113 -0
  34. package/scripts/browser-assert.sh +106 -0
  35. package/scripts/browser-audit.sh +68 -0
  36. package/scripts/browser-baseline.sh +135 -0
  37. package/scripts/browser-click.sh +100 -0
  38. package/scripts/browser-creds-add.sh +254 -0
  39. package/scripts/browser-creds-list.sh +67 -0
  40. package/scripts/browser-creds-migrate.sh +122 -0
  41. package/scripts/browser-creds-remove.sh +69 -0
  42. package/scripts/browser-creds-rotate-totp.sh +109 -0
  43. package/scripts/browser-creds-show.sh +82 -0
  44. package/scripts/browser-creds-totp.sh +94 -0
  45. package/scripts/browser-do.sh +630 -0
  46. package/scripts/browser-doctor.sh +365 -0
  47. package/scripts/browser-drag.sh +90 -0
  48. package/scripts/browser-extract.sh +192 -0
  49. package/scripts/browser-fill.sh +142 -0
  50. package/scripts/browser-flow.sh +316 -0
  51. package/scripts/browser-history.sh +187 -0
  52. package/scripts/browser-hover.sh +92 -0
  53. package/scripts/browser-inspect.sh +188 -0
  54. package/scripts/browser-list-sessions.sh +78 -0
  55. package/scripts/browser-list-sites.sh +42 -0
  56. package/scripts/browser-login.sh +279 -0
  57. package/scripts/browser-mcp.sh +65 -0
  58. package/scripts/browser-migrate.sh +195 -0
  59. package/scripts/browser-open.sh +134 -0
  60. package/scripts/browser-press.sh +80 -0
  61. package/scripts/browser-remove-session.sh +72 -0
  62. package/scripts/browser-remove-site.sh +68 -0
  63. package/scripts/browser-replay.sh +206 -0
  64. package/scripts/browser-route.sh +174 -0
  65. package/scripts/browser-select.sh +122 -0
  66. package/scripts/browser-show-session.sh +57 -0
  67. package/scripts/browser-show-site.sh +37 -0
  68. package/scripts/browser-snapshot.sh +176 -0
  69. package/scripts/browser-stats.sh +522 -0
  70. package/scripts/browser-tab-close.sh +112 -0
  71. package/scripts/browser-tab-list.sh +70 -0
  72. package/scripts/browser-tab-switch.sh +111 -0
  73. package/scripts/browser-upload.sh +132 -0
  74. package/scripts/browser-use.sh +60 -0
  75. package/scripts/browser-vlm.sh +707 -0
  76. package/scripts/browser-wait.sh +97 -0
  77. package/scripts/install-git-hooks.sh +16 -0
  78. package/scripts/lib/capture.sh +356 -0
  79. package/scripts/lib/common.sh +262 -0
  80. package/scripts/lib/credential.sh +237 -0
  81. package/scripts/lib/fingerprint-rescue.js +123 -0
  82. package/scripts/lib/flow.sh +448 -0
  83. package/scripts/lib/flow_record.sh +210 -0
  84. package/scripts/lib/mask.sh +49 -0
  85. package/scripts/lib/memory.sh +427 -0
  86. package/scripts/lib/migrate.sh +390 -0
  87. package/scripts/lib/migrators/README.md +23 -0
  88. package/scripts/lib/migrators/memory/v1_to_v2.sh +15 -0
  89. package/scripts/lib/migrators/recent_urls/README.md +13 -0
  90. package/scripts/lib/migrators/stats/README.md +24 -0
  91. package/scripts/lib/node/chrome-devtools-bridge.mjs +1812 -0
  92. package/scripts/lib/node/mcp-server.mjs +531 -0
  93. package/scripts/lib/node/mcp-tools.json +68 -0
  94. package/scripts/lib/node/playwright-driver.mjs +1104 -0
  95. package/scripts/lib/node/totp-core.mjs +52 -0
  96. package/scripts/lib/node/totp.mjs +52 -0
  97. package/scripts/lib/node/url-pattern-cluster.mjs +102 -0
  98. package/scripts/lib/node/url-pattern-resolver.mjs +77 -0
  99. package/scripts/lib/output.sh +79 -0
  100. package/scripts/lib/router.sh +342 -0
  101. package/scripts/lib/sanitize.sh +107 -0
  102. package/scripts/lib/secret/keychain.sh +91 -0
  103. package/scripts/lib/secret/libsecret.sh +74 -0
  104. package/scripts/lib/secret/plaintext.sh +75 -0
  105. package/scripts/lib/secret_backend_select.sh +57 -0
  106. package/scripts/lib/session.sh +153 -0
  107. package/scripts/lib/site.sh +126 -0
  108. package/scripts/lib/stats.sh +419 -0
  109. package/scripts/lib/tool/.gitkeep +0 -0
  110. package/scripts/lib/tool/chrome-devtools-mcp.sh +349 -0
  111. package/scripts/lib/tool/obscura.sh +249 -0
  112. package/scripts/lib/tool/playwright-cli.sh +155 -0
  113. package/scripts/lib/tool/playwright-lib.sh +106 -0
  114. package/scripts/lib/verb_helpers.sh +222 -0
  115. package/scripts/lib/visual-rescue-default.sh +145 -0
  116. package/scripts/regenerate-docs.sh +99 -0
  117. package/uninstall.sh +51 -0
@@ -0,0 +1,155 @@
1
+ # scripts/lib/tool/playwright-cli.sh — Playwright CLI tool adapter.
2
+ #
3
+ # Implements the Tool Adapter Extension Model contract from
4
+ # docs/superpowers/specs/2026-04-30-tool-adapter-extension-model-design.md §2.
5
+ #
6
+ # Identity: tool_metadata, tool_capabilities, tool_doctor_check
7
+ # Verb dispatch: tool_open, tool_click, tool_fill, tool_snapshot, tool_inspect,
8
+ # tool_audit, tool_extract, tool_eval
9
+ # All verb-dispatch functions in this file currently shell to the playwright
10
+ # binary (real path) OR to ${PLAYWRIGHT_CLI_BIN:-playwright-cli} (overridable for
11
+ # tests, which set it to tests/stubs/playwright-cli).
12
+ #
13
+ # Adapters are LEAVES — never source another adapter. Shared logic factors into
14
+ # scripts/lib/<concern>.sh (sibling to lib/tool/).
15
+
16
+ [ -n "${_BROWSER_TOOL_PLAYWRIGHT_CLI_LOADED:-}" ] && return 0
17
+ readonly _BROWSER_TOOL_PLAYWRIGHT_CLI_LOADED=1
18
+
19
+ # Required by spec 2026-05-01-token-efficient-adapter-output-design §8: every
20
+ # adapter sources output.sh so verb-dispatch emits JSON via emit_summary /
21
+ # emit_event rather than hand-rolled printf. Lint tier 3 enforces this.
22
+ # shellcheck source=../output.sh
23
+ # shellcheck disable=SC1091
24
+ source "$(dirname "${BASH_SOURCE[0]}")/../output.sh"
25
+
26
+ readonly _BROWSER_TOOL_PLAYWRIGHT_CLI_BIN="${PLAYWRIGHT_CLI_BIN:-playwright-cli}"
27
+ readonly _BROWSER_TOOL_PLAYWRIGHT_CLI_DEFAULT_VIEWPORT="1280x800"
28
+
29
+ # --- Identity functions (called by framework once or for queries) ---
30
+
31
# tool_metadata — print this adapter's static identity record as a JSON object
# on stdout (name, ABI version, version pin, cheatsheet path, install hint).
tool_metadata() {
  printf '%s\n' \
    '{' \
    '"name": "playwright-cli",' \
    '"abi_version": 1,' \
    '"version_pin": "1.49.x",' \
    '"cheatsheet_path": "references/playwright-cli-cheatsheet.md",' \
    '"install_hint": "npm i -g playwright @playwright/test @playwright/cli && playwright install chromium"' \
    '}'
}
42
+
43
# tool_capabilities — print the verbs this adapter declares, with the flags
# declared for each verb, as a JSON object on stdout.
tool_capabilities() {
  printf '%s\n' \
    '{' \
    '"verbs": {' \
    '"open": { "flags": ["--headed", "--viewport", "--user-agent"] },' \
    '"click": { "flags": ["--ref", "--selector"] },' \
    '"fill": { "flags": ["--ref", "--text"] },' \
    '"snapshot": { "flags": ["--depth"] }' \
    '}' \
    '}'
}
55
+
56
# _playwright_cli_json_escape STRING — print STRING escaped so it can sit
# between the double quotes of a JSON string value. Escapes backslash, double
# quote, newline, carriage return and tab; other control characters are passed
# through unchanged.
_playwright_cli_json_escape() {
  local s="${1-}"
  local nl=$'\n' cr=$'\r' tab=$'\t'
  # Backslash must be doubled first so later escapes are not re-escaped.
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//"${nl}"/\\n}
  s=${s//"${cr}"/\\r}
  s=${s//"${tab}"/\\t}
  printf '%s' "${s}"
}

# tool_doctor_check — report whether the configured playwright-cli binary is
# usable. Always returns 0; the verdict is carried in the JSON on stdout:
#   binary not found → { "ok": false, "binary": ..., "error": "not on PATH", "install_hint": ... }
#   binary found     → {"ok":true,"binary":...,"version":...}
# The binary name and the `--version` output are JSON-escaped before being
# embedded, so quotes, backslashes or multi-line version banners cannot break
# the emitted document.
tool_doctor_check() {
  local bin_json
  bin_json="$(_playwright_cli_json_escape "${_BROWSER_TOOL_PLAYWRIGHT_CLI_BIN}")"

  if ! command -v "${_BROWSER_TOOL_PLAYWRIGHT_CLI_BIN}" >/dev/null 2>&1; then
    printf '{ "ok": false, "binary": "%s", "error": "not on PATH",\n"install_hint": "%s" }\n' \
      "${bin_json}" \
      "npm i -g playwright @playwright/test @playwright/cli && playwright install chromium"
    return 0
  fi

  local version
  # A failing --version is not fatal for the doctor report; fall back to a tag.
  version="$("${_BROWSER_TOOL_PLAYWRIGHT_CLI_BIN}" --version 2>/dev/null || printf 'unknown')"
  printf '{"ok":true,"binary":"%s","version":"%s"}\n' \
    "${bin_json}" "$(_playwright_cli_json_escape "${version}")"
}
69
+
70
+ # --- Verb-dispatch functions ---
71
+ # Each function:
72
+ # - Reads named flags from "$@".
73
+ # - Never accepts secrets in argv (uses --secret-stdin pattern).
74
+ # - Emits zero-or-more streaming JSON lines to stdout.
75
+ # - Returns 41 if it cannot handle the op (defensive — router shouldn't route
76
+ # here, but the guard is cheap).
77
+
78
+ # Skill→tool argv translation: real playwright-cli takes most args as positional
79
+ # (e.g. `open <url>`, `click <ref>`, `fill <ref> <text>`). Adapters are the
80
+ # translation boundary — verb scripts speak skill-flag surface, adapters convert.
81
+
82
# tool_open [--url URL] [ARGS...] — translate the skill's `--url URL` flag into
# the positional form `open URL ARGS...` and run the playwright-cli binary.
# All other arguments are forwarded unchanged, after the URL.
# Returns the binary's exit status, or 41 when `--url` is given without a
# value (the op cannot be handled as requested).
tool_open() {
  local url=""
  local rest=()
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --url)
        # Without this guard `shift 2` fails on a trailing `--url`, nothing is
        # consumed, and the loop never terminates.
        if [ "$#" -lt 2 ]; then
          printf 'tool_open: --url requires a value\n' >&2
          return 41
        fi
        url="$2"
        shift 2
        ;;
      *)
        rest+=("$1")
        shift
        ;;
    esac
  done

  local cmd=(open)
  if [ -n "${url}" ]; then
    cmd+=("${url}")
  fi
  # ${arr[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
  cmd+=(${rest[@]+"${rest[@]}"})
  "${_BROWSER_TOOL_PLAYWRIGHT_CLI_BIN}" "${cmd[@]}"
}
97
+
98
# tool_click (--ref REF | --selector SEL) [ARGS...] — translate the skill's
# target flag into the positional form `click TARGET ARGS...` and run the
# playwright-cli binary. If both flags are given, the last one wins.
# Returns the binary's exit status, or 41 when no target was supplied or a
# target flag is missing its value.
tool_click() {
  local target=""
  local rest=()
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --ref|--selector)
        # Guard the value: a trailing flag would make `shift 2` fail and the
        # loop spin forever.
        if [ "$#" -lt 2 ]; then
          printf 'tool_click: %s requires a value\n' "$1" >&2
          return 41
        fi
        target="$2"
        shift 2
        ;;
      *)
        rest+=("$1")
        shift
        ;;
    esac
  done

  if [ -z "${target}" ]; then
    return 41
  fi
  # ${arr[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
  "${_BROWSER_TOOL_PLAYWRIGHT_CLI_BIN}" click "${target}" ${rest[@]+"${rest[@]}"}
}
110
+
111
# tool_fill (--ref REF | --selector SEL) --text TEXT [ARGS...] — translate the
# skill flags into the positional form `fill TARGET TEXT ARGS...` and run the
# playwright-cli binary.
# Returns the binary's exit status, or 41 when:
#   - --secret-stdin was requested (see below),
#   - the target or the text is missing or empty,
#   - a value-taking flag is the last argument.
tool_fill() {
  local target="" text="" use_stdin=0
  local rest=()
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --ref|--selector|--text)
        # Guard the value: a trailing flag would make `shift 2` fail and the
        # loop spin forever.
        if [ "$#" -lt 2 ]; then
          printf 'tool_fill: %s requires a value\n' "$1" >&2
          return 41
        fi
        if [ "$1" = "--text" ]; then
          text="$2"
        else
          target="$2"
        fi
        shift 2
        ;;
      --secret-stdin)
        use_stdin=1
        shift
        ;;
      *)
        rest+=("$1")
        shift
        ;;
    esac
  done

  if [ "${use_stdin}" = "1" ]; then
    # playwright-cli has no stdin-secret mode; passing the secret as a
    # positional arg would leak it via argv (anti-pattern AP-7). Reject —
    # routing should pick playwright-lib (Phase 4) which reads stdin in node.
    return 41
  fi
  if [ -z "${target}" ] || [ -z "${text}" ]; then
    return 41
  fi
  # ${arr[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
  "${_BROWSER_TOOL_PLAYWRIGHT_CLI_BIN}" fill "${target}" "${text}" ${rest[@]+"${rest[@]}"}
}
131
+
132
# tool_snapshot [ARGS...] — run `snapshot` on the playwright-cli binary,
# forwarding every argument untouched. No translation is done here: snapshot
# has no required args, and --depth N is passed straight through as a flag the
# binary recognises natively.
tool_snapshot() {
  local -a cmd=(snapshot "$@")
  "${_BROWSER_TOOL_PLAYWRIGHT_CLI_BIN}" "${cmd[@]}"
}
137
+
138
# tool_inspect — not supported by this adapter; always returns 41.
# Real playwright-cli has no `inspect` subcommand. The closest composition
# (snapshot + eval per-ref) is non-trivial and lives in the Phase 5
# chrome-devtools-mcp adapter, which has first-class console + network + eval.
tool_inspect() { return 41; }
144
+
145
# tool_audit — not handled by this adapter; always returns 41.
tool_audit() { return 41; }
148
+
149
# tool_extract — not handled by this adapter; always returns 41.
tool_extract() { return 41; }
152
+
153
# tool_eval [ARGS...] — run `eval` on the playwright-cli binary, forwarding
# every argument untouched. Returns the binary's exit status.
tool_eval() {
  local -a cmd=(eval "$@")
  "${_BROWSER_TOOL_PLAYWRIGHT_CLI_BIN}" "${cmd[@]}"
}
@@ -0,0 +1,106 @@
1
+ # scripts/lib/tool/playwright-lib.sh — Playwright (node-bridge) tool adapter.
2
+ #
3
+ # Implements the Tool Adapter Extension Model contract from
4
+ # docs/superpowers/specs/2026-04-30-tool-adapter-extension-model-design.md §2.
5
+ #
6
+ # Routes verb dispatch to scripts/lib/node/playwright-driver.mjs which speaks
7
+ # the real Playwright API. Stub mode (BROWSER_SKILL_LIB_STUB=1) is used by
8
+ # tests + CI; real mode lands when the driver's real branch ships.
9
+ #
10
+ # Distinction from playwright-cli adapter:
11
+ # - playwright-cli shells to a binary that takes positional args (translation
12
+ # needed at adapter boundary).
13
+ # - playwright-lib shells to a node script that speaks skill-flag surface
14
+ # directly (driver constructs Playwright API calls), so no translation here.
15
+ # - playwright-lib supports --secret-stdin natively (driver reads stdin in node).
16
+ # - playwright-lib supports session loading via BROWSER_SKILL_STORAGE_STATE env.
17
+
18
+ [ -n "${_BROWSER_TOOL_PLAYWRIGHT_LIB_LOADED:-}" ] && return 0
19
+ readonly _BROWSER_TOOL_PLAYWRIGHT_LIB_LOADED=1
20
+
21
+ # Required by spec 2026-05-01-token-efficient-adapter-output-design §8.
22
+ # shellcheck source=../output.sh
23
+ # shellcheck disable=SC1091
24
+ source "$(dirname "${BASH_SOURCE[0]}")/../output.sh"
25
+
26
+ readonly _BROWSER_TOOL_PLAYWRIGHT_LIB_NODE_BIN="${BROWSER_SKILL_NODE_BIN:-node}"
27
+ readonly _BROWSER_TOOL_PLAYWRIGHT_LIB_DRIVER="$(dirname "${BASH_SOURCE[0]}")/../node/playwright-driver.mjs"
28
+
29
+ # --- Identity functions ---
30
+
31
# tool_metadata — print this adapter's static identity record as a JSON object
# on stdout (name, ABI version, version pin, cheatsheet path, install hint).
tool_metadata() {
  printf '%s\n' \
    '{' \
    '"name": "playwright-lib",' \
    '"abi_version": 1,' \
    '"version_pin": "1.59.x",' \
    '"cheatsheet_path": "references/playwright-lib-cheatsheet.md",' \
    '"install_hint": "npm i -g playwright @playwright/test && playwright install chromium"' \
    '}'
}
42
+
43
# tool_capabilities — print the verbs this adapter declares, with the flags
# declared for each verb, plus the session_load capability flag, as a JSON
# object on stdout.
tool_capabilities() {
  printf '%s\n' \
    '{' \
    '"verbs": {' \
    '"open": { "flags": ["--headed", "--viewport", "--user-agent", "--storage-state"] },' \
    '"click": { "flags": ["--ref", "--selector"] },' \
    '"fill": { "flags": ["--ref", "--text", "--secret-stdin"] },' \
    '"snapshot": { "flags": ["--depth"] },' \
    '"login": { "flags": ["--storage-state"] }' \
    '},' \
    '"session_load": true' \
    '}'
}
57
+
58
# _playwright_lib_json_escape STRING — print STRING escaped so it can sit
# between the double quotes of a JSON string value. Escapes backslash, double
# quote, newline, carriage return and tab; other control characters are passed
# through unchanged.
_playwright_lib_json_escape() {
  local s="${1-}"
  local nl=$'\n' cr=$'\r' tab=$'\t'
  # Backslash must be doubled first so later escapes are not re-escaped.
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//"${nl}"/\\n}
  s=${s//"${cr}"/\\r}
  s=${s//"${tab}"/\\t}
  printf '%s' "${s}"
}

# tool_doctor_check — report whether this adapter can run. Always returns 0;
# the verdict is carried in the JSON on stdout:
#   node binary not found → { "ok": false, ..., "error": "node not on PATH", "install_hint": ... }
#   driver file missing   → {"ok":false,...,"error":"driver missing","driver_path":...}
#   otherwise             → {"ok":true,"binary":...,"node_version":...}
# The binary name, driver path and version text are JSON-escaped before being
# embedded, so paths containing quotes or backslashes cannot break the
# emitted document.
tool_doctor_check() {
  local bin_json
  bin_json="$(_playwright_lib_json_escape "${_BROWSER_TOOL_PLAYWRIGHT_LIB_NODE_BIN}")"

  if ! command -v "${_BROWSER_TOOL_PLAYWRIGHT_LIB_NODE_BIN}" >/dev/null 2>&1; then
    printf '{ "ok": false, "binary": "%s", "error": "node not on PATH",\n"install_hint": "%s" }\n' \
      "${bin_json}" "brew install node (>=20)"
    return 0
  fi

  if [ ! -f "${_BROWSER_TOOL_PLAYWRIGHT_LIB_DRIVER}" ]; then
    printf '{"ok":false,"binary":"%s","error":"driver missing","driver_path":"%s"}\n' \
      "${bin_json}" "$(_playwright_lib_json_escape "${_BROWSER_TOOL_PLAYWRIGHT_LIB_DRIVER}")"
    return 0
  fi

  local node_version
  # A failing --version is not fatal for the doctor report; fall back to a tag.
  node_version="$("${_BROWSER_TOOL_PLAYWRIGHT_LIB_NODE_BIN}" --version 2>/dev/null || printf 'unknown')"
  printf '{"ok":true,"binary":"%s","node_version":"%s"}\n' \
    "${bin_json}" "$(_playwright_lib_json_escape "${node_version}")"
}
76
+
77
+ # --- Verb-dispatch functions ---
78
+ # Driver receives skill-flag argv directly; no translation needed.
79
+ # BROWSER_SKILL_STORAGE_STATE (set by verb script when --site/--as resolved)
80
+ # is forwarded as --storage-state PATH to the driver when present.
81
+
82
# _drive VERB [ARGS...] — run the node driver script for VERB.
# The driver is invoked as: <node> <driver> VERB [--storage-state PATH] ARGS...
# where `--storage-state PATH` is inserted only when BROWSER_SKILL_STORAGE_STATE
# is set and non-empty. Caller-supplied ARGS are forwarded unchanged, after
# any inserted flag. Returns the driver's exit status.
_drive() {
  local verb="$1"
  shift
  # Build one argv that always holds at least VERB. Expanding an empty array
  # under `set -u` is an "unbound variable" error on bash < 4.4, so the
  # optional flag is appended here rather than expanded from its own array.
  local argv=("${verb}")
  if [ -n "${BROWSER_SKILL_STORAGE_STATE:-}" ]; then
    argv+=(--storage-state "${BROWSER_SKILL_STORAGE_STATE}")
  fi
  argv+=("$@")
  "${_BROWSER_TOOL_PLAYWRIGHT_LIB_NODE_BIN}" "${_BROWSER_TOOL_PLAYWRIGHT_LIB_DRIVER}" \
    "${argv[@]}"
}
92
+
93
# Verb dispatchers. Supported verbs hand their argv to _drive unchanged under
# the matching verb name; unsupported verbs return 41.

tool_open() {
  _drive open "$@"
}

tool_click() {
  _drive click "$@"
}

tool_fill() {
  _drive fill "$@"
}

tool_snapshot() {
  _drive snapshot "$@"
}

# Phase 5 chrome-devtools-mcp territory.
tool_inspect() {
  return 41
}

tool_audit() {
  return 41
}

tool_extract() {
  return 41
}

tool_eval() {
  _drive eval "$@"
}
101
+
102
# tool_login [ARGS...] — hand the login verb to the node driver via _drive.
# Phase-2 carry-forward: before this adapter existed, login was emitted with
# tool=playwright-lib-stub. Login now routes here, so the verb script's tool
# field becomes tool=playwright-lib. The driver's stub mode currently echoes a
# canned login fixture; real mode launches a headed browser for storageState
# capture.
tool_login() {
  _drive login "$@"
}
@@ -0,0 +1,222 @@
1
+ # scripts/lib/verb_helpers.sh — shared verb-script boilerplate.
2
+ # Every scripts/browser-<verb>.sh sources this AFTER common.sh + router.sh.
3
+ # See: docs/superpowers/plans/2026-05-01-phase-03-part-2-real-verbs.md Task 1
4
+ # and docs/superpowers/plans/2026-05-01-phase-04-real-playwright-and-sessions.md Task 3.
5
+
6
+ [ -n "${BROWSER_SKILL_VERB_HELPERS_LOADED:-}" ] && return 0
7
+ readonly BROWSER_SKILL_VERB_HELPERS_LOADED=1
8
+
9
+ # Site + session libs are needed by resolve_session_storage_state. Source
10
+ # guards in those files prevent double-loading.
11
+ # shellcheck source=site.sh
12
+ # shellcheck disable=SC1091
13
+ source "$(dirname "${BASH_SOURCE[0]}")/site.sh"
14
+ # shellcheck source=session.sh
15
+ # shellcheck disable=SC1091
16
+ source "$(dirname "${BASH_SOURCE[0]}")/session.sh"
17
+
18
# parse_verb_globals "$@" — strip the global flags shared by every verb out of
# the argument list:
#   --site NAME   site profile name              → ARG_SITE
#   --tool NAME   force a specific adapter       → ARG_TOOL
#   --as NAME     session name                   → ARG_AS
#   --dry-run     print planned action only      → ARG_DRY_RUN=1
#   --raw         emit only the value (spec §4)  → ARG_RAW=1
# Each ARG_* variable is assigned and exported only when its flag is present;
# variables for absent flags are left untouched. A value-taking flag with a
# missing or empty value dies with EXIT_USAGE_ERROR.
# Everything else is collected, in order, into the REMAINING_ARGV array.
parse_verb_globals() {
  REMAINING_ARGV=()
  local flag
  while (( $# > 0 )); do
    flag="$1"
    case "${flag}" in
      --site|--tool|--as)
        [ -n "${2:-}" ] || die "${EXIT_USAGE_ERROR}" "${flag} requires a value"
        case "${flag}" in
          --site) export ARG_SITE="$2" ;;
          --tool) export ARG_TOOL="$2" ;;
          --as)   export ARG_AS="$2" ;;
        esac
        shift 2
        ;;
      --dry-run)
        export ARG_DRY_RUN=1
        shift
        ;;
      --raw)
        export ARG_RAW=1
        shift
        ;;
      *)
        REMAINING_ARGV+=("${flag}")
        shift
        ;;
    esac
  done
}
59
+
60
# source_picked_adapter TOOL_NAME — source ${LIB_TOOL_DIR}/<TOOL_NAME>.sh into
# the current shell.
# Dies with EXIT_TOOL_MISSING when TOOL_NAME is empty, contains a '/', or has
# no matching file. The '/' check keeps a name such as "../x" from sourcing a
# file outside LIB_TOOL_DIR.
# Caller MUST have called init_paths first (sets LIB_TOOL_DIR).
source_picked_adapter() {
  local tool="${1:-}"
  case "${tool}" in
    ''|*/*)
      die "${EXIT_TOOL_MISSING}" "adapter file not found: ${tool} (invalid adapter name)"
      ;;
  esac
  local file="${LIB_TOOL_DIR}/${tool}.sh"
  if [ ! -f "${file}" ]; then
    die "${EXIT_TOOL_MISSING}" "adapter file not found: ${tool} (no ${file})"
  fi
  # shellcheck source=/dev/null
  source "${file}"
}
72
+
73
# resolve_session_storage_state — turn ARG_SITE / ARG_AS into a storageState
# file path and export it as BROWSER_SKILL_STORAGE_STATE. The router's
# rule_session_required reads that env var to prefer playwright-lib (the only
# adapter declaring session_load: true).
#
# Outcomes, checked in this order:
#   1. Neither ARG_SITE nor ARG_AS set       → return 0, nothing exported.
#   2. ARG_AS set but ARG_SITE empty         → die EXIT_USAGE_ERROR.
#   3. site_exists fails for ARG_SITE        → die EXIT_SITE_NOT_FOUND.
#   4. Session name = ARG_AS, else the site profile's default_session;
#      if both are empty                     → return 0, nothing exported.
#   5. session_exists fails for that name    → die EXIT_SESSION_EXPIRED.
#   6. session_origin_check fails vs the
#      site profile's url                    → die EXIT_SESSION_EXPIRED.
#   7. Otherwise export
#      BROWSER_SKILL_STORAGE_STATE=${SESSIONS_DIR}/<name>.json.
resolve_session_storage_state() {
  if [[ -z "${ARG_SITE:-}" ]]; then
    if [[ -z "${ARG_AS:-}" ]]; then
      return 0
    fi
    die "${EXIT_USAGE_ERROR}" "--as requires --site (which site does this session belong to?)"
  fi

  site_exists "${ARG_SITE}" \
    || die "${EXIT_SITE_NOT_FOUND}" "site '${ARG_SITE}' not registered (try: ${0##*/} add-site --name ${ARG_SITE} --url ...)"

  local site_json target_url fallback_session chosen
  site_json="$(site_load "${ARG_SITE}")"
  target_url="$(jq -r .url <<<"${site_json}")"
  fallback_session="$(jq -r '.default_session // ""' <<<"${site_json}")"

  # An explicit --as wins over the profile's default session.
  chosen="${ARG_AS:-${fallback_session}}"
  if [[ -z "${chosen}" ]]; then
    return 0
  fi

  session_exists "${chosen}" \
    || die "${EXIT_SESSION_EXPIRED}" "session '${chosen}' not found (run: ${0##*/} login --site ${ARG_SITE} --as ${chosen} --storage-state-file PATH)"

  # session_origin_check `die`s on mismatch; running it in a subshell confines
  # that exit so a verb-aware hint can be emitted here instead.
  if ! ( session_origin_check "${chosen}" "${target_url}" >/dev/null 2>&1 ); then
    die "${EXIT_SESSION_EXPIRED}" "session '${chosen}' origins do not match site '${ARG_SITE}' (URL ${target_url}); re-login required"
  fi

  export BROWSER_SKILL_STORAGE_STATE="${SESSIONS_DIR}/${chosen}.json"
}
124
+
125
+ # --- Phase 5 part 3-ii: transparent verb-retry on EXIT_SESSION_EXPIRED -------
126
+ #
127
+ # When a verb's adapter dispatch (tool_VERB) exits 22 (EXIT_SESSION_EXPIRED)
128
+ # AND the current --site / --as has a credential with auto_relogin: true,
129
+ # silently re-login via `bash browser-login.sh --auto` and retry the verb
130
+ # EXACTLY ONCE. Per parent spec §4.4: every verb call → silent re-login →
131
+ # retry, exactly one attempt. Wires into one verb (snapshot) in this PR;
132
+ # remaining verbs in follow-ups.
133
+
134
# invoke_with_retry VERB ARGS... — run tool_<VERB> ARGS, print its captured
# stdout (via printf '%s', so trailing newlines are not re-emitted) and return
# its exit code.
#
# When the first attempt exits EXIT_SESSION_EXPIRED and both _can_auto_relogin
# and _silent_relogin succeed, the session storage state is re-resolved and the
# verb is run ONE more time; only the second attempt's stdout and exit code
# are reported. In every other case the first attempt's stdout and exit code
# are reported unchanged.
#
# The exit status is captured with `|| rc=$?` rather than by toggling
# `set +e` / `set -e`, so the caller's errexit setting is left exactly as it
# was.
invoke_with_retry() {
  local verb="$1"
  shift

  local out rc=0
  out="$(tool_"${verb}" "$@")" || rc=$?

  # Anything other than "session expired + silent re-login worked" reports the
  # first attempt as-is.
  if [ "${rc}" -ne "${EXIT_SESSION_EXPIRED}" ] \
    || ! _can_auto_relogin \
    || ! _silent_relogin >/dev/null 2>&1; then
    printf '%s' "${out}"
    return "${rc}"
  fi

  # Re-resolve session storage state so the retry picks up the fresh file.
  resolve_session_storage_state

  rc=0
  out="$(tool_"${verb}" "$@")" || rc=$?
  printf '%s' "${out}"
  return "${rc}"
}
171
+
172
# _can_auto_relogin — succeed (0) only when all of these hold:
#   - ARG_SITE is set and non-empty;
#   - _resolve_relogin_cred_name yields a non-empty credential name;
#   - credential_load succeeds for that name (credential.sh is sourced on
#     demand when credential_load is not yet defined);
#   - the loaded metadata has auto_relogin equal to true.
# Returns 1 otherwise.
_can_auto_relogin() {
  if [ -z "${ARG_SITE:-}" ]; then
    return 1
  fi

  local name
  if ! name="$(_resolve_relogin_cred_name 2>/dev/null)"; then
    return 1
  fi
  if [ -z "${name}" ]; then
    return 1
  fi

  # credential.sh may not be sourced in every verb script. Source on demand.
  if ! command -v credential_load >/dev/null 2>&1; then
    # shellcheck source=credential.sh
    # shellcheck disable=SC1091
    source "$(dirname "${BASH_SOURCE[0]}")/credential.sh" 2>/dev/null || return 1
  fi

  local meta flag
  meta="$(credential_load "${name}" 2>/dev/null)" || return 1
  flag="$(jq -r '.auto_relogin // false' <<<"${meta}" 2>/dev/null)"
  if [ "${flag}" = "true" ]; then
    return 0
  fi
  return 1
}
193
+
194
# _resolve_relogin_cred_name — print (without a trailing newline) the
# credential name to use for a retry. ARG_AS wins when non-empty; otherwise
# the default_session of the ARG_SITE profile is used, provided the site
# exists. Returns 1 and prints nothing when no name can be determined.
_resolve_relogin_cred_name() {
  local name="${ARG_AS:-}"

  if [ -z "${name}" ] && [ -n "${ARG_SITE:-}" ] && site_exists "${ARG_SITE}"; then
    local site_json
    site_json="$(site_load "${ARG_SITE}")"
    name="$(jq -r '.default_session // ""' <<<"${site_json}" 2>/dev/null)"
  fi

  if [ -z "${name}" ]; then
    return 1
  fi
  printf '%s' "${name}"
}
213
+
214
# _silent_relogin — run browser-login.sh (one directory above this file) with
# `--auto --site ARG_SITE --as <resolved credential name>` in a child bash.
# Returns 1 when no credential name can be resolved, otherwise the login
# script's exit code. Output is not silenced here; the caller redirects it.
_silent_relogin() {
  local target_cred
  target_cred="$(_resolve_relogin_cred_name)" || return 1

  local lib_dir
  lib_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

  local -a login_argv=(--auto --site "${ARG_SITE}" --as "${target_cred}")
  bash "${lib_dir}/../browser-login.sh" "${login_argv[@]}"
}
@@ -0,0 +1,145 @@
1
+ #!/usr/bin/env bash
2
+ # scripts/lib/visual-rescue-default.sh — canonical Path 3 probe (text-mode).
3
+ #
4
+ # Implements the BROWSER_SKILL_VISUAL_RESCUE_CMD hook contract from
5
+ # scripts/browser-do.sh (Phase 14 Path 3). Decides whether a cached selector
6
+ # is still semantically present on the page by sending the CURRENT
7
+ # accessibility snapshot + the original intent to a local OpenAI-compatible
8
+ # VLM endpoint (default: http://127.0.0.1:8080 — same as scripts/browser-vlm.sh).
9
+ #
10
+ # Mode: text-based (v1). Reads the accessibility-tree YAML snapshot (cheap,
11
+ # ~2KB) and asks the VLM yes/no. NO screenshot is sent — a true vision-mode
12
+ # default ships in a future commit once the screenshot-from-live-session
13
+ # infrastructure lands.
14
+ #
15
+ # Why this is the right v1 default:
16
+ # - llama-server's text completion is much faster than vision (~200ms vs ~1500ms)
17
+ # - works against ANY OpenAI-compatible LLM, not just VLMs
18
+ # - accessibility snapshots already encode what UI is visible
19
+ # - no new infrastructure needed (browser-snapshot.sh is shipped)
20
+ #
21
+ # Hook contract (per browser-do.sh):
22
+ # $1 SITE $2 INTENT $3 CACHED_SELECTOR
23
+ # exit 0 + stdout "yes" → cache rescued
24
+ # exit 0 + stdout "no" → fall through to cloud LLM
25
+ # non-zero exit → fall through (treated as "unreachable")
26
+ #
27
+ # Env overrides:
28
+ # BROWSER_SKILL_VLM_HOST 127.0.0.1
29
+ # BROWSER_SKILL_VLM_PORT 8080
30
+ # BROWSER_SKILL_VLM_RESCUE_MODEL "q" (arbitrary tag; llama-server ignores)
31
+ # BROWSER_SKILL_VLM_RESCUE_TIMEOUT 30 (seconds, end-to-end)
32
+ # BROWSER_SKILL_SCRIPTS_DIR derived from BASH_SOURCE if unset
33
+ # BROWSER_SKILL_RESCUE_SNAPSHOT_BYTES 2048 (truncation cap for snapshot text)
34
+
35
set -euo pipefail
IFS=$'\n\t'

# Positional arguments per the hook contract: SITE, INTENT, CACHED_SELECTOR.
site="${1:-}"
intent="${2:-}"
selector="${3:-}"

# All three arguments are mandatory; answer "no" and exit 2 when any is missing.
if [ -z "${site}" ] || [ -z "${intent}" ] || [ -z "${selector}" ]; then
  echo "no"
  exit 2
fi

vlm_host="${BROWSER_SKILL_VLM_HOST:-127.0.0.1}"
vlm_port="${BROWSER_SKILL_VLM_PORT:-8080}"
vlm_model="${BROWSER_SKILL_VLM_RESCUE_MODEL:-q}"
vlm_timeout="${BROWSER_SKILL_VLM_RESCUE_TIMEOUT:-30}"
snap_cap="${BROWSER_SKILL_RESCUE_SNAPSHOT_BYTES:-2048}"
endpoint="http://${vlm_host}:${vlm_port}/v1/chat/completions"

# Gate 1: reachability. With lazy auto-start (default ON), the probe will
# try to spawn llama-server via browser-vlm.sh if it's down, and poll
# /health up to BROWSER_SKILL_LAZY_START_TIMEOUT seconds (default 60).
# Disable lazy-start by setting BROWSER_SKILL_LAZY_START=0 (the probe then
# fails fast like v1).
if ! curl -sfm 2 "http://${vlm_host}:${vlm_port}/health" >/dev/null 2>&1; then
  if [ "${BROWSER_SKILL_LAZY_START:-1}" = "1" ]; then
    SCRIPTS_DIR_FOR_VLM="${BROWSER_SKILL_SCRIPTS_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"
    vlm_script="${SCRIPTS_DIR_FOR_VLM}/browser-vlm.sh"
    if [ -f "${vlm_script}" ]; then
      # Start in background — browser-vlm.sh handles nohup + pidfile.
      bash "${vlm_script}" start >/dev/null 2>&1 || true
      # Poll until /health responds OR timeout. The counter only tracks the
      # sleeps, not the time each health probe itself takes.
      timeout_s="${BROWSER_SKILL_LAZY_START_TIMEOUT:-60}"
      waited=0
      while [ "${waited}" -lt "${timeout_s}" ]; do
        if curl -sfm 2 "http://${vlm_host}:${vlm_port}/health" >/dev/null 2>&1; then
          break
        fi
        sleep 2
        waited=$((waited + 2))
      done
    fi
  fi
  # Final reachability check — if still down, give up gracefully.
  if ! curl -sfm 2 "http://${vlm_host}:${vlm_port}/health" >/dev/null 2>&1; then
    echo "no"
    exit 1
  fi
fi

# Gate 2: locate browser-snapshot.sh. Default to the skill's own scripts dir
# resolved from this file's location.
SCRIPTS_DIR="${BROWSER_SKILL_SCRIPTS_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}"
snap_script="${SCRIPTS_DIR}/browser-snapshot.sh"
if [ ! -x "${snap_script}" ] && [ ! -f "${snap_script}" ]; then
  echo "no"
  exit 1
fi

# Gate 3: snapshot. browser-snapshot.sh emits NDJSON with summary as final
# line; large snapshots get a snapshot_path reference (Phase 14 #1).
snap_out="$(bash "${snap_script}" --site "${site}" 2>/dev/null | tail -1)"
[ -n "${snap_out}" ] || { echo "no"; exit 1; }

snap_text=""
snap_path="$(printf '%s' "${snap_out}" | jq -r '.snapshot_path // ""' 2>/dev/null)"
if [ -n "${snap_path}" ] && [ -f "${snap_path}" ]; then
  # Only the first snap_cap bytes of the snapshot file are sent to the model.
  snap_text="$(head -c "${snap_cap}" "${snap_path}")"
fi

# Fallback: no snapshot_path means inline (small page); just use whatever
# the summary itself carried as observed text. If neither path nor inline
# data lands, treat as unreachable.
if [ -z "${snap_text}" ]; then
  snap_text="$(printf '%s' "${snap_out}" \
    | jq -r '.url // "", .title // ""' 2>/dev/null \
    | tr '\n' ' ' \
    | head -c "${snap_cap}")"
fi

[ -n "${snap_text}" ] || { echo "no"; exit 1; }

# Gate 4: VLM probe. Yes/no prompt.
prompt="A user wants to: '${intent}'. The cached element selector was '${selector}'. Here is the current page's accessibility snapshot (first ${snap_cap} bytes):

${snap_text}

Based ONLY on the snapshot, is there still an element on the page that matches the user's intent? Reply with ONLY one word: 'yes' or 'no'."

# The request body is built by jq so the prompt is JSON-encoded correctly.
resp="$(curl -sS -m "${vlm_timeout}" "${endpoint}" \
  -H 'Content-Type: application/json' \
  -d "$(jq -nc --arg p "${prompt}" --arg m "${vlm_model}" '
    {model:$m, max_tokens:5,
     messages:[{role:"user",content:$p}]}')" 2>/dev/null)" \
  || { echo "no"; exit 1; }

completion="$(printf '%s' "${resp}" | jq -r '.choices[0].message.content // ""' 2>/dev/null)"

# Lower-case with tr rather than ${completion,,}: the latter needs bash >= 4
# and fails with "bad substitution" on bash 3.2, which would turn every
# probe into a non-zero exit.
completion_lc="$(printf '%s' "${completion}" | tr '[:upper:]' '[:lower:]')"

# Any reply containing "yes" counts as a rescue; everything else is "no".
case "${completion_lc}" in
  *yes*) echo "yes" ;;
  *) echo "no" ;;
esac

# Phase 14+: touch a tracker file so the idle-stop watchdog (browser-vlm.sh
# start spawns one) can tell when the VLM was last actually used. Without
# this, /health pings from doctor + manual status checks would keep the
# server alive forever.
BROWSER_SKILL_HOME="${BROWSER_SKILL_HOME:-${HOME}/.browser-skill}"
mkdir -p "${BROWSER_SKILL_HOME}" 2>/dev/null || true
: > "${BROWSER_SKILL_HOME}/vlm.last-used" 2>/dev/null || true
exit 0