browser-automation-skill 0.71.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +144 -0
- package/SECURITY.md +39 -0
- package/SKILL.md +206 -0
- package/bin/cli.mjs +55 -0
- package/install.sh +143 -0
- package/package.json +54 -0
- package/references/adapter-candidates.md +40 -0
- package/references/browser-mcp-cheatsheet.md +132 -0
- package/references/browser-stats-cheatsheet.md +155 -0
- package/references/chrome-devtools-mcp-cheatsheet.md +232 -0
- package/references/midscene-integration.md +359 -0
- package/references/obscura-cheatsheet.md +103 -0
- package/references/playwright-cli-cheatsheet.md +64 -0
- package/references/playwright-lib-cheatsheet.md +90 -0
- package/references/recipes/add-a-tool-adapter.md +134 -0
- package/references/recipes/agent-workflows/README.md +37 -0
- package/references/recipes/agent-workflows/cache-driven-bulk-operation.md +110 -0
- package/references/recipes/agent-workflows/flow-record-and-replay.md +102 -0
- package/references/recipes/agent-workflows/incremental-pattern-discovery.md +125 -0
- package/references/recipes/agent-workflows/login-then-scrape.md +100 -0
- package/references/recipes/anti-patterns-tool-extension.md +182 -0
- package/references/recipes/body-bytes-not-body.md +139 -0
- package/references/recipes/cache-write-security.md +210 -0
- package/references/recipes/fingerprint-rescue.md +154 -0
- package/references/recipes/model-routing.md +143 -0
- package/references/recipes/path-security.md +138 -0
- package/references/recipes/privacy-canary.md +96 -0
- package/references/recipes/visual-rescue-hook.md +182 -0
- package/references/stats-prices.json +42 -0
- package/references/stats-schema.json +77 -0
- package/references/tool-versions.md +8 -0
- package/scripts/browser-add-site.sh +113 -0
- package/scripts/browser-assert.sh +106 -0
- package/scripts/browser-audit.sh +68 -0
- package/scripts/browser-baseline.sh +135 -0
- package/scripts/browser-click.sh +100 -0
- package/scripts/browser-creds-add.sh +254 -0
- package/scripts/browser-creds-list.sh +67 -0
- package/scripts/browser-creds-migrate.sh +122 -0
- package/scripts/browser-creds-remove.sh +69 -0
- package/scripts/browser-creds-rotate-totp.sh +109 -0
- package/scripts/browser-creds-show.sh +82 -0
- package/scripts/browser-creds-totp.sh +94 -0
- package/scripts/browser-do.sh +630 -0
- package/scripts/browser-doctor.sh +365 -0
- package/scripts/browser-drag.sh +90 -0
- package/scripts/browser-extract.sh +192 -0
- package/scripts/browser-fill.sh +142 -0
- package/scripts/browser-flow.sh +316 -0
- package/scripts/browser-history.sh +187 -0
- package/scripts/browser-hover.sh +92 -0
- package/scripts/browser-inspect.sh +188 -0
- package/scripts/browser-list-sessions.sh +78 -0
- package/scripts/browser-list-sites.sh +42 -0
- package/scripts/browser-login.sh +279 -0
- package/scripts/browser-mcp.sh +65 -0
- package/scripts/browser-migrate.sh +195 -0
- package/scripts/browser-open.sh +134 -0
- package/scripts/browser-press.sh +80 -0
- package/scripts/browser-remove-session.sh +72 -0
- package/scripts/browser-remove-site.sh +68 -0
- package/scripts/browser-replay.sh +206 -0
- package/scripts/browser-route.sh +174 -0
- package/scripts/browser-select.sh +122 -0
- package/scripts/browser-show-session.sh +57 -0
- package/scripts/browser-show-site.sh +37 -0
- package/scripts/browser-snapshot.sh +176 -0
- package/scripts/browser-stats.sh +522 -0
- package/scripts/browser-tab-close.sh +112 -0
- package/scripts/browser-tab-list.sh +70 -0
- package/scripts/browser-tab-switch.sh +111 -0
- package/scripts/browser-upload.sh +132 -0
- package/scripts/browser-use.sh +60 -0
- package/scripts/browser-vlm.sh +707 -0
- package/scripts/browser-wait.sh +97 -0
- package/scripts/install-git-hooks.sh +16 -0
- package/scripts/lib/capture.sh +356 -0
- package/scripts/lib/common.sh +262 -0
- package/scripts/lib/credential.sh +237 -0
- package/scripts/lib/fingerprint-rescue.js +123 -0
- package/scripts/lib/flow.sh +448 -0
- package/scripts/lib/flow_record.sh +210 -0
- package/scripts/lib/mask.sh +49 -0
- package/scripts/lib/memory.sh +427 -0
- package/scripts/lib/migrate.sh +390 -0
- package/scripts/lib/migrators/README.md +23 -0
- package/scripts/lib/migrators/memory/v1_to_v2.sh +15 -0
- package/scripts/lib/migrators/recent_urls/README.md +13 -0
- package/scripts/lib/migrators/stats/README.md +24 -0
- package/scripts/lib/node/chrome-devtools-bridge.mjs +1812 -0
- package/scripts/lib/node/mcp-server.mjs +531 -0
- package/scripts/lib/node/mcp-tools.json +68 -0
- package/scripts/lib/node/playwright-driver.mjs +1104 -0
- package/scripts/lib/node/totp-core.mjs +52 -0
- package/scripts/lib/node/totp.mjs +52 -0
- package/scripts/lib/node/url-pattern-cluster.mjs +102 -0
- package/scripts/lib/node/url-pattern-resolver.mjs +77 -0
- package/scripts/lib/output.sh +79 -0
- package/scripts/lib/router.sh +342 -0
- package/scripts/lib/sanitize.sh +107 -0
- package/scripts/lib/secret/keychain.sh +91 -0
- package/scripts/lib/secret/libsecret.sh +74 -0
- package/scripts/lib/secret/plaintext.sh +75 -0
- package/scripts/lib/secret_backend_select.sh +57 -0
- package/scripts/lib/session.sh +153 -0
- package/scripts/lib/site.sh +126 -0
- package/scripts/lib/stats.sh +419 -0
- package/scripts/lib/tool/.gitkeep +0 -0
- package/scripts/lib/tool/chrome-devtools-mcp.sh +349 -0
- package/scripts/lib/tool/obscura.sh +249 -0
- package/scripts/lib/tool/playwright-cli.sh +155 -0
- package/scripts/lib/tool/playwright-lib.sh +106 -0
- package/scripts/lib/verb_helpers.sh +222 -0
- package/scripts/lib/visual-rescue-default.sh +145 -0
- package/scripts/regenerate-docs.sh +99 -0
- package/uninstall.sh +51 -0
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# scripts/browser-wait.sh — explicit wait for element state.
#
# Usage:
#   bash scripts/browser-wait.sh [--site NAME] [--tool NAME] [--dry-run]
#                                [--raw] --selector CSS
#                                [--state visible|hidden|attached|detached]
#                                [--timeout MS]
#
# Routes to chrome-devtools-mcp by default (Phase 6 part 4). Stateless —
# works one-shot or daemon-routed (parallel to eval/audit/snapshot).

set -euo pipefail
IFS=$'\n\t'

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=lib/common.sh
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/lib/common.sh"
# shellcheck source=lib/output.sh
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/lib/output.sh"
# shellcheck source=lib/router.sh
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/lib/router.sh"
# shellcheck source=lib/verb_helpers.sh
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/lib/verb_helpers.sh"

init_paths

SUMMARY_T0="$(now_ms)"
export SUMMARY_T0

parse_verb_globals "$@"

resolve_session_storage_state

selector=""
state=""
timeout=""
verb_argv=()

# _wait_collect_flags — walk REMAINING_ARGV once.
# Verb-specific flags (--selector / --state / --timeout) are checked, stored in
# the matching global, and forwarded as a flag+value pair in verb_argv. Every
# other token is forwarded to verb_argv untouched.
_wait_collect_flags() {
  local pos=0 flag value
  while [ "${pos}" -lt "${#REMAINING_ARGV[@]}" ]; do
    flag="${REMAINING_ARGV[pos]}"
    value="${REMAINING_ARGV[pos+1]:-}"
    case "${flag}" in
      --selector)
        [ -n "${value}" ] || die "${EXIT_USAGE_ERROR}" "--selector requires a value"
        selector="${value}"
        ;;
      --state)
        [ -n "${value}" ] || die "${EXIT_USAGE_ERROR}" "--state requires a value"
        case "${value}" in
          visible|hidden|attached|detached) ;;
          *) die "${EXIT_USAGE_ERROR}" "--state must be one of {visible, hidden, attached, detached} (got: ${value})" ;;
        esac
        state="${value}"
        ;;
      --timeout)
        [ -n "${value}" ] || die "${EXIT_USAGE_ERROR}" "--timeout requires a value (milliseconds)"
        timeout="${value}"
        ;;
      *)
        # Not one of ours: pass through as a single token.
        verb_argv+=("${flag}")
        pos=$((pos + 1))
        continue
        ;;
    esac
    # Recognised flag: forward it together with its value and skip both.
    verb_argv+=("${flag}" "${value}")
    pos=$((pos + 2))
  done
}
_wait_collect_flags

[ -n "${selector}" ] || die "${EXIT_USAGE_ERROR}" "wait requires --selector CSS"

# Dry run: describe the wait, emit the summary, and stop before any tool runs.
if [ "${ARG_DRY_RUN:-0}" = "1" ]; then
  ok "dry-run: would wait for ${selector} (state=${state:-visible}, timeout=${timeout:-default})"
  emit_summary verb=wait tool=none why=dry-run status=ok selector="${selector}" \
    state="${state}" timeout="${timeout}" dry_run=true
  exit 0
fi

# pick_tool prints "<tool>\t<why>"; split on the first tab.
picked="$(pick_tool wait "${verb_argv[@]}")"
tool_name="${picked%%$'\t'*}"
why="${picked#*$'\t'}"

source_picked_adapter "${tool_name}"

# Capture the adapter's exit status ourselves instead of letting errexit abort.
set +e
adapter_out="$(invoke_with_retry wait "${verb_argv[@]}")"
adapter_rc=$?
set -e

if [ -n "${adapter_out}" ]; then
  printf '%s\n' "${adapter_out}"
fi

summary_status="ok"
if [ "${adapter_rc}" -ne 0 ]; then
  summary_status="error"
fi
emit_summary verb=wait tool="${tool_name}" why="${why}" status="${summary_status}" selector="${selector}"
exit "${adapter_rc}"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# scripts/install-git-hooks.sh — point this checkout's git at .githooks/.
#
# Exits 0 without doing anything when the parent directory of scripts/ is not
# a git checkout or when git is not on PATH. Exits 1 when the pre-commit hook
# file is missing. Otherwise marks .githooks/pre-commit executable and sets
# core.hooksPath to .githooks for this repository.
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# .git is a directory in a normal clone and a plain file in a worktree/submodule.
if [ ! -d "${REPO_ROOT}/.git" ] && [ ! -f "${REPO_ROOT}/.git" ]; then
  printf 'not a git checkout: %s\n' "${REPO_ROOT}" >&2
  exit 0
fi
if ! command -v git >/dev/null 2>&1; then
  printf 'git not on PATH; cannot install hooks\n' >&2
  exit 0
fi

# Verify and prepare the hook BEFORE touching git config, so a missing hook
# never leaves core.hooksPath pointing at a directory without a usable hook.
if [ ! -f "${REPO_ROOT}/.githooks/pre-commit" ]; then
  printf 'pre-commit hook not found: %s\n' "${REPO_ROOT}/.githooks/pre-commit" >&2
  exit 1
fi
chmod +x "${REPO_ROOT}/.githooks/pre-commit"

git -C "${REPO_ROOT}" config core.hooksPath .githooks
printf 'pre-commit hook active (.githooks/pre-commit)\n'
|
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
# scripts/lib/capture.sh — capture artifact pipeline (Phase 7 part 1-i).
|
|
2
|
+
#
|
|
3
|
+
# Three-function API:
|
|
4
|
+
# capture_init_dir — idempotent mkdir 0700 of ${CAPTURES_DIR}
|
|
5
|
+
# capture_start <verb> — atomic NNN allocation + meta.json (in_progress)
|
|
6
|
+
# capture_finish [status] — finalize meta.json + update _index.json
|
|
7
|
+
#
|
|
8
|
+
# Verbs sandwich their per-aspect file writes between capture_start and
|
|
9
|
+
# capture_finish. After capture_start: ${CAPTURE_ID} + ${CAPTURE_DIR} are
|
|
10
|
+
# exported; the verb writes any per-aspect files (snapshot.json,
|
|
11
|
+
# console.json, network.har, screenshot.png, etc.) into ${CAPTURE_DIR};
|
|
12
|
+
# capture_finish recomputes total_bytes + files inventory.
|
|
13
|
+
#
|
|
14
|
+
# 7-i scope: no sanitization (7-iii), no retention/prune (7-v), no
|
|
15
|
+
# --unsanitized (7-iv). Wired only to snapshot — structurally safe (no
|
|
16
|
+
# headers, no cookies). Sanitization arrives when console.json + network.har
|
|
17
|
+
# enter the picture.
|
|
18
|
+
#
|
|
19
|
+
# Atomicity: NNN allocation uses tmpfile + rename(2) per parent spec §4.5
|
|
20
|
+
# ("tmpfile + mv, no flock"). Single-process per invocation expected; two
|
|
21
|
+
# concurrent capture_starts could race on the same NNN. v1 design doesn't
|
|
22
|
+
# pay flock complexity. Future hardening: mkdir without -p so the second
|
|
23
|
+
# loser fails fast → retry with bumped id.
|
|
24
|
+
|
|
25
|
+
# Source-once guard: when this library was already loaded, stop here.
if [ -n "${_BROWSER_LIB_CAPTURE_LOADED:-}" ]; then
  return 0
fi
readonly _BROWSER_LIB_CAPTURE_LOADED=1
|
|
27
|
+
|
|
28
|
+
# capture_init_dir
# Ensure ${CAPTURES_DIR} exists with mode 0700.
# The mode is enforced on every call, so a directory that already exists with
# looser permissions is tightened rather than silently accepted.
# Globals: CAPTURES_DIR (read).
# Returns: non-zero if the directory cannot be created or chmod fails.
capture_init_dir() {
  if [ ! -d "${CAPTURES_DIR}" ]; then
    mkdir -p "${CAPTURES_DIR}"
  fi
  chmod 700 "${CAPTURES_DIR}"
}
|
|
36
|
+
|
|
37
|
+
# _capture_iso_now — print the current time as a UTC ISO 8601 timestamp with
# second precision (YYYY-MM-DDTHH:MM:SSZ).
_capture_iso_now() {
  local fmt='%Y-%m-%dT%H:%M:%SZ'
  date -u "+${fmt}"
}
|
|
41
|
+
|
|
42
|
+
# _capture_iso_to_epoch <iso> — UTC ISO 8601 string → epoch seconds.
# Tries GNU date first (Linux + coreutils-installed-Mac), then BSD date.
# Prints 0 on empty input or parse failure (caller treats 0 as "very old").
#
# The BSD fallback passes -u: the format treats the trailing "Z" as a literal
# character, so without -u the timestamp would be read as local time and the
# result would be off by the local UTC offset.
_capture_iso_to_epoch() {
  local iso="${1:-}"
  # GNU `date -d ''` succeeds (it means "today 00:00"), so reject empty input
  # up front instead of returning a bogus epoch.
  if [ -z "${iso}" ]; then
    printf '0'
    return 0
  fi
  date -d "${iso}" +%s 2>/dev/null \
    || date -j -u -f '%Y-%m-%dT%H:%M:%SZ' "${iso}" +%s 2>/dev/null \
    || printf '0'
}
|
|
51
|
+
|
|
52
|
+
# _capture_now_epoch — print the current time in epoch seconds.
# When BROWSER_SKILL_CAPTURE_NOW_EPOCH is set and non-empty (test-only seam),
# its value is printed verbatim so age-threshold tests can pin the clock.
# Otherwise this is a single `date +%s`.
_capture_now_epoch() {
  local pinned="${BROWSER_SKILL_CAPTURE_NOW_EPOCH:-}"
  if [ -z "${pinned}" ]; then
    date +%s
    return
  fi
  printf '%s\n' "${pinned}"
}
|
|
63
|
+
|
|
64
|
+
# _capture_read_config — print a JSON object with retention_count,
# retention_days and warn_at_pct read from ${CONFIG_FILE}.
# Missing fields take the defaults 500 / 14 / 90; the same defaults are
# printed when the file is absent or jq fails on it.
_capture_read_config() {
  local defaults='{"retention_count":500,"retention_days":14,"warn_at_pct":90}'
  if [ ! -f "${CONFIG_FILE}" ]; then
    printf '%s' "${defaults}"
    return
  fi
  jq '{
    retention_count: (.retention_count // 500),
    retention_days: (.retention_days // 14),
    warn_at_pct: (.warn_at_pct // 90)
  }' "${CONFIG_FILE}" 2>/dev/null \
    || printf '%s' "${defaults}"
}
|
|
79
|
+
|
|
80
|
+
# _capture_pad_id N — print N zero-padded to three digits ("001");
# values of 1000 and above are printed as plain decimals.
_capture_pad_id() {
  local raw_id="$1"
  if ! [ "${raw_id}" -lt 1000 ]; then
    printf '%d' "${raw_id}"
    return
  fi
  printf '%03d' "${raw_id}"
}
|
|
89
|
+
|
|
90
|
+
# _capture_read_next_id → prints the next unused integer id.
# Reads .next_id from ${CAPTURES_DIR}/_index.json. Prints 1 when the index is
# absent, unreadable, empty, or holds anything other than a plain non-negative
# integer — callers feed the result straight into integer tests and
# arithmetic, so it must always be numeric.
_capture_read_next_id() {
  local idx="${CAPTURES_DIR}/_index.json"
  local raw=""
  if [ -f "${idx}" ]; then
    # jq exits 0 with no output on an empty file, so the exit status alone
    # is not enough; the value is validated below.
    raw="$(jq -r '.next_id // 1' "${idx}" 2>/dev/null)" || raw=""
  fi
  case "${raw}" in
    '' | *[!0-9]*) printf '1' ;;
    *) printf '%s' "${raw}" ;;
  esac
}
|
|
100
|
+
|
|
101
|
+
# _capture_write_index <next_id> <count> <latest> <total_bytes>
# Rewrites ${CAPTURES_DIR}/_index.json via tmpfile + mv (rename on the same
# filesystem), mode 0600. next_id, count and total_bytes must be JSON numbers;
# latest is stored as a string.
# Returns non-zero — leaving any existing index untouched and removing the
# tmpfile — when jq cannot render the document (e.g. a non-numeric argument).
_capture_write_index() {
  local next_id="$1" count="$2" latest="$3" total_bytes="$4"
  local idx="${CAPTURES_DIR}/_index.json"
  local tmp="${idx}.tmp.$$"
  # The redirection creates the tmpfile even if jq fails, so the status must
  # be checked before the rename; otherwise an empty file would replace the
  # live index whenever errexit is not in effect.
  if ! jq -n \
    --argjson schema_version 1 \
    --argjson next_id "${next_id}" \
    --argjson count "${count}" \
    --arg latest "${latest}" \
    --argjson total_bytes "${total_bytes}" \
    '{schema_version: $schema_version, next_id: $next_id, count: $count, latest: $latest, total_bytes: $total_bytes}' \
    > "${tmp}"; then
    rm -f "${tmp}"
    printf 'capture: failed to render _index.json; previous index kept\n' >&2
    return 1
  fi
  chmod 600 "${tmp}"
  mv "${tmp}" "${idx}"
}
|
|
118
|
+
|
|
119
|
+
# capture_start <verb>
# Opens a new capture: takes the next id from the index, creates
# ${CAPTURES_DIR}/<NNN> (mode 0700), writes meta.json with
# status=in_progress, bumps _index.next_id, and exports CAPTURE_ID and
# CAPTURE_DIR. <verb> defaults to "unknown".
capture_start() {
  local verb="${1:-unknown}"
  capture_init_dir

  local next_id padded
  next_id="$(_capture_read_next_id)"
  padded="$(_capture_pad_id "${next_id}")"

  export CAPTURE_ID="${padded}"
  export CAPTURE_DIR="${CAPTURES_DIR}/${padded}"

  mkdir -p "${CAPTURE_DIR}"
  chmod 700 "${CAPTURE_DIR}"

  # meta.json is staged next to its destination and renamed into place.
  local meta_path="${CAPTURE_DIR}/meta.json"
  local staged="${meta_path}.tmp.$$"
  jq -n \
    --arg capture_id "${padded}" \
    --arg verb "${verb}" \
    --argjson schema_version 1 \
    --arg started_at "$(_capture_iso_now)" \
    --arg status "in_progress" \
    '{capture_id: $capture_id, verb: $verb, schema_version: $schema_version, started_at: $started_at, status: $status}' \
    > "${staged}"
  chmod 600 "${staged}"
  mv "${staged}" "${meta_path}"

  # Allocation-time bump of next_id. count / latest / total_bytes are carried
  # over from the existing index (zero/empty for a fresh one); capture_finish
  # makes them authoritative later.
  local index_path="${CAPTURES_DIR}/_index.json"
  local prior_count=0 prior_latest="" prior_bytes=0
  if [ -f "${index_path}" ]; then
    prior_count="$(jq -r '.count // 0' "${index_path}" 2>/dev/null || printf '0')"
    prior_latest="$(jq -r '.latest // ""' "${index_path}" 2>/dev/null || printf '')"
    prior_bytes="$(jq -r '.total_bytes // 0' "${index_path}" 2>/dev/null || printf '0')"
  fi
  _capture_write_index "$((next_id + 1))" "${prior_count}" "${prior_latest}" "${prior_bytes}"
}
|
|
167
|
+
|
|
168
|
+
# _capture_file_size <path> → byte count of <path>.
# Tries the GNU stat form first, then the BSD form; prints 0 if both fail.
_capture_file_size() {
  local target="$1"
  if stat -c '%s' "${target}" 2>/dev/null; then
    return 0
  fi
  if stat -f '%z' "${target}" 2>/dev/null; then
    return 0
  fi
  printf '0'
}
|
|
173
|
+
|
|
174
|
+
# _capture_inventory <dir> → JSON array of {name, bytes}, one element per
# regular file directly inside <dir>, ordered by the sorted path list.
# meta.json is included on purpose: its size counts toward total_bytes.
# Subdirectories are not descended into. Prints [] for an empty directory.
_capture_inventory() {
  local dir="$1"
  local rows=()
  local path size
  # The absolute /usr/bin/find path sidesteps any shell-level aliasing of find.
  while IFS= read -r path; do
    size="$(_capture_file_size "${path}")"
    rows+=("$(jq -n --arg name "${path##*/}" --argjson bytes "${size}" '{name: $name, bytes: $bytes}')")
  done < <(/usr/bin/find "${dir}" -maxdepth 1 -type f | sort)

  if [ "${#rows[@]}" -gt 0 ]; then
    # Slurp the per-file objects into a single array.
    printf '%s\n' "${rows[@]}" | jq -s '.'
    return
  fi
  printf '[]'
}
|
|
194
|
+
|
|
195
|
+
# capture_finish [status] [sanitized]
#   status    — stored in meta.status. Default: "ok".
#   sanitized — JSON boolean literal, "true" or "false"; stored in
#               meta.sanitized for audit (Phase 7 part 1-iv). Default: "true",
#               matching the sanitized-by-default contract.
# Requires CAPTURE_DIR and CAPTURE_ID (exported by capture_start).
#
# Updates meta.json (finished_at, status, sanitized, total_bytes, files), then
# _index.json (count, latest, total_bytes), then runs capture_prune.
# Dies without modifying meta.json when `sanitized` is not a boolean literal,
# when meta.json is missing, or when jq cannot render the updated document.
capture_finish() {
  local status="${1:-ok}"
  local sanitized="${2:-true}"
  : "${CAPTURE_DIR:?capture_finish: CAPTURE_DIR not set (call capture_start first)}"
  : "${CAPTURE_ID:?capture_finish: CAPTURE_ID not set (call capture_start first)}"

  # `sanitized` is spliced in via --argjson; reject anything but a boolean
  # before any file is touched.
  case "${sanitized}" in
    true | false) ;;
    *) die "${EXIT_GENERIC_ERROR:-1}" "capture_finish: sanitized must be true or false (got: ${sanitized})" ;;
  esac

  local meta="${CAPTURE_DIR}/meta.json"
  [ -f "${meta}" ] || die "${EXIT_GENERIC_ERROR:-1}" "capture_finish: meta.json missing for ${CAPTURE_ID}"

  local files_json total_bytes
  files_json="$(_capture_inventory "${CAPTURE_DIR}")"
  total_bytes="$(printf '%s' "${files_json}" | jq '[.[].bytes] | add // 0')"

  local tmp="${meta}.tmp.$$"
  # The redirection creates the tmpfile even when jq fails; check the status
  # so an empty file is never renamed over the existing meta.json.
  if ! jq \
    --arg finished_at "$(_capture_iso_now)" \
    --arg status "${status}" \
    --argjson sanitized "${sanitized}" \
    --argjson total_bytes "${total_bytes}" \
    --argjson files "${files_json}" \
    '. + {finished_at: $finished_at, status: $status, sanitized: $sanitized, total_bytes: $total_bytes, files: $files}' \
    "${meta}" > "${tmp}"; then
    rm -f "${tmp}"
    die "${EXIT_GENERIC_ERROR:-1}" "capture_finish: could not update meta.json for ${CAPTURE_ID}"
  fi
  chmod 600 "${tmp}"
  mv "${tmp}" "${meta}"

  # Update _index.json: count is the number of capture dirs on disk; latest is
  # this CAPTURE_ID; total_bytes is the sum over files inside all capture dirs.
  local idx="${CAPTURES_DIR}/_index.json"
  local next_id
  if [ -f "${idx}" ]; then
    next_id="$(jq -r '.next_id // 1' "${idx}" 2>/dev/null || printf '1')"
  else
    next_id=1
  fi
  local on_disk_count
  on_disk_count="$(/usr/bin/find "${CAPTURES_DIR}" -maxdepth 1 -mindepth 1 -type d | wc -l | tr -d ' ')"
  # GNU stat form first, BSD form as fallback; one size per line.
  local on_disk_total
  on_disk_total="$(/usr/bin/find "${CAPTURES_DIR}" -mindepth 2 -type f -exec stat -c '%s' {} + 2>/dev/null \
    || /usr/bin/find "${CAPTURES_DIR}" -mindepth 2 -type f -exec stat -f '%z' {} + 2>/dev/null \
    || printf '')"
  local on_disk_total_sum
  on_disk_total_sum="$(printf '%s\n' "${on_disk_total}" | awk '{s+=$1} END {print s+0}')"

  _capture_write_index "${next_id}" "${on_disk_count}" "${CAPTURE_ID}" "${on_disk_total_sum}"

  # Phase 7 part 1-v: auto-prune at the end of every successful finalize.
  capture_prune
}
|
|
253
|
+
|
|
254
|
+
# capture_prune (Phase 7 part 1-v)
# Reads retention_count / retention_days via _capture_read_config; a value
# that is not a plain non-negative integer falls back to the defaults
# (500 / 14) so a config typo can never turn into "prune everything".
# Walks ${CAPTURES_DIR}/*/meta.json oldest-first (by started_at) and removes
# capture dirs while EITHER threshold is exceeded.
# Never pruned:
#   - meta.is_baseline == true      (Phase 8 forward-compat)
#   - meta.status == "in_progress"  (in-flight)
# Directories without a meta.json or without started_at are ignored.
# If anything was pruned, _index.json is recomputed (count, latest,
# total_bytes); `latest` is the numerically highest surviving id.
# No-op when ${CAPTURES_DIR} does not exist or nothing exceeds the thresholds.
capture_prune() {
  [ -d "${CAPTURES_DIR}" ] || return 0

  local config retention_count retention_days
  config="$(_capture_read_config)"
  retention_count="$(printf '%s' "${config}" | jq -r '.retention_count')"
  retention_days="$(printf '%s' "${config}" | jq -r '.retention_days')"
  # Both values feed integer tests / arithmetic below; guard against strings,
  # floats or empty output.
  case "${retention_count}" in '' | *[!0-9]*) retention_count=500 ;; esac
  case "${retention_days}" in '' | *[!0-9]*) retention_days=14 ;; esac

  local now_epoch age_threshold_sec
  now_epoch="$(_capture_now_epoch)"
  age_threshold_sec=$(( retention_days * 86400 ))

  # Build the oldest-first list, one capture per line:
  #   "<epoch>\t<id>\t<is_baseline>\t<status>"
  local entries
  entries="$(
    /usr/bin/find "${CAPTURES_DIR}" -mindepth 1 -maxdepth 1 -type d 2>/dev/null \
      | while IFS= read -r dir; do
          local id meta started_at started_epoch is_baseline status
          id="$(basename "${dir}")"
          meta="${dir}/meta.json"
          [ -f "${meta}" ] || continue
          started_at="$(jq -r '.started_at // ""' "${meta}" 2>/dev/null)"
          [ -n "${started_at}" ] || continue
          started_epoch="$(_capture_iso_to_epoch "${started_at}")"
          is_baseline="$(jq -r '.is_baseline // false' "${meta}" 2>/dev/null || printf 'false')"
          status="$(jq -r '.status // "in_progress"' "${meta}" 2>/dev/null || printf 'in_progress')"
          printf '%s\t%s\t%s\t%s\n' "${started_epoch}" "${id}" "${is_baseline}" "${status}"
        done \
      | sort -n
  )"

  [ -n "${entries}" ] || return 0

  # total_count includes protected entries; it is compared to retention_count.
  local total_count
  total_count="$(printf '%s\n' "${entries}" | wc -l | tr -d ' ')"

  # Walk oldest-first; prune while either threshold is exceeded.
  local pruned_any=0
  local entry_epoch entry_id entry_baseline entry_status
  local age_sec exceeds_count exceeds_age
  while IFS=$'\t' read -r entry_epoch entry_id entry_baseline entry_status; do
    [ -n "${entry_id}" ] || continue
    # Skip protected captures.
    if [ "${entry_baseline}" = "true" ] || [ "${entry_status}" = "in_progress" ]; then
      continue
    fi
    age_sec=$(( now_epoch - entry_epoch ))
    exceeds_count=0
    exceeds_age=0
    if [ "${total_count}" -gt "${retention_count}" ]; then
      exceeds_count=1
    fi
    if [ "${age_sec}" -gt "${age_threshold_sec}" ]; then
      exceeds_age=1
    fi
    if [ "${exceeds_count}" = "0" ] && [ "${exceeds_age}" = "0" ]; then
      # Under both thresholds — stop (subsequent entries are newer).
      break
    fi
    rm -rf "${CAPTURES_DIR:?}/${entry_id}"
    warn "capture_prune: pruned captures/${entry_id}/ (age=${age_sec}s, baseline=${entry_baseline})" >&2
    pruned_any=1
    total_count=$(( total_count - 1 ))
  done <<< "${entries}"

  # Recompute _index.json post-prune.
  if [ "${pruned_any}" = "1" ]; then
    local idx="${CAPTURES_DIR}/_index.json"
    local next_id
    if [ -f "${idx}" ]; then
      next_id="$(jq -r '.next_id // 1' "${idx}" 2>/dev/null || printf '1')"
    else
      next_id=1
    fi
    local on_disk_count
    on_disk_count="$(/usr/bin/find "${CAPTURES_DIR}" -maxdepth 1 -mindepth 1 -type d | wc -l | tr -d ' ')"
    # GNU stat form first, BSD form as fallback; one size per line.
    local on_disk_total
    on_disk_total="$(/usr/bin/find "${CAPTURES_DIR}" -mindepth 2 -type f -exec stat -c '%s' {} + 2>/dev/null \
      || /usr/bin/find "${CAPTURES_DIR}" -mindepth 2 -type f -exec stat -f '%z' {} + 2>/dev/null \
      || printf '')"
    local on_disk_total_sum
    on_disk_total_sum="$(printf '%s\n' "${on_disk_total}" | awk '{s+=$1} END {print s+0}')"

    # Latest = highest-id surviving capture. Ids are zero-padded to only three
    # digits, so compare numerically: a lexicographic sort would rank "999"
    # above "1000".
    local latest_id
    latest_id="$(/usr/bin/find "${CAPTURES_DIR}" -maxdepth 1 -mindepth 1 -type d | xargs -I{} basename {} | sort -rn | head -n 1)"

    _capture_write_index "${next_id}" "${on_disk_count}" "${latest_id}" "${on_disk_total_sum}"
  fi
}
|