browser-automation-skill 0.71.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +144 -0
  3. package/SECURITY.md +39 -0
  4. package/SKILL.md +206 -0
  5. package/bin/cli.mjs +55 -0
  6. package/install.sh +143 -0
  7. package/package.json +54 -0
  8. package/references/adapter-candidates.md +40 -0
  9. package/references/browser-mcp-cheatsheet.md +132 -0
  10. package/references/browser-stats-cheatsheet.md +155 -0
  11. package/references/chrome-devtools-mcp-cheatsheet.md +232 -0
  12. package/references/midscene-integration.md +359 -0
  13. package/references/obscura-cheatsheet.md +103 -0
  14. package/references/playwright-cli-cheatsheet.md +64 -0
  15. package/references/playwright-lib-cheatsheet.md +90 -0
  16. package/references/recipes/add-a-tool-adapter.md +134 -0
  17. package/references/recipes/agent-workflows/README.md +37 -0
  18. package/references/recipes/agent-workflows/cache-driven-bulk-operation.md +110 -0
  19. package/references/recipes/agent-workflows/flow-record-and-replay.md +102 -0
  20. package/references/recipes/agent-workflows/incremental-pattern-discovery.md +125 -0
  21. package/references/recipes/agent-workflows/login-then-scrape.md +100 -0
  22. package/references/recipes/anti-patterns-tool-extension.md +182 -0
  23. package/references/recipes/body-bytes-not-body.md +139 -0
  24. package/references/recipes/cache-write-security.md +210 -0
  25. package/references/recipes/fingerprint-rescue.md +154 -0
  26. package/references/recipes/model-routing.md +143 -0
  27. package/references/recipes/path-security.md +138 -0
  28. package/references/recipes/privacy-canary.md +96 -0
  29. package/references/recipes/visual-rescue-hook.md +182 -0
  30. package/references/stats-prices.json +42 -0
  31. package/references/stats-schema.json +77 -0
  32. package/references/tool-versions.md +8 -0
  33. package/scripts/browser-add-site.sh +113 -0
  34. package/scripts/browser-assert.sh +106 -0
  35. package/scripts/browser-audit.sh +68 -0
  36. package/scripts/browser-baseline.sh +135 -0
  37. package/scripts/browser-click.sh +100 -0
  38. package/scripts/browser-creds-add.sh +254 -0
  39. package/scripts/browser-creds-list.sh +67 -0
  40. package/scripts/browser-creds-migrate.sh +122 -0
  41. package/scripts/browser-creds-remove.sh +69 -0
  42. package/scripts/browser-creds-rotate-totp.sh +109 -0
  43. package/scripts/browser-creds-show.sh +82 -0
  44. package/scripts/browser-creds-totp.sh +94 -0
  45. package/scripts/browser-do.sh +630 -0
  46. package/scripts/browser-doctor.sh +365 -0
  47. package/scripts/browser-drag.sh +90 -0
  48. package/scripts/browser-extract.sh +192 -0
  49. package/scripts/browser-fill.sh +142 -0
  50. package/scripts/browser-flow.sh +316 -0
  51. package/scripts/browser-history.sh +187 -0
  52. package/scripts/browser-hover.sh +92 -0
  53. package/scripts/browser-inspect.sh +188 -0
  54. package/scripts/browser-list-sessions.sh +78 -0
  55. package/scripts/browser-list-sites.sh +42 -0
  56. package/scripts/browser-login.sh +279 -0
  57. package/scripts/browser-mcp.sh +65 -0
  58. package/scripts/browser-migrate.sh +195 -0
  59. package/scripts/browser-open.sh +134 -0
  60. package/scripts/browser-press.sh +80 -0
  61. package/scripts/browser-remove-session.sh +72 -0
  62. package/scripts/browser-remove-site.sh +68 -0
  63. package/scripts/browser-replay.sh +206 -0
  64. package/scripts/browser-route.sh +174 -0
  65. package/scripts/browser-select.sh +122 -0
  66. package/scripts/browser-show-session.sh +57 -0
  67. package/scripts/browser-show-site.sh +37 -0
  68. package/scripts/browser-snapshot.sh +176 -0
  69. package/scripts/browser-stats.sh +522 -0
  70. package/scripts/browser-tab-close.sh +112 -0
  71. package/scripts/browser-tab-list.sh +70 -0
  72. package/scripts/browser-tab-switch.sh +111 -0
  73. package/scripts/browser-upload.sh +132 -0
  74. package/scripts/browser-use.sh +60 -0
  75. package/scripts/browser-vlm.sh +707 -0
  76. package/scripts/browser-wait.sh +97 -0
  77. package/scripts/install-git-hooks.sh +16 -0
  78. package/scripts/lib/capture.sh +356 -0
  79. package/scripts/lib/common.sh +262 -0
  80. package/scripts/lib/credential.sh +237 -0
  81. package/scripts/lib/fingerprint-rescue.js +123 -0
  82. package/scripts/lib/flow.sh +448 -0
  83. package/scripts/lib/flow_record.sh +210 -0
  84. package/scripts/lib/mask.sh +49 -0
  85. package/scripts/lib/memory.sh +427 -0
  86. package/scripts/lib/migrate.sh +390 -0
  87. package/scripts/lib/migrators/README.md +23 -0
  88. package/scripts/lib/migrators/memory/v1_to_v2.sh +15 -0
  89. package/scripts/lib/migrators/recent_urls/README.md +13 -0
  90. package/scripts/lib/migrators/stats/README.md +24 -0
  91. package/scripts/lib/node/chrome-devtools-bridge.mjs +1812 -0
  92. package/scripts/lib/node/mcp-server.mjs +531 -0
  93. package/scripts/lib/node/mcp-tools.json +68 -0
  94. package/scripts/lib/node/playwright-driver.mjs +1104 -0
  95. package/scripts/lib/node/totp-core.mjs +52 -0
  96. package/scripts/lib/node/totp.mjs +52 -0
  97. package/scripts/lib/node/url-pattern-cluster.mjs +102 -0
  98. package/scripts/lib/node/url-pattern-resolver.mjs +77 -0
  99. package/scripts/lib/output.sh +79 -0
  100. package/scripts/lib/router.sh +342 -0
  101. package/scripts/lib/sanitize.sh +107 -0
  102. package/scripts/lib/secret/keychain.sh +91 -0
  103. package/scripts/lib/secret/libsecret.sh +74 -0
  104. package/scripts/lib/secret/plaintext.sh +75 -0
  105. package/scripts/lib/secret_backend_select.sh +57 -0
  106. package/scripts/lib/session.sh +153 -0
  107. package/scripts/lib/site.sh +126 -0
  108. package/scripts/lib/stats.sh +419 -0
  109. package/scripts/lib/tool/.gitkeep +0 -0
  110. package/scripts/lib/tool/chrome-devtools-mcp.sh +349 -0
  111. package/scripts/lib/tool/obscura.sh +249 -0
  112. package/scripts/lib/tool/playwright-cli.sh +155 -0
  113. package/scripts/lib/tool/playwright-lib.sh +106 -0
  114. package/scripts/lib/verb_helpers.sh +222 -0
  115. package/scripts/lib/visual-rescue-default.sh +145 -0
  116. package/scripts/regenerate-docs.sh +99 -0
  117. package/uninstall.sh +51 -0
@@ -0,0 +1,97 @@
1
#!/usr/bin/env bash
# scripts/browser-wait.sh — explicit wait for element state.
# Usage: bash scripts/browser-wait.sh [--site NAME] [--tool NAME] [--dry-run]
#                                      [--raw] --selector CSS
#                                      [--state visible|hidden|attached|detached]
#                                      [--timeout MS]
#
# Routes to chrome-devtools-mcp by default (Phase 6 part 4). Stateless —
# works one-shot or daemon-routed (parallel to eval/audit/snapshot).
#
# Flow: parse verb-specific flags out of REMAINING_ARGV, validate them,
# short-circuit on --dry-run, otherwise pick a tool, run the adapter through
# invoke_with_retry, echo the adapter output and emit a one-line summary.
# Exit code: 0 on success, EXIT_USAGE_ERROR on bad flags, otherwise the
# adapter's own exit code.

set -euo pipefail
IFS=$'\n\t'

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=lib/common.sh
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/lib/common.sh"
# shellcheck source=lib/output.sh
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/lib/output.sh"
# shellcheck source=lib/router.sh
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/lib/router.sh"
# shellcheck source=lib/verb_helpers.sh
# shellcheck disable=SC1091
source "${SCRIPT_DIR}/lib/verb_helpers.sh"

init_paths

# Start-of-run timestamp, exported for the summary emitter.
SUMMARY_T0="$(now_ms)"; export SUMMARY_T0

# NOTE(review): parse_verb_globals is defined in the sourced libs; this script
# relies on it leaving REMAINING_ARGV (array) and ARG_DRY_RUN behind — confirm
# against lib/verb_helpers.sh.
parse_verb_globals "$@"

resolve_session_storage_state

# Verb-specific flag parsing. Recognised flags are validated and re-appended
# to verb_argv; anything unrecognised is passed through untouched so the
# adapter can decide what to do with it.
selector="" state="" timeout=""
verb_argv=()
i=0
while [ "${i}" -lt "${#REMAINING_ARGV[@]}" ]; do
  case "${REMAINING_ARGV[i]}" in
    --selector)
      selector="${REMAINING_ARGV[i+1]:-}"
      [ -n "${selector}" ] || die "${EXIT_USAGE_ERROR}" "--selector requires a value"
      verb_argv+=(--selector "${selector}")
      i=$((i + 2))
      ;;
    --state)
      state="${REMAINING_ARGV[i+1]:-}"
      [ -n "${state}" ] || die "${EXIT_USAGE_ERROR}" "--state requires a value"
      case "${state}" in
        visible|hidden|attached|detached) ;;
        *) die "${EXIT_USAGE_ERROR}" "--state must be one of {visible, hidden, attached, detached} (got: ${state})" ;;
      esac
      verb_argv+=(--state "${state}")
      i=$((i + 2))
      ;;
    --timeout)
      timeout="${REMAINING_ARGV[i+1]:-}"
      [ -n "${timeout}" ] || die "${EXIT_USAGE_ERROR}" "--timeout requires a value (milliseconds)"
      # Reject anything that is not a plain non-negative integer here, so a
      # typo such as "5s" or a swallowed flag fails as a usage error instead
      # of reaching the adapter.
      case "${timeout}" in
        *[!0-9]*) die "${EXIT_USAGE_ERROR}" "--timeout must be a non-negative integer in milliseconds (got: ${timeout})" ;;
      esac
      verb_argv+=(--timeout "${timeout}")
      i=$((i + 2))
      ;;
    *)
      verb_argv+=("${REMAINING_ARGV[i]}")
      i=$((i + 1))
      ;;
  esac
done

[ -n "${selector}" ] || die "${EXIT_USAGE_ERROR}" "wait requires --selector CSS"

# Dry run: report what would happen and stop before any tool is picked.
if [ "${ARG_DRY_RUN:-0}" = "1" ]; then
  ok "dry-run: would wait for ${selector} (state=${state:-visible}, timeout=${timeout:-default})"
  emit_summary verb=wait tool=none why=dry-run status=ok selector="${selector}" \
    state="${state}" timeout="${timeout}" dry_run=true
  exit 0
fi

# pick_tool prints "<tool>\t<reason>"; split on the first tab.
picked="$(pick_tool wait "${verb_argv[@]}")"
tool_name="${picked%%$'\t'*}"
why="${picked#*$'\t'}"

source_picked_adapter "${tool_name}"

# errexit is suspended so a failing adapter still gets its output printed and
# a status=error summary emitted before we exit with its code.
set +e
adapter_out="$(invoke_with_retry wait "${verb_argv[@]}")"
adapter_rc=$?
set -e

[ -n "${adapter_out}" ] && printf '%s\n' "${adapter_out}"

if [ "${adapter_rc}" -eq 0 ]; then
  emit_summary verb=wait tool="${tool_name}" why="${why}" status=ok selector="${selector}"
  exit 0
fi
emit_summary verb=wait tool="${tool_name}" why="${why}" status=error selector="${selector}"
exit "${adapter_rc}"
@@ -0,0 +1,16 @@
1
#!/usr/bin/env bash
# scripts/install-git-hooks.sh — point this checkout's git at the tracked
# .githooks/ directory and make the pre-commit hook executable.
#
# Exits 0 without doing anything when the parent directory is not a git
# checkout or git is not on PATH (both are reported on stderr). Exits 1 when
# the hook file itself is missing; that check runs BEFORE git config is
# touched so a missing hook never leaves core.hooksPath pointing at a
# directory with no hook in it.
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
HOOK_FILE="${REPO_ROOT}/.githooks/pre-commit"

# .git is a directory in a normal clone and a plain file in a worktree or
# submodule; accept either.
if [ ! -d "${REPO_ROOT}/.git" ] && [ ! -f "${REPO_ROOT}/.git" ]; then
  printf 'not a git checkout: %s\n' "${REPO_ROOT}" >&2
  exit 0
fi
if ! command -v git >/dev/null 2>&1; then
  printf 'git not on PATH; cannot install hooks\n' >&2
  exit 0
fi
if [ ! -f "${HOOK_FILE}" ]; then
  printf 'pre-commit hook not found: %s\n' "${HOOK_FILE}" >&2
  exit 1
fi

git -C "${REPO_ROOT}" config core.hooksPath .githooks
chmod +x "${HOOK_FILE}"
printf 'pre-commit hook active (.githooks/pre-commit)\n'
@@ -0,0 +1,356 @@
1
+ # scripts/lib/capture.sh — capture artifact pipeline (Phase 7 part 1-i).
2
+ #
3
+ # Three-function API:
4
+ # capture_init_dir — idempotent mkdir 0700 of ${CAPTURES_DIR}
5
+ # capture_start <verb> — atomic NNN allocation + meta.json (in_progress)
6
+ # capture_finish [status] — finalize meta.json + update _index.json
7
+ #
8
+ # Verbs sandwich their per-aspect file writes between capture_start and
9
+ # capture_finish. After capture_start: ${CAPTURE_ID} + ${CAPTURE_DIR} are
10
+ # exported; the verb writes any per-aspect files (snapshot.json,
11
+ # console.json, network.har, screenshot.png, etc.) into ${CAPTURE_DIR};
12
+ # capture_finish recomputes total_bytes + files inventory.
13
+ #
14
+ # 7-i scope: no sanitization (7-iii), no retention/prune (7-v), no
15
+ # --unsanitized (7-iv). Wired only to snapshot — structurally safe (no
16
+ # headers, no cookies). Sanitization arrives when console.json + network.har
17
+ # enter the picture.
18
+ #
19
+ # Atomicity: NNN allocation uses tmpfile + rename(2) per parent spec §4.5
20
+ # ("tmpfile + mv, no flock"). Single-process per invocation expected; two
21
+ # concurrent capture_starts could race on the same NNN. v1 design doesn't
22
+ # pay flock complexity. Future hardening: mkdir without -p so the second
23
+ # loser fails fast → retry with bumped id.
24
+
25
# Source-once guard: if this library was already loaded into the current
# shell, stop reading the file here; otherwise mark it as loaded.
if [ -n "${_BROWSER_LIB_CAPTURE_LOADED:-}" ]; then
  return 0
fi
readonly _BROWSER_LIB_CAPTURE_LOADED=1
27
+
28
# capture_init_dir
#   Ensure ${CAPTURES_DIR} exists and has mode 0700.
#   Creates the directory (and any missing parents) when absent. The mode is
#   enforced on every call, so a pre-existing directory with looser
#   permissions is tightened rather than silently accepted. Safe to call
#   repeatedly.
#   Globals: CAPTURES_DIR (read).
capture_init_dir() {
  if [ ! -d "${CAPTURES_DIR}" ]; then
    mkdir -p "${CAPTURES_DIR}"
  fi
  chmod 700 "${CAPTURES_DIR}"
}
36
+
37
# _capture_iso_now — print the current time as a UTC ISO 8601 timestamp with
# second precision (e.g. 2024-01-31T12:34:56Z). Uses only `date -u` with a
# strftime format, which both GNU and BSD date accept.
_capture_iso_now() {
  local iso_fmt='%Y-%m-%dT%H:%M:%SZ'
  date -u "+${iso_fmt}"
}
41
+
42
# _capture_iso_to_epoch <iso> — ISO 8601 UTC string → epoch seconds.
#   Tries GNU date (`-d`) first, then BSD date (`-j -f`). Both are run with
#   `-u`: the BSD format treats the trailing "Z" as a literal character, so
#   without -u the timestamp would be interpreted in the local timezone.
#   Prints 0 on an empty argument or on parse failure (caller treats 0 as
#   "very old", which is safe for prune logic). An empty argument is handled
#   explicitly because GNU `date -d ""` succeeds and yields today's midnight.
_capture_iso_to_epoch() {
  local iso="${1:-}"
  local epoch=""

  if [ -z "${iso}" ]; then
    printf '0'
    return 0
  fi

  if epoch="$(date -u -d "${iso}" +%s 2>/dev/null)" && [ -n "${epoch}" ]; then
    printf '%s\n' "${epoch}"
  elif epoch="$(date -u -j -f '%Y-%m-%dT%H:%M:%SZ' "${iso}" +%s 2>/dev/null)" && [ -n "${epoch}" ]; then
    printf '%s\n' "${epoch}"
  else
    printf '0'
  fi
}
51
+
52
# _capture_now_epoch — print the current time in epoch seconds.
#   When BROWSER_SKILL_CAPTURE_NOW_EPOCH is set and non-empty (test-only
#   seam), its value is printed verbatim instead, so age-threshold tests can
#   pin "now" to a fixed instant. Otherwise this is a single `date +%s`.
_capture_now_epoch() {
  local pinned="${BROWSER_SKILL_CAPTURE_NOW_EPOCH:-}"
  if [ -z "${pinned}" ]; then
    date +%s
    return
  fi
  printf '%s\n' "${pinned}"
}
63
+
64
# _capture_read_config — emits {retention_count, retention_days, warn_at_pct}
#   read from ${CONFIG_FILE}. Each field missing (or null) in the file falls
#   back to its spec §4.5 default (500 / 14 / 90). The full default object is
#   emitted when the file is absent, when jq fails to parse it, or when jq
#   succeeds but prints nothing (an empty or whitespace-only file) — the last
#   case would otherwise hand callers an empty string.
#   Globals: CONFIG_FILE (read).
_capture_read_config() {
  local defaults='{"retention_count":500,"retention_days":14,"warn_at_pct":90}'
  local resolved=""

  if [ -f "${CONFIG_FILE}" ]; then
    # Capture first so partial output from a failing jq is discarded rather
    # than concatenated with the defaults.
    resolved="$(jq '{
      retention_count: (.retention_count // 500),
      retention_days:  (.retention_days  // 14),
      warn_at_pct:     (.warn_at_pct     // 90)
    }' "${CONFIG_FILE}" 2>/dev/null)" || resolved=""
  fi

  if [ -n "${resolved}" ]; then
    printf '%s\n' "${resolved}"
  else
    printf '%s' "${defaults}"
  fi
}
79
+
80
# _capture_pad_id N — print N as a capture id: zero-padded to three digits
# while N is below 1000 ("7" → "007"), plain decimal from 1000 upward.
_capture_pad_id() {
  local id="$1"
  local fmt='%d'
  if [ "${id}" -lt 1000 ]; then
    fmt='%03d'
  fi
  # shellcheck disable=SC2059 — fmt is one of two fixed literals above.
  printf "${fmt}" "${id}"
}
89
+
90
# _capture_read_next_id → echoes the next unused integer id.
#   Reads .next_id from ${CAPTURES_DIR}/_index.json. Prints 1 when the index
#   is absent, unreadable, empty, or holds anything other than a plain
#   non-negative integer — callers feed the result straight into integer
#   tests and arithmetic, so an empty or non-numeric value must never escape.
#   Globals: CAPTURES_DIR (read).
_capture_read_next_id() {
  local idx="${CAPTURES_DIR}/_index.json"
  local next_id=""

  if [ -f "${idx}" ]; then
    next_id="$(jq -r '.next_id // 1' "${idx}" 2>/dev/null)" || next_id=""
  fi

  # jq exits 0 with no output on an empty file, and prints strings/floats
  # verbatim; normalise all of those to the documented default.
  case "${next_id}" in
    ''|*[!0-9]*) next_id=1 ;;
  esac

  printf '%s\n' "${next_id}"
}
100
+
101
# _capture_write_index <next_id> <count> <latest> <total_bytes>
#   Writes ${CAPTURES_DIR}/_index.json with schema_version 1 and the four
#   given fields. next_id, count and total_bytes are passed to jq as JSON
#   (--argjson) and must be numeric; latest is stored as a string.
#   Atomic write via tmpfile + mv (same FS guarantees rename(2) atomicity).
#   Returns non-zero — leaving any existing index untouched and removing the
#   tmpfile — if the JSON cannot be rendered or the tmpfile cannot be moved
#   into place. Without that check a failed jq would leave an empty tmpfile
#   that then replaces a good index.
#   Globals: CAPTURES_DIR (read).
_capture_write_index() {
  local next_id="$1" count="$2" latest="$3" total_bytes="$4"
  local idx="${CAPTURES_DIR}/_index.json"
  local tmp="${idx}.tmp.$$"

  if ! jq -n \
    --argjson schema_version 1 \
    --argjson next_id "${next_id}" \
    --argjson count "${count}" \
    --arg latest "${latest}" \
    --argjson total_bytes "${total_bytes}" \
    '{schema_version: $schema_version, next_id: $next_id, count: $count, latest: $latest, total_bytes: $total_bytes}' \
    > "${tmp}"; then
    rm -f "${tmp}"
    printf '_capture_write_index: failed to render %s\n' "${idx}" >&2
    return 1
  fi

  if ! { chmod 600 "${tmp}" && mv "${tmp}" "${idx}"; }; then
    rm -f "${tmp}"
    printf '_capture_write_index: failed to install %s\n' "${idx}" >&2
    return 1
  fi
}
118
+
119
# capture_start <verb>
#   Allocates the next NNN, creates ${CAPTURES_DIR}/NNN with mode 0700, writes
#   meta.json (status=in_progress), bumps _index.next_id, and exports
#   CAPTURE_ID + CAPTURE_DIR. <verb> defaults to "unknown".
#
#   Allocation uses a plain `mkdir` (no -p) so an id whose directory already
#   exists — stale/lost index, or a concurrent capture_start that won the
#   race — is never reused: the id is bumped and the claim retried. Reusing
#   it would overwrite the other capture's meta.json.
#
#   Returns 1 (message on stderr) if no directory can be claimed or meta.json
#   cannot be written.
#   Globals: CAPTURES_DIR (read); CAPTURE_ID, CAPTURE_DIR (written, exported).
capture_start() {
  local verb="${1:-unknown}"
  capture_init_dir

  local next_id padded
  next_id="$(_capture_read_next_id)"

  # Claim a fresh directory. mkdir fails if the path exists, which is the
  # collision signal; any other failure (path still absent afterwards) is a
  # real error and is not retried. The attempt cap only bounds the loop.
  local attempts=0
  while :; do
    padded="$(_capture_pad_id "${next_id}")"
    if mkdir -m 700 "${CAPTURES_DIR}/${padded}" 2>/dev/null; then
      break
    fi
    if [ ! -e "${CAPTURES_DIR}/${padded}" ] || [ "${attempts}" -ge 1000 ]; then
      printf 'capture_start: cannot allocate a capture directory under %s\n' "${CAPTURES_DIR}" >&2
      return 1
    fi
    next_id=$((next_id + 1))
    attempts=$((attempts + 1))
  done

  CAPTURE_ID="${padded}"
  CAPTURE_DIR="${CAPTURES_DIR}/${padded}"
  export CAPTURE_ID CAPTURE_DIR

  local meta="${CAPTURE_DIR}/meta.json"
  local tmp="${meta}.tmp.$$"
  if ! jq -n \
    --arg capture_id "${padded}" \
    --arg verb "${verb}" \
    --argjson schema_version 1 \
    --arg started_at "$(_capture_iso_now)" \
    --arg status "in_progress" \
    '{capture_id: $capture_id, verb: $verb, schema_version: $schema_version, started_at: $started_at, status: $status}' \
    > "${tmp}"; then
    rm -f "${tmp}"
    printf 'capture_start: failed to write %s\n' "${meta}" >&2
    return 1
  fi
  chmod 600 "${tmp}"
  mv "${tmp}" "${meta}"

  # Bump _index.next_id immediately (allocation-time bump). count + latest +
  # total_bytes will be authoritative after capture_finish; for now hold the
  # prior values where present. New _index gets count=0 here; capture_finish
  # recomputes it.
  local idx="${CAPTURES_DIR}/_index.json"
  local count=0 latest="" total_bytes=0
  if [ -f "${idx}" ]; then
    count="$(jq -r '.count // 0' "${idx}" 2>/dev/null || printf '0')"
    latest="$(jq -r '.latest // ""' "${idx}" 2>/dev/null || printf '')"
    total_bytes="$(jq -r '.total_bytes // 0' "${idx}" 2>/dev/null || printf '0')"
  fi
  # An empty index file makes jq print nothing with exit 0; the numeric
  # fields must still be valid JSON numbers for _capture_write_index.
  case "${count}" in ''|*[!0-9]*) count=0 ;; esac
  case "${total_bytes}" in ''|*[!0-9]*) total_bytes=0 ;; esac

  _capture_write_index "$((next_id + 1))" "${count}" "${latest}" "${total_bytes}"
}
167
+
168
# _capture_file_size <path> → size of <path> in bytes.
#   Tries the GNU stat spelling first, then the BSD one; prints 0 when
#   neither succeeds (e.g. the path does not exist).
_capture_file_size() {
  local target="$1"
  local size
  if size="$(stat -c '%s' "${target}" 2>/dev/null)"; then
    printf '%s\n' "${size}"
  elif size="$(stat -f '%z' "${target}" 2>/dev/null)"; then
    printf '%s\n' "${size}"
  else
    printf '0'
  fi
}
173
+
174
# _capture_inventory <dir> → JSON array of {name, bytes}, one element per
#   regular file directly inside <dir>, ordered by `sort` on the full path.
#   meta.json is deliberately included — its size counts toward total_bytes.
#   Subdirectories are not descended into (-maxdepth 1). Prints [] when the
#   directory holds no regular files.
_capture_inventory() {
  local root="$1"
  local -a rows=()
  local path size
  # /usr/bin/find avoids any rtk fff aliasing.
  while IFS= read -r path; do
    size="$(_capture_file_size "${path}")"
    rows+=("$(jq -n --arg name "${path##*/}" --argjson bytes "${size}" '{name: $name, bytes: $bytes}')")
  done < <(/usr/bin/find "${root}" -maxdepth 1 -type f | sort)

  if [ "${#rows[@]}" -gt 0 ]; then
    # Each row is a standalone JSON object; slurp them into one array.
    printf '%s\n' "${rows[@]}" | jq -s '.'
    return
  fi
  printf '[]'
}
194
+
195
+ # capture_finish [status] [sanitized]
196
+ # Default status: "ok". Default sanitized: "true".
197
+ # Updates meta.json (finished_at, status, sanitized, total_bytes, files);
198
+ # updates _index.json (count, latest, total_bytes).
199
+ #
200
+ # Phase 7 part 1-iv: optional 2nd arg `sanitized` ∈ {true, false}. Writes
201
+ # meta.sanitized field for audit. Field is always present in v1+ schema;
202
+ # default true matches the sanitized-by-default contract.
203
+ capture_finish() {
204
+ local status="${1:-ok}"
205
+ local sanitized="${2:-true}"
206
+ : "${CAPTURE_DIR:?capture_finish: CAPTURE_DIR not set (call capture_start first)}"
207
+ : "${CAPTURE_ID:?capture_finish: CAPTURE_ID not set (call capture_start first)}"
208
+
209
+ local meta="${CAPTURE_DIR}/meta.json"
210
+ [ -f "${meta}" ] || die "${EXIT_GENERIC_ERROR:-1}" "capture_finish: meta.json missing for ${CAPTURE_ID}"
211
+
212
+ local files_json total_bytes
213
+ files_json="$(_capture_inventory "${CAPTURE_DIR}")"
214
+ total_bytes="$(printf '%s' "${files_json}" | jq '[.[].bytes] | add // 0')"
215
+
216
+ local tmp="${meta}.tmp.$$"
217
+ jq \
218
+ --arg finished_at "$(_capture_iso_now)" \
219
+ --arg status "${status}" \
220
+ --argjson sanitized "${sanitized}" \
221
+ --argjson total_bytes "${total_bytes}" \
222
+ --argjson files "${files_json}" \
223
+ '. + {finished_at: $finished_at, status: $status, sanitized: $sanitized, total_bytes: $total_bytes, files: $files}' \
224
+ "${meta}" > "${tmp}"
225
+ chmod 600 "${tmp}"
226
+ mv "${tmp}" "${meta}"
227
+
228
+ # Update _index.json: count is the count of capture dirs on disk; latest is
229
+ # this CAPTURE_ID; total_bytes is the sum across all capture dirs (cached
230
+ # for doctor UX). Pruning will keep this honest in 7-v.
231
+ local idx="${CAPTURES_DIR}/_index.json"
232
+ local next_id
233
+ if [ -f "${idx}" ]; then
234
+ next_id="$(jq -r '.next_id // 1' "${idx}" 2>/dev/null || printf '1')"
235
+ else
236
+ next_id=1
237
+ fi
238
+ local on_disk_count
239
+ on_disk_count="$(/usr/bin/find "${CAPTURES_DIR}" -maxdepth 1 -mindepth 1 -type d | wc -l | tr -d ' ')"
240
+ local on_disk_total
241
+ on_disk_total="$(/usr/bin/find "${CAPTURES_DIR}" -mindepth 2 -type f -exec stat -c '%s' {} + 2>/dev/null \
242
+ || /usr/bin/find "${CAPTURES_DIR}" -mindepth 2 -type f -exec stat -f '%z' {} + 2>/dev/null \
243
+ || printf '')"
244
+ local on_disk_total_sum
245
+ on_disk_total_sum="$(printf '%s\n' "${on_disk_total}" | awk '{s+=$1} END {print s+0}')"
246
+
247
+ _capture_write_index "${next_id}" "${on_disk_count}" "${CAPTURE_ID}" "${on_disk_total_sum}"
248
+
249
+ # Phase 7 part 1-v: auto-prune at the end of every successful finalize.
250
+ # Idempotent — no-op when state is under thresholds.
251
+ capture_prune
252
+ }
253
+
254
# capture_prune (Phase 7 part 1-v)
#   Reads retention_count / retention_days via _capture_read_config (spec
#   §4.5 defaults 500 / 14 are also used here if a value is not a plain
#   non-negative integer).
#   Walks ${CAPTURES_DIR}/*/meta.json; computes age + count.
#   Removes captures oldest-first while EITHER threshold is exceeded.
#   Skip rules:
#     - meta.is_baseline == true        (Phase 8 forward-compat — never prune)
#     - meta.status == "in_progress"    (in-flight; never prune)
#     - meta.json missing, unparseable, or without started_at (not considered)
#   After a prune: rewrites _index.json (count, latest, total_bytes), where
#   latest is the numerically highest surviving id.
#   Idempotent. Returns 0 immediately when ${CAPTURES_DIR} does not exist.
#   Globals: CAPTURES_DIR (read).
capture_prune() {
  [ -d "${CAPTURES_DIR}" ] || return 0

  local config retention_count retention_days
  config="$(_capture_read_config)"
  retention_count="$(printf '%s' "${config}" | jq -r '.retention_count')"
  retention_days="$(printf '%s' "${config}" | jq -r '.retention_days')"
  # Both values go into integer tests / arithmetic below; anything else
  # (empty, "null", floats, strings) would abort the caller mid-finalize.
  case "${retention_count}" in ''|*[!0-9]*) retention_count=500 ;; esac
  case "${retention_days}" in ''|*[!0-9]*) retention_days=14 ;; esac

  local now_epoch age_threshold_sec
  now_epoch="$(_capture_now_epoch)"
  age_threshold_sec=$(( retention_days * 86400 ))

  # Build sorted (oldest-first) entry list.
  # Format: "<epoch>\t<id>\t<is_baseline>\t<status>"
  # Ties on epoch are broken by numeric id so "999" sorts before "1000".
  local entries
  entries="$(
    /usr/bin/find "${CAPTURES_DIR}" -mindepth 1 -maxdepth 1 -type d 2>/dev/null \
      | while IFS= read -r dir; do
          local id meta started_at started_epoch is_baseline status
          id="$(basename "${dir}")"
          meta="${dir}/meta.json"
          [ -f "${meta}" ] || continue
          # A corrupt meta.json is skipped; without the `|| continue` a jq
          # failure here aborts the whole prune under errexit + pipefail.
          started_at="$(jq -r '.started_at // ""' "${meta}" 2>/dev/null)" || continue
          [ -n "${started_at}" ] || continue
          started_epoch="$(_capture_iso_to_epoch "${started_at}")"
          is_baseline="$(jq -r '.is_baseline // false' "${meta}" 2>/dev/null || printf 'false')"
          status="$(jq -r '.status // "in_progress"' "${meta}" 2>/dev/null || printf 'in_progress')"
          printf '%s\t%s\t%s\t%s\n' "${started_epoch}" "${id}" "${is_baseline}" "${status}"
        done \
      | sort -t "$(printf '\t')" -k1,1n -k2,2n
  )"

  [ -n "${entries}" ] || return 0

  # total_count includes protected entries: they occupy retention slots even
  # though they are never removed.
  local total_count
  total_count="$(printf '%s\n' "${entries}" | wc -l | tr -d ' ')"

  # Walk oldest-first; prune while either threshold exceeded AND entry is prunable.
  local pruned_any=0
  local entry_epoch entry_id entry_baseline entry_status age_sec
  while IFS=$'\t' read -r entry_epoch entry_id entry_baseline entry_status; do
    [ -n "${entry_id}" ] || continue
    # Skip protected.
    if [ "${entry_baseline}" = "true" ] || [ "${entry_status}" = "in_progress" ]; then
      continue
    fi
    age_sec=$(( now_epoch - entry_epoch ))
    if [ "${total_count}" -le "${retention_count}" ] && [ "${age_sec}" -le "${age_threshold_sec}" ]; then
      # Under both thresholds — stop (subsequent entries are newer).
      break
    fi
    rm -rf "${CAPTURES_DIR:?}/${entry_id}"
    warn "capture_prune: pruned captures/${entry_id}/ (age=${age_sec}s, baseline=${entry_baseline})" >&2
    pruned_any=1
    total_count=$(( total_count - 1 ))
  done <<< "${entries}"

  # Recompute _index.json post-prune.
  if [ "${pruned_any}" = "1" ]; then
    local next_id
    next_id="$(_capture_read_next_id)"

    local on_disk_count
    on_disk_count="$(/usr/bin/find "${CAPTURES_DIR}" -maxdepth 1 -mindepth 1 -type d | wc -l | tr -d ' ')"

    # GNU stat first, BSD stat as fallback; only purely numeric lines are
    # summed so stray output from the wrong stat flavour cannot skew it.
    local on_disk_total
    on_disk_total="$(/usr/bin/find "${CAPTURES_DIR}" -mindepth 2 -type f -exec stat -c '%s' {} + 2>/dev/null \
      || /usr/bin/find "${CAPTURES_DIR}" -mindepth 2 -type f -exec stat -f '%z' {} + 2>/dev/null \
      || printf '')"
    local on_disk_total_sum
    on_disk_total_sum="$(printf '%s\n' "${on_disk_total}" | awk '/^[0-9]+$/ {s+=$1} END {print s+0}')"

    # Latest = numerically highest surviving capture id. A lexicographic sort
    # would rank "999" above "1000" once ids outgrow the 3-digit padding.
    # Non-numeric directory names are ignored. Empty when nothing survives.
    local latest_id
    latest_id="$(/usr/bin/find "${CAPTURES_DIR}" -maxdepth 1 -mindepth 1 -type d \
      | awk -F/ '$NF ~ /^[0-9]+$/ {print $NF}' \
      | sort -n \
      | tail -n 1)"

    _capture_write_index "${next_id}" "${on_disk_count}" "${latest_id}" "${on_disk_total_sum}"
  fi
}