eagle-mem 4.12.1 → 4.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/README.md +4 -0
- package/db/migrate.sh +11 -1
- package/docs/agent-compatibility/claude-code.md +27 -0
- package/docs/agent-compatibility/codex.md +1 -0
- package/docs/reviews/2026-06-10-full-spectrum-hardening.md +90 -0
- package/hooks/post-tool-use.sh +73 -22
- package/hooks/session-end.sh +10 -0
- package/hooks/session-start.sh +24 -1
- package/hooks/stop.sh +7 -2
- package/integrations/google_antigravity_hook.py +61 -26
- package/lib/codex-hooks.sh +5 -1
- package/lib/common.sh +104 -4
- package/lib/db-core.sh +28 -0
- package/lib/db-events.sh +13 -0
- package/lib/db-observations.sh +10 -3
- package/lib/db-sessions.sh +10 -1
- package/lib/db-summaries.sh +4 -1
- package/lib/hooks-sessionstart.sh +32 -13
- package/lib/provider.sh +10 -2
- package/lib/updater.sh +16 -2
- package/package.json +1 -1
- package/scripts/enrich-summary.sh +4 -1
- package/scripts/logs.sh +44 -12
- package/scripts/orchestrate.sh +34 -4
- package/scripts/session.sh +5 -0
- package/scripts/statusline-em.sh +5 -1
- package/scripts/tasks.sh +6 -3
- package/scripts/test.sh +11 -1
- package/tests/test_context_budget.sh +117 -0
- package/tests/test_data_integrity_hardening.sh +115 -0
- package/tests/test_mod_tracker_concurrency.sh +142 -0
- package/tests/test_redaction_coverage.sh +183 -0
- package/tests/test_reliability_retention.sh +75 -0
- package/tests/test_test_runner_no_abort.sh +86 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Phase 3 reliability hardening regressions:
|
|
3
|
+
# A. Auto-scan/index in-flight vs freshness marker separation — a crashed or
|
|
4
|
+
# output-less job must NOT block retry for a day (only a genuine success
|
|
5
|
+
# sets the freshness marker; the foreground touch is a short-lived debounce).
|
|
6
|
+
# B. eagle_events retention — the hook-observability table is pruned by age.
|
|
7
|
+
set -euo pipefail
|
|
8
|
+
|
|
9
|
+
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
|
10
|
+
tmp_dir=$(mktemp -d "$ROOT_DIR/.tmp-reliability.XXXXXX")
|
|
11
|
+
trap 'rm -rf "$tmp_dir"' EXIT
|
|
12
|
+
|
|
13
|
+
export HOME="$tmp_dir/home"
|
|
14
|
+
export EAGLE_MEM_DIR="$tmp_dir/eagle-mem"
|
|
15
|
+
mkdir -p "$HOME" "$EAGLE_MEM_DIR"
|
|
16
|
+
|
|
17
|
+
. "$ROOT_DIR/lib/common.sh"
|
|
18
|
+
"$ROOT_DIR/db/migrate.sh" >/dev/null
|
|
19
|
+
. "$ROOT_DIR/lib/db.sh"
|
|
20
|
+
. "$ROOT_DIR/lib/hooks-sessionstart.sh"
|
|
21
|
+
|
|
22
|
+
pass=0; fail=0
|
|
23
|
+
# Record a passing check: print its label ($1) and bump the global pass counter.
ok() {
  printf ' ok: %s\n' "$1"
  pass=$((pass + 1))
}
|
|
24
|
+
# Record a failing check: print its label ($1) and bump the global fail counter.
bad() {
  printf ' FAIL: %s\n' "$1"
  fail=$((fail + 1))
}
|
|
25
|
+
# Evaluate the shell expression in $1; report label $2 through ok when it
# succeeds and through bad when it fails. The positional parameters are used
# directly (no locals) so the evaluated expression sees the caller's variables.
assert() {
  if ! eval "$1"; then
    bad "$2"
    return
  fi
  ok "$2"
}
|
|
26
|
+
# Inverse of assert: evaluate the shell expression in $1 and report label $2
# through bad when it succeeds, through ok when it fails.
assert_not() {
  if eval "$1"; then
    bad "$2"
    return
  fi
  ok "$2"
}
|
|
27
|
+
|
|
28
|
+
project="test/reliability"
|
|
29
|
+
|
|
30
|
+
# ── A. In-flight vs freshness ───────────────────────────────────────────────
|
|
31
|
+
# Crashed/output-less job: only the in-flight marker exists, never the freshness one.
|
|
32
|
+
_eagle_state_touch "scan-inflight" "$project"
|
|
33
|
+
assert_not '_eagle_state_fresh "scan" "$project" 1' \
|
|
34
|
+
"A1: in-flight-only does NOT satisfy the 1-day freshness gate (retry not blocked for a day)"
|
|
35
|
+
assert '_eagle_state_inflight_fresh "scan" "$project" 15' \
|
|
36
|
+
"A2: a recent in-flight marker debounces concurrent spawns"
|
|
37
|
+
|
|
38
|
+
# Genuine success sets the freshness marker.
|
|
39
|
+
_eagle_state_touch "scan" "$project"
|
|
40
|
+
assert '_eagle_state_fresh "scan" "$project" 1' \
|
|
41
|
+
"A3: a success-set freshness marker satisfies the freshness gate"
|
|
42
|
+
|
|
43
|
+
# A stale in-flight marker (job died long ago) must age out so retry reopens.
|
|
44
|
+
stale_inflight=$(_eagle_state_file "index-inflight" "$project")
|
|
45
|
+
mkdir -p "$(dirname "$stale_inflight")"
|
|
46
|
+
: > "$stale_inflight"
|
|
47
|
+
touch -t 202001010000 "$stale_inflight"
|
|
48
|
+
assert_not '_eagle_state_inflight_fresh "index" "$project" 15' \
|
|
49
|
+
"A4: a stale in-flight marker is not 'fresh' — retry is allowed after the debounce window"
|
|
50
|
+
|
|
51
|
+
# ── B. eagle_events retention ───────────────────────────────────────────────
|
|
52
|
+
p1=$(eagle_sql_escape "$project")
|
|
53
|
+
p2=$(eagle_sql_escape "other/project")
|
|
54
|
+
eagle_db "INSERT INTO eagle_events (project, session_id, agent, event_type, status, created_at)
|
|
55
|
+
VALUES ('$p1','s-old','codex','hook_started','ok', strftime('%Y-%m-%dT%H:%M:%fZ','now','-60 days'));" >/dev/null
|
|
56
|
+
eagle_db "INSERT INTO eagle_events (project, session_id, agent, event_type, status, created_at)
|
|
57
|
+
VALUES ('$p1','s-new','codex','hook_started','ok', strftime('%Y-%m-%dT%H:%M:%fZ','now'));" >/dev/null
|
|
58
|
+
eagle_db "INSERT INTO eagle_events (project, session_id, agent, event_type, status, created_at)
|
|
59
|
+
VALUES ('$p2','s-old2','codex','hook_started','ok', strftime('%Y-%m-%dT%H:%M:%fZ','now','-60 days'));" >/dev/null
|
|
60
|
+
|
|
61
|
+
eagle_prune_events 30 "$project"
|
|
62
|
+
old_p1=$(eagle_db "SELECT COUNT(*) FROM eagle_events WHERE project='$p1' AND session_id='s-old';")
|
|
63
|
+
new_p1=$(eagle_db "SELECT COUNT(*) FROM eagle_events WHERE project='$p1' AND session_id='s-new';")
|
|
64
|
+
old_p2=$(eagle_db "SELECT COUNT(*) FROM eagle_events WHERE project='$p2' AND session_id='s-old2';")
|
|
65
|
+
# B1: the old event in the target project must no longer be counted.
# Explicit if/else so the failure branch runs only when the comparison fails,
# never as a fallback for a non-zero status from the success branch.
if [ "$old_p1" = "0" ]; then
  ok "B1: old event (60d) pruned for the target project"
else
  bad "B1: old event not pruned (got $old_p1)"
fi
|
|
66
|
+
# B2: the recent event in the target project must still be counted once.
# Explicit if/else so the failure branch runs only when the comparison fails,
# never as a fallback for a non-zero status from the success branch.
if [ "$new_p1" = "1" ]; then
  ok "B2: recent event retained"
else
  bad "B2: recent event lost (got $new_p1)"
fi
|
|
67
|
+
# B3: the other project's old event must still be counted once.
# Explicit if/else so the failure branch runs only when the comparison fails,
# never as a fallback for a non-zero status from the success branch.
if [ "$old_p2" = "1" ]; then
  ok "B3: project filter — other project's old event untouched"
else
  bad "B3: project filter leaked (got $old_p2)"
fi
|
|
68
|
+
|
|
69
|
+
eagle_prune_events 30
|
|
70
|
+
old_p2b=$(eagle_db "SELECT COUNT(*) FROM eagle_events WHERE project='$p2' AND session_id='s-old2';")
|
|
71
|
+
# B4: the other project's old event must no longer be counted.
# Explicit if/else so the failure branch runs only when the comparison fails,
# never as a fallback for a non-zero status from the success branch.
if [ "$old_p2b" = "0" ]; then
  ok "B4: unscoped prune removes other project's old event"
else
  bad "B4: unscoped prune missed it (got $old_p2b)"
fi
|
|
72
|
+
|
|
73
|
+
echo ""
|
|
74
|
+
echo "test_reliability_retention: $pass passed, $fail failed"
|
|
75
|
+
[ "$fail" -eq 0 ] || exit 1
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# ═══════════════════════════════════════════════════════════
|
|
3
|
+
# Regression: scripts/test.sh must NOT abort mid-run when a
|
|
4
|
+
# single check fails.
|
|
5
|
+
#
|
|
6
|
+
# Guards against the `set -e` masking bug where the failure
|
|
7
|
+
# accumulator used `((errors++))`. Under `set -euo pipefail`,
|
|
8
|
+
# `((errors++))` returns exit status 1 when the pre-increment
|
|
9
|
+
# value is 0 (post-increment evaluates the old, falsy value),
|
|
10
|
+
# so the FIRST failing check aborted the entire runner —
|
|
11
|
+
# skipping every later check and the failure-count summary.
|
|
12
|
+
# The fix uses the assignment form `errors=$((errors + 1))`,
|
|
13
|
+
# which always returns 0.
|
|
14
|
+
# ═══════════════════════════════════════════════════════════
|
|
15
|
+
set -uo pipefail
|
|
16
|
+
|
|
17
|
+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
18
|
+
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|
19
|
+
TEST_SH="$REPO_DIR/scripts/test.sh"
|
|
20
|
+
|
|
21
|
+
pass=0
|
|
22
|
+
fail=0
|
|
23
|
+
# Report a passing check by label ($1) and count it in the global pass tally.
ok() {
  printf ' ok: %s\n' "$1"
  pass=$((pass + 1))
}
|
|
24
|
+
# Report a failing check by label ($1) and count it in the global fail tally.
bad() {
  printf ' FAIL: %s\n' "$1"
  fail=$((fail + 1))
}
|
|
25
|
+
|
|
26
|
+
# 1. The buggy idiom must be gone from the runner's executable code.
|
|
27
|
+
# Match only non-comment lines so the explanatory comment that names
|
|
28
|
+
# the old idiom does not trip the check.
|
|
29
|
+
# Succeeds when the file named by $1 has a non-comment line containing the
# literal ((errors++)) idiom.
#
# The comment-stripped lines are captured first instead of being piped into
# `grep -q`: with `pipefail` on, grep -q exiting at its first match can kill
# the upstream grep with SIGPIPE, making the whole pipeline non-zero and
# hiding a real match. [[:space:]] is used instead of the non-POSIX \s.
_runner_has_unsafe_increment() {
  local code_lines
  code_lines=$(grep -vE '^[[:space:]]*#' "$1")
  grep -qE '\(\(errors\+\+\)\)' <<<"$code_lines"
}

if _runner_has_unsafe_increment "$TEST_SH"; then
  bad "scripts/test.sh still uses ((errors++)) in code (aborts under set -e)"
else
  ok "scripts/test.sh no longer uses ((errors++)) in code"
fi
|
|
34
|
+
if grep -qF 'errors=$((errors + 1))' "$TEST_SH"; then
|
|
35
|
+
ok "scripts/test.sh uses the safe assignment form"
|
|
36
|
+
else
|
|
37
|
+
bad "scripts/test.sh missing safe assignment accumulator"
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
# 2. Behavioral proof: replicate the runner's exact accumulation loop
|
|
41
|
+
# under `set -euo pipefail`, force the FIRST check to fail, and
|
|
42
|
+
# confirm execution reaches the end with a correct count.
|
|
43
|
+
result=$(
|
|
44
|
+
set -euo pipefail
|
|
45
|
+
errors=0
|
|
46
|
+
# Evaluate the command string in $2 with all output discarded and count a
# failure in `errors`. $1 is the check's label and is not used here.
# The assignment form of the increment always returns 0, so a failing check
# never trips `set -e`.
run_check() {
  if ! eval "$2" >/dev/null 2>&1; then
    errors=$((errors + 1))
  fi
}
|
|
53
|
+
run_check "first-fails" "false" # errors goes 0 -> 1
|
|
54
|
+
run_check "second-ok" "true"
|
|
55
|
+
run_check "third-fails" "false" # errors goes 1 -> 2
|
|
56
|
+
echo "REACHED_END errors=$errors"
|
|
57
|
+
)
|
|
58
|
+
status=$?
|
|
59
|
+
|
|
60
|
+
if [ "$status" -eq 0 ] && printf '%s' "$result" | grep -qF 'REACHED_END errors=2'; then
|
|
61
|
+
ok "runner reaches summary after failures with correct count (errors=2)"
|
|
62
|
+
else
|
|
63
|
+
bad "runner aborted early or miscounted (status=$status, out='$result')"
|
|
64
|
+
fi
|
|
65
|
+
|
|
66
|
+
# 3. Sanity: the old idiom genuinely aborts (proves the test is meaningful).
|
|
67
|
+
# Run as a separate `bash` process so `set -e` is fully active (a `|| ...`
|
|
68
|
+
# on a subshell would disable `set -e` inside it and hide the abort).
|
|
69
|
+
# Stop if the scratch file cannot be created. This script does not run under
# `set -e`, so an empty path would make the control run below fail for the
# wrong reason (non-zero status, no output) and still look like a real abort.
control_tmp=$(mktemp) || { echo "test_test_runner_no_abort: mktemp failed" >&2; exit 1; }
|
|
70
|
+
cat > "$control_tmp" <<'CTRL'
|
|
71
|
+
set -euo pipefail
|
|
72
|
+
errors=0
|
|
73
|
+
((errors++)) # pre-value 0 -> command returns 1 -> set -e aborts here
|
|
74
|
+
echo "SHOULD_NOT_PRINT"
|
|
75
|
+
CTRL
|
|
76
|
+
control_out=$(bash "$control_tmp" 2>/dev/null); control_status=$?
|
|
77
|
+
rm -f "$control_tmp"
|
|
78
|
+
if [ "$control_status" -ne 0 ] && ! printf '%s' "$control_out" | grep -qF 'SHOULD_NOT_PRINT'; then
|
|
79
|
+
ok "control: ((errors++)) from 0 aborts under set -e (bug is real)"
|
|
80
|
+
else
|
|
81
|
+
bad "control: ((errors++)) did not abort (status=$control_status) — test premise invalid"
|
|
82
|
+
fi
|
|
83
|
+
|
|
84
|
+
echo ""
|
|
85
|
+
echo "test_test_runner_no_abort: $pass passed, $fail failed"
|
|
86
|
+
[ "$fail" -eq 0 ] || exit 1
|