eagle-mem 4.12.1 → 4.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +43 -0
- package/README.md +4 -0
- package/db/migrate.sh +11 -1
- package/docs/agent-compatibility/claude-code.md +27 -0
- package/docs/agent-compatibility/codex.md +1 -0
- package/docs/reviews/2026-06-10-full-spectrum-hardening.md +90 -0
- package/hooks/post-tool-use.sh +73 -22
- package/hooks/session-end.sh +10 -0
- package/hooks/session-start.sh +24 -1
- package/hooks/stop.sh +7 -2
- package/integrations/google_antigravity_hook.py +61 -26
- package/lib/codex-hooks.sh +5 -1
- package/lib/common.sh +104 -4
- package/lib/db-core.sh +28 -0
- package/lib/db-events.sh +13 -0
- package/lib/db-observations.sh +10 -3
- package/lib/db-sessions.sh +10 -1
- package/lib/db-summaries.sh +4 -1
- package/lib/hooks-sessionstart.sh +32 -13
- package/lib/hooks.sh +37 -0
- package/lib/provider.sh +10 -2
- package/lib/updater.sh +16 -2
- package/package.json +1 -1
- package/scripts/enrich-summary.sh +4 -1
- package/scripts/install.sh +3 -41
- package/scripts/logs.sh +44 -12
- package/scripts/orchestrate.sh +34 -4
- package/scripts/session.sh +5 -0
- package/scripts/statusline-em.sh +5 -1
- package/scripts/tasks.sh +6 -3
- package/scripts/test.sh +31 -3
- package/scripts/update.sh +3 -17
- package/tests/test_compaction_survival_matrix.sh +13 -1
- package/tests/test_context_budget.sh +117 -0
- package/tests/test_data_integrity_hardening.sh +115 -0
- package/tests/test_mod_tracker_concurrency.sh +142 -0
- package/tests/test_redaction_coverage.sh +183 -0
- package/tests/test_reliability_retention.sh +75 -0
- package/tests/test_rust_migration_plan.sh +8 -1
- package/tests/test_test_runner_no_abort.sh +86 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# ═══════════════════════════════════════════════════════════
|
|
3
|
+
# Eagle Mem — mod-tracker concurrency regression test (finding #6)
|
|
4
|
+
# Asserts:
|
|
5
|
+
# 1. Many parallel writers never wedge the lock (no leaked *.lock dir).
|
|
6
|
+
# 2. A pending append created DURING a drain is NOT lost (drains via mv, so
|
|
7
|
+
# only captured files are deleted; concurrent appends survive).
|
|
8
|
+
# 3. A stale lock dir (older than TTL) is reclaimed, not wedged forever.
|
|
9
|
+
# 4. The edit-history writer is lock-guarded and loses no append.
|
|
10
|
+
# ═══════════════════════════════════════════════════════════
|
|
11
|
+
set -uo pipefail
|
|
12
|
+
|
|
13
|
+
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
|
14
|
+
tmp_dir=$(mktemp -d)
|
|
15
|
+
trap 'rm -rf "$tmp_dir"' EXIT
|
|
16
|
+
|
|
17
|
+
export EAGLE_MEM_DIR="$tmp_dir/em"
|
|
18
|
+
export EAGLE_AGENT_SOURCE="claude-code"
|
|
19
|
+
export EAGLE_MEM_DISABLE_HOOKS=1
|
|
20
|
+
export EAGLE_MOD_LOCK_TTL=2
|
|
21
|
+
mkdir -p "$EAGLE_MEM_DIR"
|
|
22
|
+
|
|
23
|
+
pass=0; fail=0
|
|
24
|
+
# Record one passing assertion: print its label ($1) and bump the pass counter.
ok() {
  echo " ok: $1"
  pass=$((pass + 1))
}
|
|
25
|
+
# Record one failing assertion: report its label ($1) on stderr and bump the
# failure counter.
bad() {
  echo " FAIL: $1" >&2
  fail=$((fail + 1))
}
|
|
26
|
+
|
|
27
|
+
. "$ROOT_DIR/lib/common.sh"
|
|
28
|
+
|
|
29
|
+
# Pull the tracker functions out of post-tool-use.sh without running the hook
|
|
30
|
+
# body. State-machine awk: capture the TTL assignment plus each named function
|
|
31
|
+
# (header at column 0 through its closing `}` at column 0). No overlapping
|
|
32
|
+
# ranges, so no duplicated lines.
|
|
33
|
+
fn_src="$tmp_dir/tracker_fns.sh"
|
|
34
|
+
awk '
|
|
35
|
+
/^EAGLE_MOD_LOCK_TTL=/ { print; next }
|
|
36
|
+
/^(eagle_acquire_dir_lock|eagle_track_modified_path|eagle_track_edit_history_path)\(\) \{/ { capture=1 }
|
|
37
|
+
capture { print }
|
|
38
|
+
capture && /^}$/ { capture=0 }
|
|
39
|
+
' "$ROOT_DIR/hooks/post-tool-use.sh" > "$fn_src"
|
|
40
|
+
bash -n "$fn_src" || { bad "extracted tracker fns have syntax errors"; cat "$fn_src" >&2; echo "$pass passed, $fail failed"; exit 1; }
|
|
41
|
+
. "$fn_src"
|
|
42
|
+
declare -F eagle_track_modified_path >/dev/null && ok "loaded eagle_track_modified_path" || { bad "could not load tracker fns"; echo "$pass passed, $fail failed"; exit 1; }
|
|
43
|
+
declare -F eagle_acquire_dir_lock >/dev/null && ok "loaded eagle_acquire_dir_lock" || bad "could not load lock helper"
|
|
44
|
+
|
|
45
|
+
SID="conc-aaa111bbb222"
|
|
46
|
+
mod_dir="$EAGLE_MEM_DIR/mod-tracker"
|
|
47
|
+
mod_file="$mod_dir/$SID"
|
|
48
|
+
mod_lock="${mod_file}.lock"
|
|
49
|
+
|
|
50
|
+
# ── 1. Parallel writers, no wedge ──────────────────────────
|
|
51
|
+
N=40
|
|
52
|
+
for i in $(seq 1 "$N"); do
|
|
53
|
+
eagle_track_modified_path "/p/file_${i}.txt" "$SID" &
|
|
54
|
+
done
|
|
55
|
+
wait
|
|
56
|
+
[ ! -d "$mod_lock" ] && ok "1: lock dir not leaked after $N parallel writers" || bad "1: lock dir wedged"
|
|
57
|
+
# Whatever is committed must be valid paths from our set (no corruption).
|
|
58
|
+
committed=$(cat "$mod_file" 2>/dev/null | wc -l | tr -d ' ')
|
|
59
|
+
[ "$committed" -ge 1 ] && ok "1: committed file has $committed line(s)" || bad "1: committed file empty"
|
|
60
|
+
bad_lines=$(grep -vE '^/p/file_[0-9]+\.txt$' "$mod_file" 2>/dev/null | wc -l | tr -d ' ')
|
|
61
|
+
[ "$bad_lines" = "0" ] && ok "1: no corrupted/interleaved lines in committed file" || bad "1: $bad_lines corrupted lines"
|
|
62
|
+
|
|
63
|
+
# ── 2. Append during drain is NOT lost ─────────────────────
|
|
64
|
+
# Hold the lock ourselves, create a pending file, snapshot starts, then a NEW
|
|
65
|
+
# pending append lands. The drain must mv-capture only pre-existing pending
|
|
66
|
+
# files and never delete the late append.
|
|
67
|
+
rm -rf "$mod_dir"; mkdir -p "$mod_dir"
|
|
68
|
+
printf '%s\n' "/p/early.txt" > "${mod_file}.pending.111"
|
|
69
|
+
# Simulate: acquire lock, mv-drain existing pending, but a concurrent writer
|
|
70
|
+
# adds a brand-new pending file before we rm. Reproduce the exact sequence the
|
|
71
|
+
# fixed code uses.
|
|
72
|
+
mkdir "$mod_lock"
|
|
73
|
+
# drain step (mv existing pending out of the way)
|
|
74
|
+
for pf in "${mod_file}".pending.*; do
|
|
75
|
+
[ -e "$pf" ] || continue
|
|
76
|
+
mv "$pf" "${pf}.draining.$$" 2>/dev/null || true
|
|
77
|
+
done
|
|
78
|
+
# CONCURRENT appender lands a new pending file AFTER our snapshot/mv:
|
|
79
|
+
printf '%s\n' "/p/late.txt" > "${mod_file}.pending.999"
|
|
80
|
+
# finish drain: build committed from draining files only, delete draining only
|
|
81
|
+
{ cat "$mod_file" 2>/dev/null; for d in "${mod_file}".pending.*.draining.*; do [ -e "$d" ] && cat "$d"; done; } | tail -3 > "${mod_file}.tmp"
|
|
82
|
+
mv "${mod_file}.tmp" "$mod_file"
|
|
83
|
+
rm -f "${mod_file}".pending.*.draining.* 2>/dev/null || true
|
|
84
|
+
rmdir "$mod_lock"
|
|
85
|
+
# The late append must still be present as a pending file (not deleted).
|
|
86
|
+
[ -f "${mod_file}.pending.999" ] && ok "2: concurrent append during drain survived (not deleted)" || bad "2: late append LOST"
|
|
87
|
+
grep -q "/p/early.txt" "$mod_file" && ok "2: early pending drained into committed file" || bad "2: early pending lost"
|
|
88
|
+
|
|
89
|
+
# ── 3. Stale lock reclaim ──────────────────────────────────
|
|
90
|
+
rm -rf "$mod_dir"; mkdir -p "$mod_dir"
|
|
91
|
+
mkdir "$mod_lock"
|
|
92
|
+
# Age the lock past TTL (EAGLE_MOD_LOCK_TTL=2s) by backdating its mtime.
|
|
93
|
+
touch -t "$(date -v-1H '+%Y%m%d%H%M.%S' 2>/dev/null || date -d '1 hour ago' '+%Y%m%d%H%M.%S')" "$mod_lock" 2>/dev/null || true
|
|
94
|
+
# A new writer should reclaim the stale lock and succeed (not hang/fail).
|
|
95
|
+
eagle_track_modified_path "/p/after_stale.txt" "$SID"
|
|
96
|
+
[ ! -d "$mod_lock" ] && ok "3: stale lock reclaimed and released" || bad "3: stale lock not reclaimed"
|
|
97
|
+
grep -q "/p/after_stale.txt" "$mod_file" && ok "3: write after stale-lock reclaim recorded" || bad "3: write lost after stale reclaim"
|
|
98
|
+
|
|
99
|
+
# ── 4. edit-history writer is locked and loses nothing ─────
|
|
100
|
+
edit_dir="$EAGLE_MEM_DIR/edit-tracker"
|
|
101
|
+
edit_file="$edit_dir/$SID"
|
|
102
|
+
rm -rf "$edit_dir"
|
|
103
|
+
M=40
|
|
104
|
+
for i in $(seq 1 "$M"); do
|
|
105
|
+
eagle_track_edit_history_path "/e/edit_${i}.txt" "$SID" &
|
|
106
|
+
done
|
|
107
|
+
wait
|
|
108
|
+
[ ! -d "${edit_file}.lock" ] && ok "4: edit-history lock not leaked" || bad "4: edit-history lock wedged"
|
|
109
|
+
# Count the distinct, well-formed edit-history entries. `grep -c` already
# prints 0 (and exits 1) when nothing matches, so only the exit status is
# neutralized here; echoing a fallback 0 would produce the two-line value
# "0\n0" and garble the failure message below.
edit_count=$(sort -u "$edit_file" 2>/dev/null | grep -cE '^/e/edit_[0-9]+\.txt$' || true)
|
|
110
|
+
[ "$edit_count" = "$M" ] && ok "4: all $M edit-history appends present (append-only, none lost)" || bad "4: only $edit_count/$M edit-history appends present"
|
|
111
|
+
bad_e=$(grep -vE '^/e/edit_[0-9]+\.txt$' "$edit_file" 2>/dev/null | wc -l | tr -d ' ')
|
|
112
|
+
[ "$bad_e" = "0" ] && ok "4: no torn/interleaved edit-history lines" || bad "4: $bad_e torn lines"
|
|
113
|
+
|
|
114
|
+
# ── 5. Observation dedup is race-safe (finding #7) ─────────
|
|
115
|
+
# Two concurrent identical observations must dedupe to ONE row. The fixed code
|
|
116
|
+
# wraps the check-then-insert in BEGIN IMMEDIATE so the second writer blocks,
|
|
117
|
+
# then sees the first row via NOT EXISTS.
|
|
118
|
+
. "$ROOT_DIR/lib/db.sh"
|
|
119
|
+
# Apply the schema before touching the DB. This script does not run under
# `set -e`, so a failed migration is recorded explicitly; otherwise it would
# only surface as misleading dedup/strict-mode failures further down.
bash "$ROOT_DIR/db/migrate.sh" >/dev/null 2>&1 || bad "5: db/migrate.sh failed; observation checks below are unreliable"
|
|
120
|
+
OSID="obs-dedup-7777"
|
|
121
|
+
eagle_upsert_session "$OSID" "obs/proj" "$tmp_dir" "" "test" "claude-code"
|
|
122
|
+
for i in $(seq 1 12); do
|
|
123
|
+
eagle_insert_observation "$OSID" "obs/proj" "Bash" "ls -la /tmp" "[]" "[]" &
|
|
124
|
+
done
|
|
125
|
+
wait
|
|
126
|
+
obs_rows=$(eagle_db "SELECT COUNT(*) FROM observations WHERE session_id='$OSID' AND tool_input_summary='ls -la /tmp';")
|
|
127
|
+
[ "$obs_rows" = "1" ] && ok "5: 12 concurrent identical observations deduped to 1 row" || bad "5: dedup race produced $obs_rows rows (expected 1)"
|
|
128
|
+
|
|
129
|
+
# eagle_db_strict exists and aborts a multi-statement script on first error.
|
|
130
|
+
if declare -F eagle_db_strict >/dev/null; then
|
|
131
|
+
ok "5: eagle_db_strict variant present"
|
|
132
|
+
# A failing first statement must prevent the second from running.
|
|
133
|
+
eagle_db_strict "INSERT INTO nonexistent_table_xyz VALUES (1); INSERT INTO observations (session_id, project, tool_name) VALUES ('strict-canary','obs/proj','Bash');" >/dev/null 2>&1 || true
|
|
134
|
+
canary=$(eagle_db "SELECT COUNT(*) FROM observations WHERE session_id='strict-canary';")
|
|
135
|
+
[ "$canary" = "0" ] && ok "5: eagle_db_strict bailed before 2nd statement" || bad "5: eagle_db_strict did not bail ($canary canary rows)"
|
|
136
|
+
else
|
|
137
|
+
bad "5: eagle_db_strict missing"
|
|
138
|
+
fi
|
|
139
|
+
|
|
140
|
+
echo ""
|
|
141
|
+
echo "test_mod_tracker_concurrency: $pass passed, $fail failed"
|
|
142
|
+
[ "$fail" -eq 0 ]
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Regression coverage for the Phase 1 security hardening:
|
|
3
|
+
# - secrets are redacted BEFORE leaving the machine (LLM provider input,
|
|
4
|
+
# enrich job file) and before persistence (recall_events, observations)
|
|
5
|
+
# - configured [redaction] extra_patterns are actually applied by eagle_redact
|
|
6
|
+
# - orchestration workers default to a non-full-access (safe) autonomy mode
|
|
7
|
+
# - logs path resolver rejects traversal/symlink/out-of-root references
|
|
8
|
+
set -euo pipefail
|
|
9
|
+
|
|
10
|
+
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
|
11
|
+
|
|
12
|
+
tmp_dir=$(mktemp -d "$ROOT_DIR/.tmp-redaction.XXXXXX")
|
|
13
|
+
trap 'rm -rf "$tmp_dir"' EXIT
|
|
14
|
+
|
|
15
|
+
export HOME="$tmp_dir/home"
|
|
16
|
+
export EAGLE_MEM_DIR="$tmp_dir/eagle-mem"
|
|
17
|
+
export EAGLE_CONFIG_FILE="$EAGLE_MEM_DIR/config.toml"
|
|
18
|
+
mkdir -p "$HOME" "$EAGLE_MEM_DIR"
|
|
19
|
+
|
|
20
|
+
. "$ROOT_DIR/lib/common.sh"
|
|
21
|
+
"$ROOT_DIR/db/migrate.sh" >/dev/null
|
|
22
|
+
. "$ROOT_DIR/lib/db.sh"
|
|
23
|
+
. "$ROOT_DIR/lib/provider.sh"
|
|
24
|
+
|
|
25
|
+
# Fake secrets that must never survive redaction.
|
|
26
|
+
FAKE_ANT="sk-ant-FAKEabcdefghijklmnop1234567890"
|
|
27
|
+
FAKE_AWS="AKIAFAKE0000000000AB"
|
|
28
|
+
FAKE_BEARER="Bearer FAKEtokenshouldnotleak123"
|
|
29
|
+
FAKE_OPENAI="sk-FAKE0123456789abcdef0123456789abcdef"
|
|
30
|
+
|
|
31
|
+
# Report a fatal test failure ($1) on stderr and terminate the script with
# status 1.
fail() {
  echo "FAIL: $1" >&2
  exit 1
}
|
|
32
|
+
|
|
33
|
+
# Fail the test when any of the fake credentials shows up in a piece of text.
#   $1 - label naming the surface under inspection (prefixes the failure message)
#   $2 - text to scan
# Checks run in a fixed order (Anthropic, AWS, Bearer, OpenAI) and the first
# hit is handed to `fail`; text with no hit falls through with status 0.
assert_no_secret() {
  local where="$1" text="$2"
  if [[ "$text" == *"$FAKE_ANT"* ]]; then
    fail "$where leaked Anthropic key"
  elif [[ "$text" == *"$FAKE_AWS"* ]]; then
    fail "$where leaked AWS key"
  elif [[ "$text" == *"FAKEtoken"* ]]; then
    fail "$where leaked Bearer token"
  elif [[ "$text" == *"$FAKE_OPENAI"* ]]; then
    fail "$where leaked OpenAI key"
  fi
}
|
|
42
|
+
|
|
43
|
+
# ── eagle_redact built-ins ───────────────────────────────────────────────
|
|
44
|
+
redacted=$(printf 'k=%s a=%s h=%s o=%s\n' "$FAKE_ANT" "$FAKE_AWS" "$FAKE_BEARER" "$FAKE_OPENAI" | eagle_redact)
|
|
45
|
+
assert_no_secret "eagle_redact built-ins" "$redacted"
|
|
46
|
+
case "$redacted" in *"[REDACTED]"*) ;; *) fail "eagle_redact produced no [REDACTED] marker" ;; esac
|
|
47
|
+
|
|
48
|
+
# ── Finding 4: configured extra_patterns are honored ─────────────────────
|
|
49
|
+
cat > "$EAGLE_CONFIG_FILE" <<'TOML'
|
|
50
|
+
[redaction]
|
|
51
|
+
extra_patterns = ["CORPSECRET_[A-Z0-9]+", "INTERNAL-[0-9]+"]
|
|
52
|
+
TOML
|
|
53
|
+
extra_redacted=$(printf 'token CORPSECRET_AB12 and ticket INTERNAL-9988 here\n' | eagle_redact)
|
|
54
|
+
case "$extra_redacted" in
|
|
55
|
+
*CORPSECRET_AB12*) fail "extra_patterns[0] not applied (CORPSECRET leaked)" ;;
|
|
56
|
+
*INTERNAL-9988*) fail "extra_patterns[1] not applied (INTERNAL leaked)" ;;
|
|
57
|
+
esac
|
|
58
|
+
|
|
59
|
+
# Commented-out default must NOT redact (proves we parse real config, not the doc line).
|
|
60
|
+
cat > "$EAGLE_CONFIG_FILE" <<'TOML'
|
|
61
|
+
[redaction]
|
|
62
|
+
# extra_patterns = ["MY_CUSTOM_SECRET_.*"]
|
|
63
|
+
TOML
|
|
64
|
+
commented=$(printf 'MY_CUSTOM_SECRET_xyz stays visible\n' | eagle_redact)
|
|
65
|
+
case "$commented" in
|
|
66
|
+
*MY_CUSTOM_SECRET_xyz*) ;;
|
|
67
|
+
*) fail "commented extra_patterns default was wrongly applied" ;;
|
|
68
|
+
esac
|
|
69
|
+
rm -f "$EAGLE_CONFIG_FILE"
|
|
70
|
+
|
|
71
|
+
# ── Finding 2: recall_events.prompt_snippet is redacted before insert ────
|
|
72
|
+
project="redaction-project"
|
|
73
|
+
repo="$tmp_dir/repo"; mkdir -p "$repo"
|
|
74
|
+
eagle_upsert_session "redact-session" "$project" "$repo" "test-model" "test" "codex" >/dev/null
|
|
75
|
+
eagle_insert_recall_event "redact-session" "$project" "$repo" "codex" \
|
|
76
|
+
"please use my key $FAKE_ANT and aws $FAKE_AWS to deploy" \
|
|
77
|
+
"deploy" 0 0 0 0 "ok" "" >/dev/null
|
|
78
|
+
snippet=$(eagle_db "SELECT prompt_snippet FROM recall_events WHERE session_id='redact-session' LIMIT 1;")
|
|
79
|
+
assert_no_secret "recall_events.prompt_snippet" "$snippet"
|
|
80
|
+
case "$snippet" in *"[REDACTED]"*) ;; *) fail "recall_events.prompt_snippet not redacted" ;; esac
|
|
81
|
+
|
|
82
|
+
# ── Finding 1: enrich job file written by Stop is redacted ───────────────
|
|
83
|
+
# Drive the Stop hook with a transcript that contains a secret and assert the
|
|
84
|
+
# background enrich job file (and what the enricher would read) is clean.
|
|
85
|
+
transcript="$tmp_dir/transcript.jsonl"
|
|
86
|
+
cat > "$transcript" <<EOF
|
|
87
|
+
{"type":"user","message":{"role":"user","content":"set up deploy"}}
|
|
88
|
+
{"type":"assistant","message":{"role":"assistant","content":[{"type":"text","text":"Using API key $FAKE_ANT and aws $FAKE_AWS for the deploy. Here is what I did."}]}}
|
|
89
|
+
EOF
|
|
90
|
+
mkdir -p "$EAGLE_MEM_DIR/tmp"
|
|
91
|
+
stop_input=$(jq -nc \
|
|
92
|
+
--arg sid "stop-redact-session" \
|
|
93
|
+
--arg tp "$transcript" \
|
|
94
|
+
--arg cwd "$repo" \
|
|
95
|
+
'{session_id:$sid, transcript_path:$tp, cwd:$cwd}')
|
|
96
|
+
# A provider must be configured so Stop queues a background enrich job. We
|
|
97
|
+
# DISABLE the background enricher itself (EAGLE_MEM_DISABLE_BACKGROUND_ENRICH=1)
|
|
98
|
+
# so the queued job file is left on disk for inspection, and force the defer
|
|
99
|
+
# path with EAGLE_MEM_STOP_ENRICH=0. EAGLE_MEM_PROJECT pins project resolution
|
|
100
|
+
# so the hook does not early-exit in this synthetic environment.
|
|
101
|
+
cat > "$EAGLE_CONFIG_FILE" <<'TOML'
|
|
102
|
+
[provider]
|
|
103
|
+
type = "anthropic"
|
|
104
|
+
TOML
|
|
105
|
+
printf '%s' "$stop_input" | \
|
|
106
|
+
EAGLE_MEM_PROJECT="$project" \
|
|
107
|
+
EAGLE_MEM_STOP_ENRICH=0 \
|
|
108
|
+
EAGLE_MEM_DISABLE_BACKGROUND_ENRICH=1 \
|
|
109
|
+
EAGLE_AGENT_SOURCE=codex \
|
|
110
|
+
bash "$ROOT_DIR/hooks/stop.sh" >/dev/null 2>&1 || true
|
|
111
|
+
# The background enricher is disabled from running, so the job file remains for inspection.
|
|
112
|
+
saw_job=0
|
|
113
|
+
for job in "$EAGLE_MEM_DIR"/tmp/summary-enrich.*.json; do
|
|
114
|
+
[ -f "$job" ] || continue
|
|
115
|
+
saw_job=1
|
|
116
|
+
body=$(cat "$job")
|
|
117
|
+
assert_no_secret "enrich job file" "$body"
|
|
118
|
+
case "$body" in *"[REDACTED]"*) ;; *) fail "enrich job file was not redacted" ;; esac
|
|
119
|
+
done
|
|
120
|
+
[ "$saw_job" = 1 ] || fail "Stop did not queue a background enrich job file to verify (finding 1)"
|
|
121
|
+
rm -f "$EAGLE_MEM_DIR"/tmp/summary-enrich.*.json 2>/dev/null || true
|
|
122
|
+
rm -f "$EAGLE_CONFIG_FILE"
|
|
123
|
+
|
|
124
|
+
# Even if no job file was produced in this environment, the redaction path is
|
|
125
|
+
# also unit-tested directly: the exact transformation Stop applies.
|
|
126
|
+
text_with_secret="prefix $FAKE_ANT mid $FAKE_AWS suffix"
|
|
127
|
+
enrich_text=$(printf '%s' "$text_with_secret" | eagle_redact)
|
|
128
|
+
assert_no_secret "Stop enrich_text transform" "$enrich_text"
|
|
129
|
+
|
|
130
|
+
# ── Finding 3: redact-before-truncate (boundary secret defeats prefix) ───
|
|
131
|
+
# A secret near the 200-char truncation boundary must still be redacted (here it begins at char 222: 31-char prefix + 190 pad chars precede it).
|
|
132
|
+
pad=$(printf 'a%.0s' $(seq 1 190))
|
|
133
|
+
boundary_cmd="curl -H \"Authorization: Bearer ${pad}${FAKE_OPENAI}\""
|
|
134
|
+
# Old order (truncate THEN redact) would cut the token mid-stream; the new order
|
|
135
|
+
# redacts first. Emulate the new order exactly as post-tool-use.sh does.
|
|
136
|
+
new_order=$(printf '%s' "$boundary_cmd" | eagle_redact | cut -c1-200)
|
|
137
|
+
assert_no_secret "redact-before-truncate" "$new_order"
|
|
138
|
+
|
|
139
|
+
# ── Finding 9: orchestration workers default to safe autonomy ────────────
|
|
140
|
+
eval "$(sed -n '/^orchestrate_worker_autonomy()/,/^}/p' "$ROOT_DIR/scripts/orchestrate.sh")"
|
|
141
|
+
[ -f "$EAGLE_CONFIG_FILE" ] && rm -f "$EAGLE_CONFIG_FILE"
|
|
142
|
+
default_autonomy=$(orchestrate_worker_autonomy)
|
|
143
|
+
[ "$default_autonomy" = "safe" ] || fail "orchestration worker autonomy should default to 'safe', got '$default_autonomy'"
|
|
144
|
+
cat > "$EAGLE_CONFIG_FILE" <<'TOML'
|
|
145
|
+
[orchestration]
|
|
146
|
+
worker_autonomy = "danger"
|
|
147
|
+
TOML
|
|
148
|
+
opt_in_autonomy=$(orchestrate_worker_autonomy)
|
|
149
|
+
[ "$opt_in_autonomy" = "danger" ] || fail "worker_autonomy='danger' should opt into 'danger', got '$opt_in_autonomy'"
|
|
150
|
+
rm -f "$EAGLE_CONFIG_FILE"
|
|
151
|
+
|
|
152
|
+
# The generated safe run-script must not contain danger-full-access / never / dontAsk.
|
|
153
|
+
project="orch-proj"; name="orch-name"
|
|
154
|
+
eval "$(sed -n '/^orchestrate_worker_run_script()/,/^}/p' "$ROOT_DIR/scripts/orchestrate.sh")"
|
|
155
|
+
safe_script="$tmp_dir/safe-run.sh"
|
|
156
|
+
orchestrate_worker_run_script "$safe_script" "codex" "m" "xhigh" "/wt" "/p.md" "/exit" "/last" "/bin" "lane1" "/log"
|
|
157
|
+
if grep -qE 'danger-full-access|approval_policy="never"' "$safe_script"; then
|
|
158
|
+
fail "safe-mode codex worker still uses full-access flags"
|
|
159
|
+
fi
|
|
160
|
+
safe_claude="$tmp_dir/safe-claude.sh"
|
|
161
|
+
orchestrate_worker_run_script "$safe_claude" "claude-code" "m" "xhigh" "/wt" "/p.md" "/exit" "/last" "/bin" "lane1" "/log"
|
|
162
|
+
if grep -q 'permission-mode dontAsk' "$safe_claude"; then
|
|
163
|
+
fail "safe-mode claude worker still uses --permission-mode dontAsk"
|
|
164
|
+
fi
|
|
165
|
+
|
|
166
|
+
# ── Finding 7: logs path resolver containment ────────────────────────────
|
|
167
|
+
runs_root="$tmp_dir/runs"; mkdir -p "$runs_root"
|
|
168
|
+
echo "log line" > "$runs_root/run1.log"
|
|
169
|
+
secret_dir="$tmp_dir/secret"; mkdir -p "$secret_dir"
|
|
170
|
+
echo "TOPSECRET" > "$secret_dir/secret.log"
|
|
171
|
+
ln -s "$secret_dir/secret.log" "$runs_root/evil.log"
|
|
172
|
+
eval "$(sed -n '/^canonicalize_path()/,/^}/p;/^path_within()/,/^}/p;/^resolve_log_path()/,/^}/p' "$ROOT_DIR/scripts/logs.sh")"
|
|
173
|
+
EAGLE_RUNS_DIR="$runs_root"
|
|
174
|
+
|
|
175
|
+
valid=$(resolve_log_path "run1" || true)
|
|
176
|
+
[ -n "$valid" ] || fail "logs resolver rejected a valid in-root log"
|
|
177
|
+
|
|
178
|
+
if resolve_log_path "evil.log" >/dev/null 2>&1; then fail "logs resolver followed a symlink out of root"; fi
|
|
179
|
+
if resolve_log_path "../../etc/passwd" >/dev/null 2>&1; then fail "logs resolver allowed traversal"; fi
|
|
180
|
+
if resolve_log_path "$secret_dir/secret.log" >/dev/null 2>&1; then fail "logs resolver allowed an out-of-root absolute path"; fi
|
|
181
|
+
if resolve_log_path "$runs_root/../secret/secret.log" >/dev/null 2>&1; then fail "logs resolver allowed prefixed traversal"; fi
|
|
182
|
+
|
|
183
|
+
echo "PASS: redaction + autonomy + log-path containment regression coverage"
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Phase 3 reliability hardening regressions:
|
|
3
|
+
# A. Auto-scan/index in-flight vs freshness marker separation — a crashed or
|
|
4
|
+
# output-less job must NOT block retry for a day (only a genuine success
|
|
5
|
+
# sets the freshness marker; the foreground touch is a short-lived debounce).
|
|
6
|
+
# B. eagle_events retention — the hook-observability table is pruned by age.
|
|
7
|
+
set -euo pipefail
|
|
8
|
+
|
|
9
|
+
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
|
10
|
+
tmp_dir=$(mktemp -d "$ROOT_DIR/.tmp-reliability.XXXXXX")
|
|
11
|
+
trap 'rm -rf "$tmp_dir"' EXIT
|
|
12
|
+
|
|
13
|
+
export HOME="$tmp_dir/home"
|
|
14
|
+
export EAGLE_MEM_DIR="$tmp_dir/eagle-mem"
|
|
15
|
+
mkdir -p "$HOME" "$EAGLE_MEM_DIR"
|
|
16
|
+
|
|
17
|
+
. "$ROOT_DIR/lib/common.sh"
|
|
18
|
+
"$ROOT_DIR/db/migrate.sh" >/dev/null
|
|
19
|
+
. "$ROOT_DIR/lib/db.sh"
|
|
20
|
+
. "$ROOT_DIR/lib/hooks-sessionstart.sh"
|
|
21
|
+
|
|
22
|
+
pass=0; fail=0
|
|
23
|
+
# Record one passing assertion: print its label ($1) and bump the pass counter.
ok() {
  echo " ok: $1"
  pass=$((pass + 1))
}
|
|
24
|
+
# Record one failing assertion: print its label ($1) on stdout and bump the
# failure counter.
bad() {
  echo " FAIL: $1"
  fail=$((fail + 1))
}
|
|
25
|
+
# Evaluate the shell snippet in $1; count a pass labelled $2 when it succeeds,
# otherwise count a failure with the same label.
assert() {
  if ! eval "$1"; then
    bad "$2"
    return
  fi
  ok "$2"
}
|
|
26
|
+
# Evaluate the shell snippet in $1 and expect it to FAIL: count a pass
# labelled $2 when it returns non-zero, otherwise count a failure.
assert_not() {
  if ! eval "$1"; then
    ok "$2"
    return
  fi
  bad "$2"
}
|
|
27
|
+
|
|
28
|
+
project="test/reliability"
|
|
29
|
+
|
|
30
|
+
# ── A. In-flight vs freshness ───────────────────────────────────────────────
|
|
31
|
+
# Crashed/output-less job: only the in-flight marker exists, never the freshness one.
|
|
32
|
+
_eagle_state_touch "scan-inflight" "$project"
|
|
33
|
+
assert_not '_eagle_state_fresh "scan" "$project" 1' \
|
|
34
|
+
"A1: in-flight-only does NOT satisfy the 1-day freshness gate (retry not blocked for a day)"
|
|
35
|
+
assert '_eagle_state_inflight_fresh "scan" "$project" 15' \
|
|
36
|
+
"A2: a recent in-flight marker debounces concurrent spawns"
|
|
37
|
+
|
|
38
|
+
# Genuine success sets the freshness marker.
|
|
39
|
+
_eagle_state_touch "scan" "$project"
|
|
40
|
+
assert '_eagle_state_fresh "scan" "$project" 1' \
|
|
41
|
+
"A3: a success-set freshness marker satisfies the freshness gate"
|
|
42
|
+
|
|
43
|
+
# A stale in-flight marker (job died long ago) must age out so retry reopens.
|
|
44
|
+
stale_inflight=$(_eagle_state_file "index-inflight" "$project")
|
|
45
|
+
mkdir -p "$(dirname "$stale_inflight")"
|
|
46
|
+
: > "$stale_inflight"
|
|
47
|
+
touch -t 202001010000 "$stale_inflight"
|
|
48
|
+
assert_not '_eagle_state_inflight_fresh "index" "$project" 15' \
|
|
49
|
+
"A4: a stale in-flight marker is not 'fresh' — retry is allowed after the debounce window"
|
|
50
|
+
|
|
51
|
+
# ── B. eagle_events retention ───────────────────────────────────────────────
|
|
52
|
+
p1=$(eagle_sql_escape "$project")
|
|
53
|
+
p2=$(eagle_sql_escape "other/project")
|
|
54
|
+
eagle_db "INSERT INTO eagle_events (project, session_id, agent, event_type, status, created_at)
|
|
55
|
+
VALUES ('$p1','s-old','codex','hook_started','ok', strftime('%Y-%m-%dT%H:%M:%fZ','now','-60 days'));" >/dev/null
|
|
56
|
+
eagle_db "INSERT INTO eagle_events (project, session_id, agent, event_type, status, created_at)
|
|
57
|
+
VALUES ('$p1','s-new','codex','hook_started','ok', strftime('%Y-%m-%dT%H:%M:%fZ','now'));" >/dev/null
|
|
58
|
+
eagle_db "INSERT INTO eagle_events (project, session_id, agent, event_type, status, created_at)
|
|
59
|
+
VALUES ('$p2','s-old2','codex','hook_started','ok', strftime('%Y-%m-%dT%H:%M:%fZ','now','-60 days'));" >/dev/null
|
|
60
|
+
|
|
61
|
+
eagle_prune_events 30 "$project"
|
|
62
|
+
old_p1=$(eagle_db "SELECT COUNT(*) FROM eagle_events WHERE project='$p1' AND session_id='s-old';")
|
|
63
|
+
new_p1=$(eagle_db "SELECT COUNT(*) FROM eagle_events WHERE project='$p1' AND session_id='s-new';")
|
|
64
|
+
old_p2=$(eagle_db "SELECT COUNT(*) FROM eagle_events WHERE project='$p2' AND session_id='s-old2';")
|
|
65
|
+
[ "$old_p1" = "0" ] && ok "B1: old event (60d) pruned for the target project" || bad "B1: old event not pruned (got $old_p1)"
|
|
66
|
+
[ "$new_p1" = "1" ] && ok "B2: recent event retained" || bad "B2: recent event lost (got $new_p1)"
|
|
67
|
+
[ "$old_p2" = "1" ] && ok "B3: project filter — other project's old event untouched" || bad "B3: project filter leaked (got $old_p2)"
|
|
68
|
+
|
|
69
|
+
eagle_prune_events 30
|
|
70
|
+
old_p2b=$(eagle_db "SELECT COUNT(*) FROM eagle_events WHERE project='$p2' AND session_id='s-old2';")
|
|
71
|
+
[ "$old_p2b" = "0" ] && ok "B4: unscoped prune removes other project's old event" || bad "B4: unscoped prune missed it (got $old_p2b)"
|
|
72
|
+
|
|
73
|
+
echo ""
|
|
74
|
+
echo "test_reliability_retention: $pass passed, $fail failed"
|
|
75
|
+
[ "$fail" -eq 0 ] || exit 1
|
|
@@ -18,7 +18,14 @@ require_contains() {
|
|
|
18
18
|
fi
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
# MIGRATION.md is a maintainer-only roadmap and is intentionally NOT shipped in
|
|
22
|
+
# the npm package (`files` allowlist), so this contract test has nothing to
|
|
23
|
+
# guard when the suite runs from a published install via `eagle-mem test`.
|
|
24
|
+
# Skip cleanly (exit 2) in that case; run strictly from a source checkout.
|
|
25
|
+
if [ ! -f "$PLAN" ]; then
|
|
26
|
+
echo "skip: MIGRATION.md not present (dev-only contract test)" >&2
|
|
27
|
+
exit 2
|
|
28
|
+
fi
|
|
22
29
|
|
|
23
30
|
require_contains "~/.eagle-mem/memory\\.db" "the existing user database path"
|
|
24
31
|
require_contains "compatibility wrapper" "the Bash compatibility wrapper"
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# ═══════════════════════════════════════════════════════════
|
|
3
|
+
# Regression: scripts/test.sh must NOT abort mid-run when a
|
|
4
|
+
# single check fails.
|
|
5
|
+
#
|
|
6
|
+
# Guards against the `set -e` masking bug where the failure
|
|
7
|
+
# accumulator used `((errors++))`. Under `set -euo pipefail`,
|
|
8
|
+
# `((errors++))` returns exit status 1 when the pre-increment
|
|
9
|
+
# value is 0 (post-increment evaluates the old, falsy value),
|
|
10
|
+
# so the FIRST failing check aborted the entire runner —
|
|
11
|
+
# skipping every later check and the failure-count summary.
|
|
12
|
+
# The fix uses the assignment form `errors=$((errors + 1))`,
|
|
13
|
+
# which always returns 0.
|
|
14
|
+
# ═══════════════════════════════════════════════════════════
|
|
15
|
+
set -uo pipefail
|
|
16
|
+
|
|
17
|
+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
18
|
+
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
|
19
|
+
TEST_SH="$REPO_DIR/scripts/test.sh"
|
|
20
|
+
|
|
21
|
+
pass=0
|
|
22
|
+
fail=0
|
|
23
|
+
# Record one passing assertion: print its label ($1) and bump the pass counter.
ok() {
  echo " ok: $1"
  pass=$((pass + 1))
}
|
|
24
|
+
# Record one failing assertion: print its label ($1) on stdout and bump the
# failure counter.
bad() {
  echo " FAIL: $1"
  fail=$((fail + 1))
}
|
|
25
|
+
|
|
26
|
+
# 1. The buggy idiom must be gone from the runner's executable code.
#    Only non-comment lines are inspected so the explanatory comment that
#    names the old idiom does not trip the check.
#
# runner_uses_buggy_increment FILE
#   Returns 0 when FILE contains a non-comment line with the post-increment
#   arithmetic command on `errors`, with or without padding inside the double
#   parentheses; non-zero otherwise (including when FILE cannot be read).
#   - POSIX character classes are used because `\s` is a GNU extension.
#   - The second grep discards its output instead of using -q: an early exit
#     could kill the upstream grep with SIGPIPE, which `pipefail` would then
#     report as "no match".
runner_uses_buggy_increment() {
  grep -vE '^[[:space:]]*#' -- "$1" \
    | grep -E '\(\([[:space:]]*errors[[:space:]]*\+\+[[:space:]]*\)\)' >/dev/null
}

if runner_uses_buggy_increment "$TEST_SH"; then
  bad "scripts/test.sh still uses ((errors++)) in code (aborts under set -e)"
else
  ok "scripts/test.sh no longer uses ((errors++)) in code"
fi
|
|
34
|
+
if grep -qF 'errors=$((errors + 1))' "$TEST_SH"; then
|
|
35
|
+
ok "scripts/test.sh uses the safe assignment form"
|
|
36
|
+
else
|
|
37
|
+
bad "scripts/test.sh missing safe assignment accumulator"
|
|
38
|
+
fi
|
|
39
|
+
|
|
40
|
+
# 2. Behavioral proof: replicate the runner's exact accumulation loop
|
|
41
|
+
# under `set -euo pipefail`, force the FIRST check to fail, and
|
|
42
|
+
# confirm execution reaches the end with a correct count.
|
|
43
|
+
result=$(
|
|
44
|
+
set -euo pipefail
|
|
45
|
+
errors=0
|
|
46
|
+
run_check() {
|
|
47
|
+
if eval "$2" >/dev/null 2>&1; then
|
|
48
|
+
:
|
|
49
|
+
else
|
|
50
|
+
errors=$((errors + 1))
|
|
51
|
+
fi
|
|
52
|
+
}
|
|
53
|
+
run_check "first-fails" "false" # errors goes 0 -> 1
|
|
54
|
+
run_check "second-ok" "true"
|
|
55
|
+
run_check "third-fails" "false" # errors goes 1 -> 2
|
|
56
|
+
echo "REACHED_END errors=$errors"
|
|
57
|
+
)
|
|
58
|
+
status=$?
|
|
59
|
+
|
|
60
|
+
if [ "$status" -eq 0 ] && printf '%s' "$result" | grep -qF 'REACHED_END errors=2'; then
|
|
61
|
+
ok "runner reaches summary after failures with correct count (errors=2)"
|
|
62
|
+
else
|
|
63
|
+
bad "runner aborted early or miscounted (status=$status, out='$result')"
|
|
64
|
+
fi
|
|
65
|
+
|
|
66
|
+
# 3. Sanity: the old idiom genuinely aborts (proves the test is meaningful).
#    The control snippet runs in a separate `bash` process so `set -e` is
#    fully active (a `|| ...` on a subshell would disable `set -e` inside it
#    and hide the abort). It is passed inline via `bash -c`, so there is no
#    temp file to create, check, or clean up on an interrupted run.
control_script='set -euo pipefail
errors=0
((errors++))   # pre-value 0 -> command returns 1 -> set -e aborts here
echo "SHOULD_NOT_PRINT"'
control_out=$(bash -c "$control_script" 2>/dev/null); control_status=$?
if [ "$control_status" -ne 0 ] && ! printf '%s' "$control_out" | grep -qF 'SHOULD_NOT_PRINT'; then
  ok "control: ((errors++)) from 0 aborts under set -e (bug is real)"
else
  bad "control: ((errors++)) did not abort (status=$control_status) — test premise invalid"
fi
|
|
83
|
+
|
|
84
|
+
echo ""
|
|
85
|
+
echo "test_test_runner_no_abort: $pass passed, $fail failed"
|
|
86
|
+
[ "$fail" -eq 0 ] || exit 1
|