eagle-mem 4.12.1 → 4.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +43 -0
- package/README.md +4 -0
- package/db/migrate.sh +11 -1
- package/docs/agent-compatibility/claude-code.md +27 -0
- package/docs/agent-compatibility/codex.md +1 -0
- package/docs/reviews/2026-06-10-full-spectrum-hardening.md +90 -0
- package/hooks/post-tool-use.sh +73 -22
- package/hooks/session-end.sh +10 -0
- package/hooks/session-start.sh +24 -1
- package/hooks/stop.sh +7 -2
- package/integrations/google_antigravity_hook.py +61 -26
- package/lib/codex-hooks.sh +5 -1
- package/lib/common.sh +104 -4
- package/lib/db-core.sh +28 -0
- package/lib/db-events.sh +13 -0
- package/lib/db-observations.sh +10 -3
- package/lib/db-sessions.sh +10 -1
- package/lib/db-summaries.sh +4 -1
- package/lib/hooks-sessionstart.sh +32 -13
- package/lib/hooks.sh +37 -0
- package/lib/provider.sh +10 -2
- package/lib/updater.sh +16 -2
- package/package.json +1 -1
- package/scripts/enrich-summary.sh +4 -1
- package/scripts/install.sh +3 -41
- package/scripts/logs.sh +44 -12
- package/scripts/orchestrate.sh +34 -4
- package/scripts/session.sh +5 -0
- package/scripts/statusline-em.sh +5 -1
- package/scripts/tasks.sh +6 -3
- package/scripts/test.sh +31 -3
- package/scripts/update.sh +3 -17
- package/tests/test_compaction_survival_matrix.sh +13 -1
- package/tests/test_context_budget.sh +117 -0
- package/tests/test_data_integrity_hardening.sh +115 -0
- package/tests/test_mod_tracker_concurrency.sh +142 -0
- package/tests/test_redaction_coverage.sh +183 -0
- package/tests/test_reliability_retention.sh +75 -0
- package/tests/test_rust_migration_plan.sh +8 -1
- package/tests/test_test_runner_no_abort.sh +86 -0
package/scripts/logs.sh
CHANGED
|
@@ -26,13 +26,52 @@ Commands:
|
|
|
26
26
|
EOF
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
+
# canonicalize_path PATH
#
# Print the canonical form of an absolute PATH on stdout, resolving symlinks
# and '..' components.
#
# Strategy, in order of preference:
#   1. realpath, when installed and it succeeds;
#   2. python3's os.path.realpath;
#   3. a pure-shell best effort: an existing directory is resolved as a whole
#      with `cd -P`; anything else has its parent directory resolved and its
#      final component appended unchanged (a symlink in the final component of
#      a non-directory is therefore NOT followed by this last fallback).
#
# Returns 0 when a path was printed, 1 when the shell fallback could not enter
# the (parent) directory.
canonicalize_path() {
    local p="$1"
    if command -v realpath >/dev/null 2>&1; then
        realpath "$p" 2>/dev/null && return 0
    fi
    if command -v python3 >/dev/null 2>&1; then
        python3 -c 'import os,sys; print(os.path.realpath(sys.argv[1]))' "$p" 2>/dev/null && return 0
    fi

    local dir base
    # A directory must be resolved in full. Resolving only its parent would
    # leave a symlinked directory (e.g. the runs root itself) unresolved while
    # files beneath it resolve to the real target, so the two could never be
    # compared consistently.
    if [ -d "$p" ]; then
        dir=$(cd -P -- "$p" 2>/dev/null && pwd -P) || return 1
        printf '%s\n' "$dir"
        return 0
    fi

    dir=$(dirname -- "$p")
    base=$(basename -- "$p")
    dir=$(cd -P -- "$dir" 2>/dev/null && pwd -P) || return 1
    # ${dir%/} turns "/" into "" so an entry directly under the filesystem
    # root is printed as "/name" rather than "//name".
    printf '%s/%s\n' "${dir%/}" "$base"
}
|
|
45
|
+
|
|
46
|
+
# path_within PATH ROOT
#
# Succeed (return 0) when canonical PATH lies strictly inside canonical
# directory ROOT, i.e. PATH is "<ROOT>/<something>". ROOT itself is not
# considered to be within ROOT. Both arguments are compared textually, so the
# caller is responsible for canonicalizing them first.
#
# Returns 1 when PATH is outside ROOT, equals ROOT, or ROOT is empty.
path_within() {
    local path="$1" root="$2"

    # An empty root means "unknown", not "/". Without this guard the pattern
    # below would degrade to "/*" and accept every absolute path.
    [ -n "$root" ] || return 1

    # The root directory is never inside itself.
    [ "$path" != "$root" ] || return 1

    # Drop one trailing slash so that "/" (-> "") and "/dir/" (-> "/dir")
    # both build a correct "<root>/*" pattern.
    root="${root%/}"

    case "$path" in
        # $root is quoted, so glob characters inside it match literally.
        "$root"/*) return 0 ;;
        *) return 1 ;;
    esac
}
|
|
54
|
+
|
|
29
55
|
resolve_log_path() {
|
|
30
56
|
local ref="${1:-}"
|
|
31
57
|
local runs_root="${EAGLE_RUNS_DIR%/}" rel_ref
|
|
58
|
+
local canon_root resolved canon_resolved
|
|
59
|
+
canon_root=$(canonicalize_path "$runs_root" 2>/dev/null) || canon_root="$runs_root"
|
|
60
|
+
|
|
61
|
+
emit_if_contained() {
    # Print CANDIDATE ($1) when it is a regular, non-symlink file whose
    # canonical location lies under $canon_root (set by the enclosing
    # function). Returns 0 after printing, 1 for every rejected candidate.
    local candidate="$1" canon

    # Symlinks are refused outright; anything that is not a regular file too.
    if [ -L "$candidate" ] || [ ! -f "$candidate" ]; then
        return 1
    fi

    if ! canon=$(canonicalize_path "$candidate" 2>/dev/null); then
        return 1
    fi

    if path_within "$canon" "$canon_root"; then
        # Emit the path as the caller spelled it, not its canonical form.
        printf '%s\n' "$candidate"
        return 0
    fi
    return 1
}
|
|
71
|
+
|
|
32
72
|
if [ -z "$ref" ]; then
|
|
33
73
|
ls -t "$runs_root"/*.log 2>/dev/null | while IFS= read -r candidate; do
|
|
34
|
-
|
|
35
|
-
[ -f "$candidate" ] && printf '%s\n' "$candidate" && break
|
|
74
|
+
emit_if_contained "$candidate" && break
|
|
36
75
|
done
|
|
37
76
|
return 0
|
|
38
77
|
fi
|
|
@@ -43,21 +82,14 @@ resolve_log_path() {
|
|
|
43
82
|
rel_ref="${ref#"$runs_root"/}"
|
|
44
83
|
[ "$rel_ref" != "$ref" ] || return 1
|
|
45
84
|
case "$rel_ref" in ""|*/*) return 1 ;; esac
|
|
46
|
-
|
|
47
|
-
[ -f "$runs_root/$rel_ref" ] && printf '%s\n' "$runs_root/$rel_ref" && return 0
|
|
85
|
+
emit_if_contained "$runs_root/$rel_ref" && return 0
|
|
48
86
|
return 1
|
|
49
87
|
;;
|
|
50
88
|
*/*) return 1 ;;
|
|
51
89
|
esac
|
|
52
90
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
return 0
|
|
56
|
-
fi
|
|
57
|
-
if [ ! -L "$runs_root/$ref.log" ] && [ -f "$runs_root/$ref.log" ]; then
|
|
58
|
-
printf '%s\n' "$runs_root/$ref.log"
|
|
59
|
-
return 0
|
|
60
|
-
fi
|
|
91
|
+
emit_if_contained "$runs_root/$ref" && return 0
|
|
92
|
+
emit_if_contained "$runs_root/$ref.log" && return 0
|
|
61
93
|
return 1
|
|
62
94
|
}
|
|
63
95
|
|
package/scripts/orchestrate.sh
CHANGED
|
@@ -203,6 +203,20 @@ orchestrate_worker_effort() {
|
|
|
203
203
|
esac
|
|
204
204
|
}
|
|
205
205
|
|
|
206
|
+
# orchestrate_worker_autonomy
#
# Print the effective worker autonomy level: "danger" or "safe".
#
# The value is read from the [orchestration] worker_autonomy config key and
# defaults to "safe". Only the spellings "danger", "danger-full-access" and
# "full" select "danger"; every other value resolves to "safe". Full-access,
# unattended workers are therefore strictly opt-in, because worker prompts are
# assembled from DB-stored lane descriptions (a stored-prompt-injection
# surface).
orchestrate_worker_autonomy() {
    local configured
    configured=$(eagle_config_get "orchestration" "worker_autonomy" "safe")

    if [ "$configured" = "danger" ] \
        || [ "$configured" = "danger-full-access" ] \
        || [ "$configured" = "full" ]; then
        echo "danger"
    else
        # Unknown or misspelled values fall back to the restrictive mode.
        echo "safe"
    fi
}
|
|
219
|
+
|
|
206
220
|
orchestrate_require_worker_cli() {
|
|
207
221
|
case "$1" in
|
|
208
222
|
codex)
|
|
@@ -775,6 +789,22 @@ orchestrate_worker_run_script() {
|
|
|
775
789
|
local complete_note="Worker exited 0; log: $log_path"
|
|
776
790
|
local block_note="Worker exited non-zero; log: $log_path"
|
|
777
791
|
|
|
792
|
+
# Autonomy gating: full-access unattended execution is opt-in (see
|
|
793
|
+
# orchestrate_worker_autonomy). The default "safe" mode keeps a sandbox and
|
|
794
|
+
# approval/permission gate in place because lane prompts come from
|
|
795
|
+
# DB-stored descriptions (stored-prompt-injection surface).
|
|
796
|
+
local autonomy codex_sandbox codex_approval claude_permission
|
|
797
|
+
autonomy=$(orchestrate_worker_autonomy)
|
|
798
|
+
if [ "$autonomy" = "danger" ]; then
|
|
799
|
+
codex_sandbox="danger-full-access"
|
|
800
|
+
codex_approval='approval_policy="never"'
|
|
801
|
+
claude_permission="dontAsk"
|
|
802
|
+
else
|
|
803
|
+
codex_sandbox="workspace-write"
|
|
804
|
+
codex_approval='approval_policy="on-request"'
|
|
805
|
+
claude_permission="acceptEdits"
|
|
806
|
+
fi
|
|
807
|
+
|
|
778
808
|
{
|
|
779
809
|
echo '#!/usr/bin/env bash'
|
|
780
810
|
echo 'set +e'
|
|
@@ -786,12 +816,12 @@ orchestrate_worker_run_script() {
|
|
|
786
816
|
printf 'export EAGLE_ORCHESTRATION_LANE=%q\n' "$lane_key"
|
|
787
817
|
printf 'export EAGLE_ORCHESTRATION_WORKTREE=%q\n' "$worktree"
|
|
788
818
|
if [ "$worker_agent" = "codex" ]; then
|
|
789
|
-
printf 'codex exec --cd %q --model %q -c %q -c %q --sandbox
|
|
790
|
-
"$worktree" "$worker_model" "$effort_config"
|
|
819
|
+
printf 'codex exec --cd %q --model %q -c %q -c %q --sandbox %q --output-last-message %q - < %q\n' \
|
|
820
|
+
"$worktree" "$worker_model" "$effort_config" "$codex_approval" "$codex_sandbox" "$last_message_path" "$prompt_file"
|
|
791
821
|
else
|
|
792
822
|
printf 'prompt=$(cat %q)\n' "$prompt_file"
|
|
793
|
-
printf 'claude -p --model %q --effort %q --permission-mode
|
|
794
|
-
"$worker_model" "$worker_effort"
|
|
823
|
+
printf 'claude -p --model %q --effort %q --permission-mode %q --output-format text "$prompt"\n' \
|
|
824
|
+
"$worker_model" "$worker_effort" "$claude_permission"
|
|
795
825
|
fi
|
|
796
826
|
echo 'rc=$?'
|
|
797
827
|
printf 'printf "%%s\\n" "$rc" > %q\n' "$exit_path"
|
package/scripts/session.sh
CHANGED
package/scripts/statusline-em.sh
CHANGED
|
@@ -65,7 +65,11 @@ eagle_mem_statusline_stats() {
|
|
|
65
65
|
project_scope=$(eagle_recall_project_scope_from_cwd "${current_dir:-$project_dir}" "$project_key")
|
|
66
66
|
project_condition=$(eagle_sql_project_scope_condition "project" "$project_scope")
|
|
67
67
|
|
|
68
|
-
|
|
68
|
+
# busy_timeout so a momentary SQLITE_BUSY (this is the hottest standalone
|
|
69
|
+
# query during live sessions) waits for the lock instead of exiting non-zero,
|
|
70
|
+
# which would otherwise escalate to an integrity-status mislabel below.
|
|
71
|
+
stats=$("$sqlite_bin" "$em_db" "PRAGMA busy_timeout=10000;
|
|
72
|
+
SELECT
|
|
69
73
|
COUNT(*) || '|' ||
|
|
70
74
|
(SELECT COUNT(*) FROM agent_memories WHERE $project_condition) || '|' ||
|
|
71
75
|
COALESCE(MAX(COALESCE(last_activity_at, started_at)), 'never')
|
package/scripts/tasks.sh
CHANGED
|
@@ -153,10 +153,13 @@ tasks_list() {
|
|
|
153
153
|
in_progress)
|
|
154
154
|
icon="${CYAN}>${RESET}"
|
|
155
155
|
marker=" ${CYAN}[in_progress]${RESET}"
|
|
156
|
-
# Staleness check for discipline
|
|
156
|
+
# Staleness check for discipline. Route through eagle_db (busy_timeout
|
|
157
|
+
# + FTS5-capable sqlite3) and escape $updated_at — it is a DB-read value
|
|
158
|
+
# interpolated back into SQL, so a quote would be second-order injection.
|
|
157
159
|
if [ -n "$updated_at" ]; then
|
|
158
|
-
local age_days
|
|
159
|
-
|
|
160
|
+
local age_days updated_at_sql
|
|
161
|
+
updated_at_sql=$(eagle_sql_escape "$updated_at")
|
|
162
|
+
age_days=$(eagle_db "SELECT (julianday('now') - julianday('$updated_at_sql'));" 2>/dev/null | cut -d. -f1)
|
|
160
163
|
if [ -n "$age_days" ] && [ "$age_days" -gt 7 ]; then
|
|
161
164
|
marker+=" ${RED}[STALE - ${age_days}d]${RESET}"
|
|
162
165
|
fi
|
package/scripts/test.sh
CHANGED
|
@@ -16,16 +16,31 @@ eagle_banner
|
|
|
16
16
|
eagle_header "Smoke Tests"
|
|
17
17
|
|
|
18
18
|
errors=0
|
|
19
|
+
skipped=0
|
|
19
20
|
|
|
20
21
|
run_check() {
    # Run one smoke check and record its outcome.
    #   $1  human-readable check name
    #   $2  command string, evaluated with all output discarded
    # Outcome by exit status of the command:
    #   0     -> pass
    #   2     -> skip: the check reported that its preconditions are absent in
    #            this environment; counted in $skipped, never as an error
    #   other -> failure, counted in $errors
    local name="$1"
    local cmd="$2"
    local rc=0

    echo -e " ${BOLD}→${RESET} $name"

    # `|| rc=$?` captures the status without tripping `set -e`.
    eval "$cmd" >/dev/null 2>&1 || rc=$?

    case "$rc" in
        0)
            eagle_ok "$name"
            ;;
        2)
            eagle_info "skipped: $name (preconditions not present here)"
            skipped=$((skipped + 1))
            ;;
        *)
            eagle_fail "$name"
            # Plain assignment on purpose: ((errors++)) returns status 1 when
            # the old value is 0, which would abort the runner under `set -e`
            # at the first failing check.
            errors=$((errors + 1))
            ;;
    esac
}
|
|
31
46
|
|
|
@@ -69,11 +84,24 @@ run_check "Recall Observability (UserPromptSubmit recall event)" "bash \"$SCRIPT
|
|
|
69
84
|
run_check "Eagle Event Log (hook/action observability)" "bash \"$SCRIPTS_DIR/../tests/test_eagle_events.sh\""
|
|
70
85
|
run_check "Dashboard Surface (local HTML memory view)" "bash \"$SCRIPTS_DIR/../tests/test_dashboard.sh\""
|
|
71
86
|
run_check "Clean Session Capture (capture_source, fill-only upsert, no clobber)" "bash \"$SCRIPTS_DIR/../tests/test_clean_session_capture.sh\""
|
|
87
|
+
run_check "Context Budget (SessionStart injection ceiling: normal unchanged, pathological capped + logged)" "bash \"$SCRIPTS_DIR/../tests/test_context_budget.sh\""
|
|
72
88
|
run_check "CLAUDE.md Capture Doctrine (installer rewrites outdated section)" "bash \"$SCRIPTS_DIR/../tests/test_claude_md_capture_doctrine.sh\""
|
|
89
|
+
run_check "Redaction Coverage (provider input, recall events, enrich job, autonomy, log paths)" "bash \"$SCRIPTS_DIR/../tests/test_redaction_coverage.sh\""
|
|
90
|
+
run_check "Data Integrity Hardening (migrate idempotency, SQL escaping, summary precedence)" "bash \"$SCRIPTS_DIR/../tests/test_data_integrity_hardening.sh\""
|
|
91
|
+
run_check "Mod-Tracker Concurrency (lock TTL, no lost append, observation dedup race)" "bash \"$SCRIPTS_DIR/../tests/test_mod_tracker_concurrency.sh\""
|
|
92
|
+
run_check "Reliability Retention (scan in-flight vs freshness, eagle_events prune)" "bash \"$SCRIPTS_DIR/../tests/test_reliability_retention.sh\""
|
|
93
|
+
run_check "Test Runner No-Abort (failing check does not kill the suite under set -e)" "bash \"$SCRIPTS_DIR/../tests/test_test_runner_no_abort.sh\""
|
|
94
|
+
# Python lane: the native Antigravity hook (mocked). Subshell-wrapped so a
|
|
95
|
+
# missing python3 yields a clean skip (exit 2) instead of aborting the suite.
|
|
96
|
+
run_check "Antigravity Hook (native Python SDK lifecycle, mocked)" "( command -v python3 >/dev/null 2>&1 || exit 2; python3 \"$SCRIPTS_DIR/../tests/test_antigravity_hook.py\" )"
|
|
73
97
|
|
|
74
98
|
echo ""
|
|
75
99
|
if [ "$errors" -eq 0 ]; then
|
|
76
|
-
|
|
100
|
+
if [ "$skipped" -gt 0 ]; then
|
|
101
|
+
eagle_ok "All smoke tests passed ($skipped skipped — preconditions not present here)"
|
|
102
|
+
else
|
|
103
|
+
eagle_ok "All smoke tests passed"
|
|
104
|
+
fi
|
|
77
105
|
|
|
78
106
|
# Auto-verify the 7 core features in the database
|
|
79
107
|
for feat in "compaction-survival" "feature-verification" "grok-cli-integration" "agent-orchestration" "Cross Agent Memory" "Installer And Updater" "Code Scan And Index"; do
|
package/scripts/update.sh
CHANGED
|
@@ -86,23 +86,9 @@ fi
|
|
|
86
86
|
# ─── Re-register hooks (idempotent) ───────────────────────
|
|
87
87
|
|
|
88
88
|
if [ "$claude_found" = true ] && [ -f "$SETTINGS" ] && command -v jq &>/dev/null; then
|
|
89
|
-
#
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
eagle_clean_hook_entries "$SETTINGS" "PreToolUse" "$EAGLE_MEM_DIR/hooks/pre-tool-use.sh"
|
|
93
|
-
|
|
94
|
-
eagle_patch_hook "$SETTINGS" "SessionStart" "" "$EAGLE_MEM_DIR/hooks/session-start.sh"
|
|
95
|
-
eagle_patch_hook "$SETTINGS" "Stop" "" "bash \"$EAGLE_MEM_DIR/hooks/stop.sh\""
|
|
96
|
-
eagle_patch_hook "$SETTINGS" "PostToolUse" "Read|Write|Edit|Bash|TaskUpdate" "$EAGLE_MEM_DIR/hooks/post-tool-use.sh"
|
|
97
|
-
eagle_patch_hook "$SETTINGS" "TaskCreated" "" "$EAGLE_MEM_DIR/hooks/post-tool-use.sh"
|
|
98
|
-
eagle_patch_hook "$SETTINGS" "TaskCompleted" "" "$EAGLE_MEM_DIR/hooks/post-tool-use.sh"
|
|
99
|
-
eagle_patch_hook "$SETTINGS" "SessionEnd" "" "$EAGLE_MEM_DIR/hooks/session-end.sh"
|
|
100
|
-
eagle_patch_hook "$SETTINGS" "UserPromptSubmit" "" "$EAGLE_MEM_DIR/hooks/user-prompt-submit.sh"
|
|
101
|
-
eagle_patch_hook "$SETTINGS" "PreToolUse" "Bash|Read|Edit|Write" "$EAGLE_MEM_DIR/hooks/pre-tool-use.sh"
|
|
102
|
-
|
|
103
|
-
# Allow agent-issued session capture to run without a permission prompt
|
|
104
|
-
eagle_patch_permission_allow "$SETTINGS" "Bash(eagle-mem session save:*)"
|
|
105
|
-
|
|
89
|
+
# Single source of truth for the Claude hook set (see lib/hooks.sh); quiet
|
|
90
|
+
# mode — the updater prints one summary line instead of per-hook lines.
|
|
91
|
+
eagle_register_claude_hooks "$SETTINGS"
|
|
106
92
|
eagle_ok "Hooks registered"
|
|
107
93
|
fi
|
|
108
94
|
|
|
package/tests/test_compaction_survival_matrix.sh
CHANGED
|
@@ -7,7 +7,12 @@ set -euo pipefail
|
|
|
7
7
|
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
|
8
8
|
EAGLE_BIN="$ROOT_DIR/bin/eagle-mem"
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
# Use a neutral system temp dir, NOT one inside $ROOT_DIR. From a published
|
|
11
|
+
# install $ROOT_DIR lives under node_modules/, and the code scanner excludes
|
|
12
|
+
# node_modules — so a fixture repo created here would index 0 files and the
|
|
13
|
+
# post-compact "Relevant Code" recall would never appear (the suite is run from
|
|
14
|
+
# the installed package via `eagle-mem test`).
|
|
15
|
+
tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/eagle-compaction-survival.XXXXXX")
|
|
11
16
|
trap 'rm -rf "$tmp_dir"' EXIT
|
|
12
17
|
|
|
13
18
|
export HOME="$tmp_dir/home"
|
|
@@ -190,6 +195,13 @@ assert_json "$compaction_json" '
|
|
|
190
195
|
and .metrics.semantic_graph_nodes >= 5
|
|
191
196
|
' "compaction --json did not report durable survival metrics"
|
|
192
197
|
|
|
198
|
+
# Index the fixture code deterministically so the post-compact recall can
|
|
199
|
+
# surface "Relevant Code". This previously relied on a background auto-index
|
|
200
|
+
# that only won the race from a warm source checkout, so the assertion failed
|
|
201
|
+
# when the suite ran from a published install via `eagle-mem test`. The
|
|
202
|
+
# `index` command is synchronous, so chunks exist by the time the hook runs.
|
|
203
|
+
( cd "$repo" && "$EAGLE_BIN" index >/dev/null 2>&1 )
|
|
204
|
+
|
|
193
205
|
eagle_upsert_session "$post_session" "$project" "$repo" "test-model" "test" "codex" >/dev/null
|
|
194
206
|
hook_input=$(jq -nc \
|
|
195
207
|
--arg sid "$post_session" \
|
|
package/tests/test_context_budget.sh
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# ═══════════════════════════════════════════════════════════
|
|
3
|
+
# Eagle Mem — SessionStart injection budget regression suite
|
|
4
|
+
# Proves the global recall-injection ceiling:
|
|
5
|
+
# (a) normal-sized recall is emitted UNCHANGED (no trimming)
|
|
6
|
+
# (b) a pathologically large recall is capped at the budget,
|
|
7
|
+
# drops whole low-priority sections from the bottom, keeps the
|
|
8
|
+
# highest-priority top sections, and LOGS a trim (observable)
|
|
9
|
+
# ═══════════════════════════════════════════════════════════
|
|
10
|
+
set -euo pipefail
|
|
11
|
+
|
|
12
|
+
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
|
13
|
+
tmp_dir=$(mktemp -d)
|
|
14
|
+
trap 'rm -rf "$tmp_dir"' EXIT
|
|
15
|
+
|
|
16
|
+
# Report a failed assertion on stderr and abort the test script with status 1.
fail() {
    echo "FAIL: $1" >&2
    exit 1
}
|
|
17
|
+
|
|
18
|
+
# assert_eq ACTUAL EXPECTED MESSAGE
# Abort via fail() unless ACTUAL and EXPECTED are the same string.
assert_eq() {
    if [ "$1" != "$2" ]; then
        fail "$3 (expected='$2' actual='$1')"
    fi
}
|
|
21
|
+
|
|
22
|
+
# assert_contains HAYSTACK NEEDLE MESSAGE
# Abort via fail() unless NEEDLE occurs literally somewhere in HAYSTACK.
assert_contains() {
    # The quoted "$2" is matched literally, never as a glob.
    [[ "$1" == *"$2"* ]] || fail "$3 (missing: $2)"
}
|
|
25
|
+
|
|
26
|
+
# assert_not_contains HAYSTACK NEEDLE MESSAGE
# Abort via fail() when NEEDLE occurs literally anywhere in HAYSTACK.
assert_not_contains() {
    # The quoted "$2" is matched literally, never as a glob.
    if [[ "$1" == *"$2"* ]]; then
        fail "$3 (unexpectedly present: $2)"
    fi
}
|
|
29
|
+
|
|
30
|
+
. "$ROOT_DIR/lib/common.sh"
|
|
31
|
+
|
|
32
|
+
trim_count_file="$tmp_dir/.trim-count"
|
|
33
|
+
export EAGLE_INJECT_TRIM_COUNT="$trim_count_file"
|
|
34
|
+
|
|
35
|
+
# ─── Budget helper: sane default + floor against self-defeating values ──
|
|
36
|
+
default_budget=$(eagle_sessionstart_inject_budget)
|
|
37
|
+
[ "$default_budget" -ge 4000 ] 2>/dev/null \
|
|
38
|
+
|| fail "default budget unexpectedly small ($default_budget)"
|
|
39
|
+
|
|
40
|
+
# Ask a fresh shell for the injection budget after configuring an absurdly
# small value (sessionstart_chars = 10) in a throwaway config.toml.
#
# The two paths are handed to the inner shell as positional parameters
# ($1 = package root, $2 = temp dir) rather than spliced into the script text:
# an install or temp path containing a single quote would otherwise terminate
# the inline quoting and break (or inject into) the inner script.
floored=$(EAGLE_MEM_DIR="$tmp_dir" bash -c '
    . "$1/lib/common.sh"
    mkdir -p "$2"
    printf "[context_budget]\nsessionstart_chars = 10\n" > "$2/config.toml"
    eagle_sessionstart_inject_budget
' _ "$ROOT_DIR" "$tmp_dir")
|
|
46
|
+
[ "$floored" -ge 4000 ] 2>/dev/null \
|
|
47
|
+
|| fail "tiny configured budget was not floored ($floored)"
|
|
48
|
+
|
|
49
|
+
# ─── (a) Normal-sized recall — emitted UNCHANGED ──────────────────────
|
|
50
|
+
normal_body="=== Eagle Mem: Project Overview ===
|
|
51
|
+
A small project overview that fits comfortably.
|
|
52
|
+
=== Eagle Mem: Recent Recall ===
|
|
53
|
+
One recent session summary.
|
|
54
|
+
=== Eagle Mem: Stored Memories ===
|
|
55
|
+
- [project][claude] Some memory: short description (today)
|
|
56
|
+
=== Eagle Mem: Core Files ===
|
|
57
|
+
- main.sh
|
|
58
|
+
=== Eagle Mem: Working Set ===
|
|
59
|
+
- app.ts (2 edits)"
|
|
60
|
+
|
|
61
|
+
normal_out=$(printf '%s' "$normal_body" | eagle_trim_inject_body 24000)
|
|
62
|
+
normal_dropped=$(cat "$trim_count_file")
|
|
63
|
+
|
|
64
|
+
assert_eq "$normal_out" "$normal_body" "normal recall was modified by the budget"
|
|
65
|
+
assert_eq "$normal_dropped" "0" "normal recall reported a non-zero trim"
|
|
66
|
+
|
|
67
|
+
# ─── (b) Pathological recall — capped, low-priority dropped, top kept ──
|
|
68
|
+
# Build a body whose top section is small but whose trailing sections are
|
|
69
|
+
# huge, so the ceiling must engage.
|
|
70
|
+
huge=$(head -c 9000 /dev/zero | tr '\0' 'x')
|
|
71
|
+
big_body="=== Eagle Mem: Project Overview ===
|
|
72
|
+
KEEP_OVERVIEW_MARKER top-priority overview must survive.
|
|
73
|
+
=== Eagle Mem: Recent Recall ===
|
|
74
|
+
$huge
|
|
75
|
+
=== Eagle Mem: Tasks ===
|
|
76
|
+
$huge
|
|
77
|
+
=== Eagle Mem: Core Files ===
|
|
78
|
+
$huge
|
|
79
|
+
=== Eagle Mem: Working Set ===
|
|
80
|
+
$huge"
|
|
81
|
+
|
|
82
|
+
budget=12000
|
|
83
|
+
big_out=$(printf '%s' "$big_body" | eagle_trim_inject_body "$budget")
|
|
84
|
+
big_dropped=$(cat "$trim_count_file")
|
|
85
|
+
|
|
86
|
+
[ "${#big_out}" -le "$budget" ] 2>/dev/null \
|
|
87
|
+
|| fail "trimmed body still exceeds budget (${#big_out} > $budget)"
|
|
88
|
+
[ "$big_dropped" -gt 0 ] 2>/dev/null \
|
|
89
|
+
|| fail "pathological recall did not report any trimmed sections"
|
|
90
|
+
assert_contains "$big_out" "KEEP_OVERVIEW_MARKER" \
|
|
91
|
+
"top-priority Overview section was dropped"
|
|
92
|
+
assert_not_contains "$big_out" "=== Eagle Mem: Working Set ===" \
|
|
93
|
+
"lowest-priority Working Set section survived past the budget"
|
|
94
|
+
|
|
95
|
+
# ─── Termination safety: a single oversized first section is kept whole ──
|
|
96
|
+
oversized=$(head -c 20000 /dev/zero | tr '\0' 'y')
|
|
97
|
+
solo_body="=== Eagle Mem: Project Overview ===
|
|
98
|
+
$oversized"
|
|
99
|
+
solo_out=$(printf '%s' "$solo_body" | eagle_trim_inject_body 5000)
|
|
100
|
+
solo_dropped=$(cat "$trim_count_file")
|
|
101
|
+
assert_contains "$solo_out" "=== Eagle Mem: Project Overview ===" \
|
|
102
|
+
"oversized lone section was discarded instead of kept whole"
|
|
103
|
+
assert_eq "$solo_dropped" "0" "oversized lone section reported a phantom trim"
|
|
104
|
+
|
|
105
|
+
# A body with no section markers is returned verbatim (no infinite loop).
|
|
106
|
+
nomarker_out=$(printf '%s' "$oversized" | eagle_trim_inject_body 5000)
|
|
107
|
+
assert_eq "${#nomarker_out}" "${#oversized}" "marker-less body was altered"
|
|
108
|
+
|
|
109
|
+
# ─── Observable trim: the hook logs a WARN when it trims ──────────────
|
|
110
|
+
# The hook engages the ceiling and logs a WARN with the dropped-section count
|
|
111
|
+
# whenever it trims; assert that observable surface exists in the hook.
|
|
112
|
+
grep -q 'SessionStart: injection over budget' "$ROOT_DIR/hooks/session-start.sh" \
|
|
113
|
+
|| fail "session-start.sh missing observable over-budget WARN log"
|
|
114
|
+
grep -q 'trimmed .* low-priority section' "$ROOT_DIR/hooks/session-start.sh" \
|
|
115
|
+
|| fail "session-start.sh WARN log does not report trimmed section count"
|
|
116
|
+
|
|
117
|
+
echo "PASS: SessionStart injection budget (normal unchanged, pathological capped + logged)"
|
|
package/tests/test_data_integrity_hardening.sh
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# ═══════════════════════════════════════════════════════════
|
|
3
|
+
# Eagle Mem — Data integrity hardening regression test
|
|
4
|
+
# Covers:
|
|
5
|
+
# - migrate.sh idempotency (re-run is a no-op, _migrations unchanged)
|
|
6
|
+
# - SQL escaping units (single quotes, FTS metachars, GLOB/LIKE patterns
|
|
7
|
+
# stored verbatim and queryable — no injection, no crash)
|
|
8
|
+
# - Summary precedence: eagle_insert_summary vs _fill_only — capture_source
|
|
9
|
+
# AND project stickiness on agent rows (finding #8 clobber)
|
|
10
|
+
# ═══════════════════════════════════════════════════════════
|
|
11
|
+
set -uo pipefail
|
|
12
|
+
|
|
13
|
+
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
|
14
|
+
tmp_dir=$(mktemp -d)
|
|
15
|
+
trap 'rm -rf "$tmp_dir"' EXIT
|
|
16
|
+
|
|
17
|
+
export EAGLE_MEM_DIR="$tmp_dir/em"
|
|
18
|
+
export EAGLE_AGENT_SOURCE="claude-code"
|
|
19
|
+
export EAGLE_MEM_DISABLE_HOOKS=1
|
|
20
|
+
mkdir -p "$EAGLE_MEM_DIR"
|
|
21
|
+
|
|
22
|
+
pass=0; fail=0
|
|
23
|
+
# Record a passing check: print it on stdout and bump the global pass counter.
ok() {
    echo " ok: $1"
    pass=$((pass + 1))
}
|
|
24
|
+
# Record a failing check: print it on stderr and bump the global fail counter.
bad() {
    echo " FAIL: $1" >&2
    fail=$((fail + 1))
}
|
|
25
|
+
|
|
26
|
+
bash "$ROOT_DIR/db/migrate.sh" >/dev/null 2>&1
|
|
27
|
+
|
|
28
|
+
. "$ROOT_DIR/lib/common.sh"
|
|
29
|
+
. "$ROOT_DIR/lib/db.sh"
|
|
30
|
+
|
|
31
|
+
# ── migrate.sh idempotency ──────────────────────────────────
|
|
32
|
+
mig_before=$(eagle_db "SELECT COUNT(*) || ':' || COALESCE(MAX(id),0) FROM _migrations;")
|
|
33
|
+
mig_out=$(bash "$ROOT_DIR/db/migrate.sh" 2>&1); mig_rc=$?
|
|
34
|
+
[ "$mig_rc" -eq 0 ] && ok "migrate 2nd run exits 0" || bad "migrate 2nd run rc=$mig_rc out=$mig_out"
|
|
35
|
+
case "$mig_out" in
|
|
36
|
+
*applied:*) bad "migrate 2nd run re-applied a migration: $mig_out" ;;
|
|
37
|
+
*) ok "migrate 2nd run applied nothing" ;;
|
|
38
|
+
esac
|
|
39
|
+
mig_after=$(eagle_db "SELECT COUNT(*) || ':' || COALESCE(MAX(id),0) FROM _migrations;")
|
|
40
|
+
[ "$mig_before" = "$mig_after" ] && ok "_migrations unchanged on 2nd run ($mig_after)" || bad "_migrations drifted: $mig_before -> $mig_after"
|
|
41
|
+
|
|
42
|
+
# ── SQL escaping units ──────────────────────────────────────
|
|
43
|
+
# Each payload is stored verbatim via eagle_insert_summary (escaped) and read
|
|
44
|
+
# back; a quote/metachar that broke out of the literal would error or truncate.
|
|
45
|
+
SID_Q="esc-quote-001"
|
|
46
|
+
eagle_upsert_session "$SID_Q" "esc/proj" "$tmp_dir" "" "test" "claude-code"
|
|
47
|
+
quote_payload="it's a \"trap\"; DROP TABLE summaries;-- '' or 1=1"
|
|
48
|
+
eagle_insert_summary "$SID_Q" "esc/proj" "$quote_payload" "" "" "" "" "[]" "[]" "" "" "" "" "claude-code" "agent"
|
|
49
|
+
got_q=$(eagle_db "SELECT request FROM summaries WHERE session_id='$SID_Q';")
|
|
50
|
+
[ "$got_q" = "$quote_payload" ] && ok "single-quote/SQL-injection payload stored verbatim" || bad "quote payload mangled -> '$got_q'"
|
|
51
|
+
# summaries table still present (no DROP executed)
|
|
52
|
+
have_tbl=$(eagle_db "SELECT name FROM sqlite_master WHERE type='table' AND name='summaries';")
|
|
53
|
+
[ "$have_tbl" = "summaries" ] && ok "summaries table intact after injection attempt" || bad "summaries table missing — injection executed"
|
|
54
|
+
|
|
55
|
+
SID_F="esc-fts-002"
|
|
56
|
+
eagle_upsert_session "$SID_F" "esc/proj" "$tmp_dir" "" "test" "claude-code"
|
|
57
|
+
fts_payload='NEAR("foo" bar)* AND col:val OR -baz {set}'
|
|
58
|
+
eagle_insert_summary "$SID_F" "esc/proj" "$fts_payload" "" "" "" "" "[]" "[]" "" "" "" "" "claude-code" "agent"
|
|
59
|
+
got_f=$(eagle_db "SELECT request FROM summaries WHERE session_id='$SID_F';")
|
|
60
|
+
[ "$got_f" = "$fts_payload" ] && ok "FTS-metachar payload stored verbatim" || bad "FTS payload mangled -> '$got_f'"
|
|
61
|
+
# FTS search over a benign token must not crash even though row has metachars
|
|
62
|
+
srch=$(eagle_search_summaries "foo" "" 5 2>&1); srch_rc=$?
|
|
63
|
+
[ "$srch_rc" -eq 0 ] && ok "FTS search over metachar corpus did not crash (rc=0)" || bad "FTS search crashed rc=$srch_rc: $srch"
|
|
64
|
+
|
|
65
|
+
SID_G="esc-glob-003"
|
|
66
|
+
eagle_upsert_session "$SID_G" "esc/proj" "$tmp_dir" "" "test" "claude-code"
|
|
67
|
+
glob_payload='100% OFF _now_ [a-z]* path\to\file'
|
|
68
|
+
eagle_insert_observation "$SID_G" "esc/proj" "Bash" "$glob_payload" "[]" "[]"
|
|
69
|
+
got_g=$(eagle_db "SELECT tool_input_summary FROM observations WHERE session_id='$SID_G';")
|
|
70
|
+
[ "$got_g" = "$glob_payload" ] && ok "GLOB/LIKE-metachar payload stored verbatim in observations" || bad "glob payload mangled -> '$got_g'"
|
|
71
|
+
|
|
72
|
+
# guardrail field escaping (single quote)
|
|
73
|
+
if declare -F eagle_add_guardrail >/dev/null 2>&1; then
|
|
74
|
+
eagle_add_guardrail "esc/proj" "don't touch '; DELETE FROM guardrails;--" "*.sh" "test" >/dev/null 2>&1
|
|
75
|
+
grc=$(eagle_db "SELECT rule FROM guardrails WHERE project='esc/proj' ORDER BY id DESC LIMIT 1;")
|
|
76
|
+
case "$grc" in *"don't touch"*) ok "guardrail rule with quote stored verbatim" ;; *) bad "guardrail rule mangled -> '$grc'" ;; esac
|
|
77
|
+
gcount=$(eagle_db "SELECT name FROM sqlite_master WHERE type='table' AND name='guardrails';")
|
|
78
|
+
[ "$gcount" = "guardrails" ] && ok "guardrails table intact after injection attempt" || bad "guardrails table dropped — injection executed"
|
|
79
|
+
else
|
|
80
|
+
ok "eagle_add_guardrail not present — skipped (non-fatal)"
|
|
81
|
+
fi
|
|
82
|
+
|
|
83
|
+
# ── Summary precedence / clobber (finding #8) ───────────────
|
|
84
|
+
# An agent-authored row must NOT have its project rekeyed by a later hook-path
|
|
85
|
+
# writer that arrives with a drifted project key.
|
|
86
|
+
SID_P="precedence-004"
|
|
87
|
+
eagle_upsert_session "$SID_P" "real/project" "$tmp_dir" "" "test" "claude-code"
|
|
88
|
+
eagle_insert_summary "$SID_P" "real/project" "agent req" "" "" "agent done" "" "[]" "[]" "" "agent decision" "" "" "claude-code" "agent"
|
|
89
|
+
[ "$(eagle_db "SELECT capture_source FROM summaries WHERE session_id='$SID_P';")" = "agent" ] && ok "P: capture_source=agent after agent insert" || bad "P: capture_source not agent"
|
|
90
|
+
[ "$(eagle_db "SELECT project FROM summaries WHERE session_id='$SID_P';")" = "real/project" ] && ok "P: project=real/project after agent insert" || bad "P: project wrong"
|
|
91
|
+
|
|
92
|
+
# Later hook writer with a DRIFTED project key + heuristic capture_source.
|
|
93
|
+
eagle_insert_summary "$SID_P" "DRIFTED/wrong-key" "" "" "" "" "" "[]" "[]" "" "" "" "" "claude-code" "hook"
|
|
94
|
+
proj_after=$(eagle_db "SELECT project FROM summaries WHERE session_id='$SID_P';")
|
|
95
|
+
[ "$proj_after" = "real/project" ] && ok "P: agent row project NOT clobbered by drifted hook write" || bad "P: project clobbered -> '$proj_after'"
|
|
96
|
+
[ "$(eagle_db "SELECT capture_source FROM summaries WHERE session_id='$SID_P';")" = "agent" ] && ok "P: capture_source stays agent" || bad "P: capture_source downgraded"
|
|
97
|
+
[ "$(eagle_db "SELECT completed FROM summaries WHERE session_id='$SID_P';")" = "agent done" ] && ok "P: agent completed preserved" || bad "P: completed clobbered"
|
|
98
|
+
|
|
99
|
+
# Contrast: a hook-authored row (capture_source != agent) SHOULD still accept a
|
|
100
|
+
# project correction (the normal drift-repair path must not regress).
|
|
101
|
+
SID_H="precedence-005"
|
|
102
|
+
eagle_upsert_session "$SID_H" "hookproj/a" "$tmp_dir" "" "test" "claude-code"
|
|
103
|
+
eagle_insert_summary "$SID_H" "hookproj/a" "hook req" "" "" "hook done" "" "[]" "[]" "" "" "" "" "claude-code" "hook"
|
|
104
|
+
eagle_insert_summary "$SID_H" "hookproj/b" "" "" "" "" "" "[]" "[]" "" "" "" "" "claude-code" "hook"
|
|
105
|
+
proj_h=$(eagle_db "SELECT project FROM summaries WHERE session_id='$SID_H';")
|
|
106
|
+
[ "$proj_h" = "hookproj/b" ] && ok "H: hook row project STILL repairable (no regression)" || bad "H: hook project not updated -> '$proj_h'"
|
|
107
|
+
|
|
108
|
+
# fill_only must never change project or downgrade capture_source on agent row
|
|
109
|
+
eagle_insert_summary_fill_only "$SID_P" "ANOTHER/drift" "" "" "fill learned" "" "" "[]" "[]" "" "" "" "" "claude-code" "enrich"
|
|
110
|
+
[ "$(eagle_db "SELECT project FROM summaries WHERE session_id='$SID_P';")" = "real/project" ] && ok "P: fill_only did not change project" || bad "P: fill_only changed project"
|
|
111
|
+
case "$(eagle_db "SELECT learned FROM summaries WHERE session_id='$SID_P';")" in *"fill learned"*) ok "P: fill_only filled empty learned gap" ;; *) bad "P: fill_only did not fill learned" ;; esac
|
|
112
|
+
|
|
113
|
+
echo ""
|
|
114
|
+
echo "test_data_integrity_hardening: $pass passed, $fail failed"
|
|
115
|
+
[ "$fail" -eq 0 ]
|