@onlooker-community/ecosystem 0.22.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/.release-please-manifest.json +2 -2
- package/CHANGELOG.md +8 -0
- package/hooks/hooks.json +4 -0
- package/package.json +1 -1
- package/plugins/historian/.claude-plugin/plugin.json +2 -2
- package/plugins/historian/CHANGELOG.md +7 -0
- package/plugins/historian/README.md +21 -7
- package/plugins/historian/config.json +19 -3
- package/plugins/historian/scripts/hooks/historian-prompt-submit.sh +262 -8
- package/plugins/historian/scripts/hooks/historian-session-end.sh +31 -0
- package/plugins/historian/scripts/lib/historian-embedder.sh +126 -0
- package/plugins/historian/scripts/lib/historian-retriever.sh +191 -0
- package/plugins/historian/scripts/lib/historian-storage.sh +47 -0
- package/scripts/hooks/memory-recall-tracker.sh +206 -0
- package/test/bats/historian-prompt-submit.bats +236 -0
- package/test/bats/memory-recall-tracker.bats +189 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Similarity-search retriever for Historian.
|
|
3
|
+
#
|
|
4
|
+
# Given a query embedding and a project key, walks every JSONL chunk
|
|
5
|
+
# record under ~/.onlooker/historian/<key>/sessions/, computes cosine
|
|
6
|
+
# similarity between the query vector and each chunk's `embedding`
|
|
7
|
+
# field, and returns the top-K candidates above a similarity floor.
|
|
8
|
+
#
|
|
9
|
+
# Chunks indexed before the embedder shipped don't have an `embedding`
|
|
10
|
+
# field; the retriever silently skips them rather than treating them as
|
|
11
|
+
# zero-similarity. They'll join the index after the next SessionEnd
|
|
12
|
+
# indexing pass.
|
|
13
|
+
|
|
14
|
+
# Aggregate every chunk record for the project. Returns a JSON array.
|
|
15
|
+
historian_retriever_load_all_chunks() {
  # Print every chunk record stored for a project as a single JSON array.
  #
  # Arguments:
  #   $1 key  project key; resolved to a directory via historian_sessions_dir.
  #
  # Output: a JSON array on stdout. `[]` is printed when the key is empty,
  # the sessions directory does not exist, or it cannot be listed.
  # Records are passed through unchanged (the `embedding` field included).
  local key="${1:-}"
  if [[ -z "$key" ]]; then
    echo '[]'
    return 0
  fi

  local dir
  dir=$(historian_sessions_dir "$key")
  if [[ ! -d "$dir" ]]; then
    echo '[]'
    return 0
  fi

  # python3 rather than `jq -s`: it tolerates blank and malformed lines and
  # unreadable files without aborting the whole aggregation.
  python3 - "$dir" <<'PY'
import json, os, sys

dir_path = sys.argv[1]
out = []
try:
    names = sorted(os.listdir(dir_path))
except OSError:
    # Directory vanished or is unreadable (FileNotFoundError,
    # PermissionError, NotADirectoryError, ...): report "no chunks"
    # instead of dying with a traceback and printing nothing.
    names = []

for name in names:
    if not name.endswith(".jsonl"):
        continue
    path = os.path.join(dir_path, name)
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    out.append(json.loads(line))
                except json.JSONDecodeError:
                    # One corrupt line must not hide the rest of the file.
                    continue
    except OSError:
        continue

print(json.dumps(out))
PY
}
|
|
54
|
+
|
|
55
|
+
# Compute top-K cosine-similarity matches against the query embedding.
|
|
56
|
+
#
|
|
57
|
+
# The chunks are streamed from disk one line at a time so memory and
|
|
58
|
+
# argv stay bounded as the per-project store grows. Earlier versions
|
|
59
|
+
# passed the full chunks array as an argv string, which would trip the
|
|
60
|
+
# OS ARG_MAX limit somewhere around tens of thousands of chunks; this
|
|
61
|
+
# form never holds more than one unscored chunk in memory at a time (only
# the matches that clear the similarity floor are kept for the final sort).
|
|
62
|
+
#
|
|
63
|
+
# Usage: historian_retriever_search <sessions_dir>
|
|
64
|
+
# <query_embedding_json>
|
|
65
|
+
# <top_k> <min_similarity>
|
|
66
|
+
# <max_age_days> <current_session_id>
|
|
67
|
+
#
|
|
68
|
+
# Output: JSON array sorted by similarity descending, length <= top_k.
|
|
69
|
+
# Each entry: {
|
|
70
|
+
# chunk_id, session_id, similarity, age_days, body_redacted,
|
|
71
|
+
# chunk_index, start_turn_index, end_turn_index, source
|
|
72
|
+
# }
|
|
73
|
+
historian_retriever_search() {
  # Rank stored chunks against a query embedding and print the best matches.
  #
  # Positional arguments (defaults in parentheses):
  #   $1 sessions_dir     directory holding the per-session *.jsonl files
  #   $2 query            JSON array of numbers ([])
  #   $3 top_k            maximum number of results (5)
  #   $4 min_sim          cosine-similarity floor (0.55)
  #   $5 max_age_days     drop chunks older than this; <= 0 disables (180)
  #   $6 current_session  session id whose chunks are excluded (none)
  #
  # Output: a JSON array on stdout, sorted by similarity descending, at most
  # top_k long. `[]` is printed when the directory is missing or the query
  # is empty or not valid JSON. Chunks with an unparseable or missing
  # `created_at` report age_days = -1 and are never age-filtered.
  local sessions_dir="${1:-}"
  local query="${2:-[]}"
  local top_k="${3:-5}"
  local min_sim="${4:-0.55}"
  local max_age_days="${5:-180}"
  local current_session="${6:-}"

  if [[ -z "$sessions_dir" || ! -d "$sessions_dir" ]]; then
    echo '[]'
    return 0
  fi

  python3 - "$sessions_dir" "$top_k" "$min_sim" "$max_age_days" "$current_session" "$query" <<'PY'
import datetime, json, math, os, sys


def numeric_arg(index, cast, default):
    """Parse argv[index] with cast; fall back to default when malformed."""
    try:
        return cast(sys.argv[index])
    except (IndexError, ValueError):
        return default


sessions_dir = sys.argv[1]
top_k = numeric_arg(2, int, 5)
min_sim = numeric_arg(3, float, 0.55)
max_age_days = numeric_arg(4, int, 180)
current_session = sys.argv[5]
try:
    query = json.loads(sys.argv[6] or "null")
except json.JSONDecodeError:
    # A garbled query must yield "no matches", not a traceback.
    query = None


def cosine(a, b):
    """Return cosine similarity, or None when it is undefined."""
    if not a or not b or len(a) != len(b):
        return None
    dot = 0.0
    na = 0.0
    nb = 0.0
    try:
        for x, y in zip(a, b):
            dot += x * y
            na += x * x
            nb += y * y
    except TypeError:
        # Non-numeric vector element (string, null, nested list).
        return None
    if na <= 0.0 or nb <= 0.0:
        return None
    return dot / (math.sqrt(na) * math.sqrt(nb))


def parse_iso(s):
    """Parse a `YYYY-MM-DDTHH:MM:SSZ` timestamp as UTC; None on failure."""
    if not s or not isinstance(s, str):
        return None
    try:
        return datetime.datetime.strptime(s, "%Y-%m-%dT%H:%M:%SZ").replace(
            tzinfo=datetime.timezone.utc
        )
    except ValueError:
        return None


if not isinstance(query, list) or not query:
    print("[]")
    sys.exit(0)

now = datetime.datetime.now(datetime.timezone.utc)
scored = []


def consider(chunk):
    """Score one decoded JSONL record and keep it if it qualifies."""
    # A line can be valid JSON without being an object (e.g. `[1,2]`);
    # skip it rather than crash on `.get`.
    if not isinstance(chunk, dict):
        return
    sid = chunk.get("session_id", "")
    # A session retrieving its own chunks is a degenerate case.
    if current_session and sid == current_session:
        return
    embedding = chunk.get("embedding")
    # Chunks indexed without an embedding are skipped, not scored as 0.
    if not isinstance(embedding, list) or not embedding:
        return
    sim = cosine(query, embedding)
    if sim is None or sim < min_sim:
        return
    created = parse_iso(chunk.get("created_at"))
    age_days = -1 if created is None else (now - created).days
    if max_age_days > 0 and age_days > max_age_days:
        return
    scored.append(
        {
            "chunk_id": chunk.get("chunk_id"),
            "session_id": sid,
            "similarity": round(sim, 4),
            "age_days": age_days,
            "body_redacted": chunk.get("body_redacted", ""),
            "chunk_index": chunk.get("chunk_index"),
            "start_turn_index": chunk.get("start_turn_index"),
            "end_turn_index": chunk.get("end_turn_index"),
            "source": chunk.get("source", "local"),
        }
    )


try:
    names = sorted(os.listdir(sessions_dir))
except OSError:
    names = []

for name in names:
    if not name.endswith(".jsonl"):
        continue
    path = os.path.join(sessions_dir, name)
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    chunk = json.loads(line)
                except json.JSONDecodeError:
                    continue
                consider(chunk)
    except OSError:
        continue

scored.sort(key=lambda c: c["similarity"], reverse=True)
# Clamp so a negative top_k cannot slice from the wrong end of the list.
print(json.dumps(scored[: max(top_k, 0)]))
PY
}
|
|
@@ -108,3 +108,50 @@ historian_storage_reset_session() {
|
|
|
108
108
|
[[ -f "$path" ]] || return 0
|
|
109
109
|
: > "$path"
|
|
110
110
|
}
|
|
111
|
+
|
|
112
|
+
# ============================================================================
|
|
113
|
+
# Retrieval watermarks (per-session, scoped to the project key)
|
|
114
|
+
# ============================================================================
|
|
115
|
+
|
|
116
|
+
# Path used to hold the per-session retrieval state (count + last_ms) so
|
|
117
|
+
# the rate gate persists across UserPromptSubmit invocations within a
|
|
118
|
+
# single session. We key on (project, session) so cross-session retrieval
|
|
119
|
+
# limits don't leak. The state file uses `last_ms` — an epoch-millisecond
|
|
120
|
+
# timestamp of the last retrieval the rate gate let through — and the
|
|
121
|
+
# cooldown gate compares (now_ms - last_ms) against cooldown_seconds.
|
|
122
|
+
historian_retrieval_state_path() {
  # Print the path of the retrieval-state file for (project key, session).
  #
  # Arguments:
  #   $1 key         project key, resolved through historian_project_dir
  #   $2 session_id  session identifier; reduced to [A-Za-z0-9._-] before
  #                  it is used as a file name, "unknown" if nothing is left
  local key="$1"
  local session_id="$2"
  local sanitized project_dir

  sanitized=$(printf '%s' "$session_id" | tr -cd '[:alnum:]._-')
  if [[ -z "$sanitized" ]]; then
    sanitized="unknown"
  fi

  project_dir=$(historian_project_dir "$key")
  printf '%s/retrieval-state/%s.json' "$project_dir" "$sanitized"
}
|
|
130
|
+
|
|
131
|
+
# Read the JSON document at the watermark path. Returns {"count":0,
|
|
132
|
+
# "last_ms":0} when the file is absent or unreadable.
|
|
133
|
+
historian_retrieval_state_read() {
  # Print the retrieval-state JSON object for (project key, session).
  #
  # Arguments:
  #   $1 key         project key
  #   $2 session_id  session identifier
  #
  # Output: a compact JSON object that always carries `count` and
  # `last_ms`. {"count":0,"last_ms":0} is printed when the file is absent,
  # empty, unparseable, or does not contain a JSON object.
  local key="$1"
  local session_id="$2"
  local fallback='{"count":0,"last_ms":0}'
  local path state=""

  path=$(historian_retrieval_state_path "$key" "$session_id")
  if [[ -f "$path" ]]; then
    # Slurp (-s) so an empty file becomes `[]` rather than "no output with
    # exit 0". Take the first object in the file and layer it over the
    # defaults so a partially written document still yields both keys.
    state=$(jq -cs '{count:0,last_ms:0} + ((map(objects) | .[0]) // {})' \
      "$path" 2>/dev/null) || state=""
  fi

  printf '%s' "${state:-$fallback}"
}
|
|
145
|
+
|
|
146
|
+
# Bump the count and update last_ms.
|
|
147
|
+
historian_retrieval_state_write() {
|
|
148
|
+
local key="$1"
|
|
149
|
+
local session_id="$2"
|
|
150
|
+
local count="$3"
|
|
151
|
+
local last_ms="$4"
|
|
152
|
+
local path
|
|
153
|
+
path=$(historian_retrieval_state_path "$key" "$session_id")
|
|
154
|
+
mkdir -p "$(dirname "$path")" 2>/dev/null
|
|
155
|
+
jq -cn --argjson count "$count" --argjson last_ms "$last_ms" \
|
|
156
|
+
'{ count: $count, last_ms: $last_ms }' > "$path" 2>/dev/null
|
|
157
|
+
}
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Onlooker Memory Recall Tracker
|
|
3
|
+
# Invoked by SessionStart (matcher: *) when a session boots, resumes, or
|
|
4
|
+
# restarts after compaction. Emits one canonical `memory.recalled` event
|
|
5
|
+
# per typed-memory file present at the project's per-checkout memory
|
|
6
|
+
# store path. This approximates the substrate signal "these memories are
|
|
7
|
+
# now in the model's context for the session about to begin".
|
|
8
|
+
#
|
|
9
|
+
# Curator's usage tracker (and any future plugin that reasons about how
|
|
10
|
+
# often a memory is in scope) depends on this. The signal is coarse —
|
|
11
|
+
# per-session-load rather than per-recall — but actionable in aggregate.
|
|
12
|
+
#
|
|
13
|
+
# Hook contract:
|
|
14
|
+
# - Always exits 0. Never blocks SessionStart.
|
|
15
|
+
# - No-ops when there is no project memory store, no git context, or
|
|
16
|
+
# when the source is `compact` (compaction is metadata-only; the
|
|
17
|
+
# same memories remain in scope, so re-emitting would double-count).
|
|
18
|
+
|
|
19
|
+
# Deliberately no `-e`: a failing command must never abort session startup.
set -uo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=../lib/validate-path.sh
source "$SCRIPT_DIR/../lib/validate-path.sh"
# shellcheck source=../lib/onlooker-schema.sh
source "$SCRIPT_DIR/../lib/onlooker-schema.sh"

# Hook health instrumentation: register this hook, attach the session
# context once stdin has been read, and close the record with
# hook_success on every exit path below.
hook_register "memory-recall-tracker" "Memory Recall Tracker" "Emits memory.recalled per typed memory file present at SessionStart"

INPUT=$(cat 2>/dev/null || true)
hook_set_context "$INPUT" "SessionStart"

# Extract the fields this hook needs from the SessionStart payload. Each
# extraction degrades to its fallback when jq fails.
CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // ""' 2>/dev/null) || CWD=""
SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // ""' 2>/dev/null) || SESSION_ID=""
SOURCE=$(printf '%s' "$INPUT" | jq -r '.source // "startup"' 2>/dev/null) || SOURCE="startup"
if [[ -z "$CWD" ]]; then
  CWD="$(pwd)"
fi
if [[ -z "$SESSION_ID" ]]; then
  SESSION_ID="unknown"
fi

# A compaction restart keeps the same memories in scope; emitting again
# would double-count them, so finish here.
case "$SOURCE" in
  compact)
    hook_success
    exit 0
    ;;
esac
|
|
48
|
+
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
# Resolve project_key. Mirrors the SHA256-of-remote-URL + common-dir
|
|
51
|
+
# fallback every memory plugin uses (see plugins/librarian/scripts/lib/
|
|
52
|
+
# librarian-project-key.sh and friends): if there's no origin remote,
|
|
53
|
+
# anchor the key on git --git-common-dir rather than --show-toplevel so
|
|
54
|
+
# two worktrees of the same local-only repo share a key.
|
|
55
|
+
# ---------------------------------------------------------------------------
|
|
56
|
+
|
|
57
|
+
_memory_sha256_first12() {
|
|
58
|
+
local input="$1"
|
|
59
|
+
if command -v shasum >/dev/null 2>&1; then
|
|
60
|
+
printf '%s' "$input" | shasum -a 256 2>/dev/null | cut -c1-12
|
|
61
|
+
elif command -v sha256sum >/dev/null 2>&1; then
|
|
62
|
+
printf '%s' "$input" | sha256sum 2>/dev/null | cut -c1-12
|
|
63
|
+
else
|
|
64
|
+
return 1
|
|
65
|
+
fi
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
_memory_repo_root_via_common_dir() {
  # Print the canonical repository root for the checkout containing $1.
  #
  # The root is the parent of `git rev-parse --git-common-dir`; if that
  # cannot be resolved, `git rev-parse --show-toplevel` is used instead.
  # Prints nothing (and returns 0) when $1 is not inside a git repository.
  local cwd="$1"
  # Initialise both locals: a bare `local toplevel` leaves the name unset,
  # and the `-z "$toplevel"` check below would then abort under `set -u`
  # whenever the common-dir branch is skipped.
  local common_dir="" toplevel=""

  common_dir=$(git -C "$cwd" rev-parse --git-common-dir 2>/dev/null) || return 0

  # git may report the common dir relative to cwd; make it absolute.
  if [[ -n "$common_dir" && "$common_dir" != /* ]]; then
    common_dir="$(cd "$cwd" && cd "$common_dir" 2>/dev/null && pwd -P)" || common_dir=""
  fi

  # The common dir is typically the main repo's .git directory, so its
  # parent is the root shared by every worktree.
  if [[ -n "$common_dir" && -d "$common_dir" ]]; then
    toplevel="$(cd "$common_dir/.." 2>/dev/null && pwd -P)" || toplevel=""
  fi

  if [[ -z "$toplevel" ]]; then
    toplevel=$(git -C "$cwd" rev-parse --show-toplevel 2>/dev/null || true)
    if [[ -n "$toplevel" ]]; then
      toplevel="$(cd "$toplevel" 2>/dev/null && pwd -P)" || toplevel=""
    fi
  fi

  printf '%s' "$toplevel"
}
|
|
87
|
+
|
|
88
|
+
# Derive PROJECT_KEY: hash of the origin remote URL when one exists,
# otherwise hash of the shared repository root. Left empty outside a git
# work tree or when no hashing tool is available.
PROJECT_KEY=""
if git -C "$CWD" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  REMOTE=$(git -C "$CWD" remote get-url origin 2>/dev/null || true)
  if [[ -z "$REMOTE" ]]; then
    ROOT=$(_memory_repo_root_via_common_dir "$CWD")
    if [[ -n "$ROOT" ]]; then
      PROJECT_KEY=$(_memory_sha256_first12 "root:${ROOT}")
    fi
  else
    PROJECT_KEY=$(_memory_sha256_first12 "remote:${REMOTE}")
  fi
fi

# No key means nothing to attribute events to; finish cleanly.
[[ -n "$PROJECT_KEY" ]] || {
  hook_success
  exit 0
}
|
|
105
|
+
|
|
106
|
+
# ---------------------------------------------------------------------------
|
|
107
|
+
# Resolve the per-project typed-memory store at
|
|
108
|
+
# ~/.claude/projects/<encoded>/memory/. Claude Code encodes the project
|
|
109
|
+
# path by replacing path separators with `-` and prepending a leading `-`.
|
|
110
|
+
# Prefer $CLAUDE_PROJECT_ENCODED when the harness has populated it; fall
|
|
111
|
+
# back to deriving from CWD.
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
|
|
114
|
+
ENCODED="${CLAUDE_PROJECT_ENCODED:-}"
if [[ -z "$ENCODED" ]]; then
  # Encode the resolved absolute cwd by turning every `/` into `-`; the
  # leading slash therefore becomes the leading `-` of the encoded name.
  ABS_CWD=$(cd "$CWD" 2>/dev/null && pwd -P) || ABS_CWD=""
  if [[ -n "$ABS_CWD" ]]; then
    ENCODED=$(printf '%s' "$ABS_CWD" | sed -E 's#/#-#g')
  fi
fi

# `set -u` is active, so a bare ${CLAUDE_HOME} would abort the hook with a
# non-zero status if nothing defined it. Fall back to ~/.claude, the store
# location documented for this hook.
# NOTE(review): presumably validate-path.sh already exports CLAUDE_HOME, in
# which case this default is never used; confirm.
MEMORY_DIR="${CLAUDE_HOME:-${HOME:-}/.claude}/projects/${ENCODED}/memory"
if [[ -z "$ENCODED" || ! -d "$MEMORY_DIR" ]]; then
  hook_success
  exit 0
fi
|
|
129
|
+
|
|
130
|
+
# ---------------------------------------------------------------------------
|
|
131
|
+
# Walk every *.md file (excluding MEMORY.md itself, which is the index, not
|
|
132
|
+
# a memory). For each, parse the YAML frontmatter's `type` field. Skip
|
|
133
|
+
# files whose type isn't one of the four valid enum values — emitting
|
|
134
|
+
# anything else would fail schema validation and the event would be
|
|
135
|
+
# silently dropped.
|
|
136
|
+
# ---------------------------------------------------------------------------
|
|
137
|
+
|
|
138
|
+
_extract_type() {
  # Print the `type` value from a memory file's YAML frontmatter.
  #
  # Arguments:
  #   $1 path  file to inspect
  #
  # Output: the value of the first `type:` line found between a leading
  # `---` line and the next `---` line, with quotes, carriage returns and
  # trailing blanks removed. Prints nothing when the file is missing, does
  # not start with `---`, or has no `type:` line in its frontmatter.
  local path="$1"
  [[ -f "$path" ]] || return 0

  # A single awk pass (no python/yq dependency). Exiting from awk instead
  # of piping through `head -1` also avoids a SIGPIPE status under
  # `pipefail`. \047 is the single-quote character.
  awk '
    NR == 1 && /^---/ { in_fm = 1; next }
    !in_fm            { exit }
    /^---/            { exit }
    /^type:/ {
      sub(/^type:[ \t]*/, "")
      gsub(/["\047\r]/, "")
      sub(/[ \t]+$/, "")
      print
      exit
    }
  ' "$path" 2>/dev/null
}
|
|
152
|
+
|
|
153
|
+
# Events are attributed to ${ONLOOKER_PLUGIN_NAME}, defaulting to
# "onlooker"; the value does not change between files, so resolve it once.
local_plugin="${ONLOOKER_PLUGIN_NAME:-onlooker}"

# `position` counts successfully emitted events and becomes each event's
# recall_position.
position=0
for file in "$MEMORY_DIR"/*.md; do
  [[ -f "$file" ]] || continue
  fname=$(basename "$file")
  # MEMORY.md is the index, not a memory.
  if [[ "$fname" == "MEMORY.md" ]]; then
    continue
  fi

  memory_type=$(_extract_type "$file")
  case "$memory_type" in
    user | feedback | project | reference) ;;
    *)
      # Untyped or unknown-typed files are skipped silently; only these
      # four values are emitted.
      continue
      ;;
  esac

  payload=$(jq -cn \
    --arg project_key "$PROJECT_KEY" \
    --arg memory_file "$fname" \
    --arg memory_type "$memory_type" \
    --argjson recall_position "$position" \
    '{
      project_key: $project_key,
      memory_file: $memory_file,
      memory_type: $memory_type,
      recall_position: $recall_position
    }')

  params=$(jq -cn \
    --arg plugin "$local_plugin" \
    --arg sid "$SESSION_ID" \
    --arg type "memory.recalled" \
    --argjson payload "$payload" \
    '{ plugin: $plugin, session_id: $sid, event_type: $type, payload: $payload }')

  # Build the event through the node emitter; skip the file on failure.
  event_json=$(printf '%s' "$params" \
    | ONLOOKER_DIR="$ONLOOKER_DIR" ONLOOKER_PLUGIN_NAME="$local_plugin" \
      node "$_ONLOOKER_EVENT_JS" emit 2>/dev/null) || event_json=""
  if [[ -z "$event_json" ]]; then
    continue
  fi

  onlooker_append_event "$event_json" || true
  position=$((position + 1))
done

hook_success
exit 0
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
#
|
|
3
|
+
# Exercises the historian UserPromptSubmit retrieval pipeline end-to-end
|
|
4
|
+
# against a synthetic ollama daemon (a fake `curl` binary on PATH that
|
|
5
|
+
# returns predictable embeddings keyed on sentinel substrings in the
|
|
6
|
+
# prompt). Indexing happens via the real SessionEnd hook against the
|
|
7
|
+
# same stub, so the test exercises both halves of the embedder
|
|
8
|
+
# integration.
|
|
9
|
+
|
|
10
|
+
setup() {
  # Per-test fixture: a throwaway git repo with an origin remote, a stub
  # `curl` placed first on PATH, a historian-enabled settings file, and
  # the paths of the two hooks under test.
  source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  PLUGIN_ROOT="${REPO_ROOT}/plugins/historian"
  export CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT"
  export ONLOOKER_ECOSYSTEM_ROOT="$REPO_ROOT"

  # --- project repository ------------------------------------------------
  PROJECT_REPO="${BATS_TEST_TMPDIR}/repo"
  mkdir -p "$PROJECT_REPO"
  git -C "$PROJECT_REPO" init -q
  git -C "$PROJECT_REPO" config user.email t@example.com
  git -C "$PROJECT_REPO" config user.name "Test"
  git -C "$PROJECT_REPO" remote add origin git@github.com:org/historian-retrieval-test.git

  # shellcheck disable=SC1091
  source "${PLUGIN_ROOT}/scripts/lib/historian-project-key.sh"
  PROJECT_KEY=$(historian_project_key "$PROJECT_REPO")
  [ -n "$PROJECT_KEY" ]

  HIST_DIR="${ONLOOKER_DIR}/historian/${PROJECT_KEY}"
  ONLOOKER_EVENTS_LOG="${ONLOOKER_DIR}/logs/onlooker-events.jsonl"

  # --- stub curl -----------------------------------------------------------
  STUB_BIN="${BATS_TEST_TMPDIR}/bin"
  mkdir -p "$STUB_BIN"
  cat > "${STUB_BIN}/curl" <<'STUB'
#!/usr/bin/env bash
# Mini curl stub for historian bats tests.
# Parses just enough of the curl arg shape to find the URL and the -d
# payload. Returns deterministic embeddings keyed on sentinel substrings
# in the prompt.
url=""
payload=""
prev=""
for arg in "$@"; do
  case "$prev" in
    -d|--data|--data-raw)
      payload="$arg"; prev=""; continue ;;
    --max-time|-o|-H|--header)
      prev=""; continue ;;
  esac
  case "$arg" in
    -d|--data|--data-raw|--max-time|-o|-H|--header)
      prev="$arg" ;;
    -*)
      ;;
    *)
      [[ -z "$url" ]] && url="$arg" ;;
  esac
done

# An env var toggles the probe success so the same stub serves the
# "embedder unavailable" test case.
if [[ "${HISTORIAN_STUB_OLLAMA_AVAILABLE:-1}" == "0" ]]; then
  exit 7
fi

if [[ "$url" == */api/tags ]]; then
  exit 0
fi

if [[ "$url" == */api/embeddings ]]; then
  prompt=$(printf '%s' "$payload" | jq -r '.prompt // ""' 2>/dev/null)
  case "$prompt" in
    *redash*) printf '{"embedding":[1,0,0]}' ;;
    *kafka*) printf '{"embedding":[0,1,0]}' ;;
    *postgres*) printf '{"embedding":[0,0,1]}' ;;
    *) printf '{"embedding":[0.5,0.5,0.5]}' ;;
  esac
  exit 0
fi

exit 1
STUB
  chmod +x "${STUB_BIN}/curl"
  export PATH="${STUB_BIN}:${PATH}"

  # --- session inputs ------------------------------------------------------
  TRANSCRIPT="${BATS_TEST_TMPDIR}/transcript.jsonl"
  SESSION_ID="sess-retrieval"

  # Project settings that switch historian on with small indexing limits.
  mkdir -p "${PROJECT_REPO}/.claude"
  cat > "${PROJECT_REPO}/.claude/settings.json" <<'JSON'
{"historian":{"enabled":true,"indexing":{"min_transcript_chars_to_index":50,"chunk_target_chars":400,"chunk_overlap_chars":50},"retrieval":{"cooldown_seconds":60,"max_retrievals_per_session":5,"min_prompt_chars":40,"min_similarity":0.55,"max_age_days":365}}}
JSON

  INDEX_HOOK="${PLUGIN_ROOT}/scripts/hooks/historian-session-end.sh"
  RETRIEVE_HOOK="${PLUGIN_ROOT}/scripts/hooks/historian-prompt-submit.sh"
}
|
|
98
|
+
|
|
99
|
+
_index_input() {
  # Print the SessionEnd hook payload for session $1 (default: $SESSION_ID),
  # pointing at $PROJECT_REPO and $TRANSCRIPT.
  local sid
  sid="${1:-$SESSION_ID}"
  jq -cn \
    --arg cwd "$PROJECT_REPO" \
    --arg sid "$sid" \
    --arg transcript "$TRANSCRIPT" \
    '{cwd:$cwd, session_id:$sid, transcript_path:$transcript, hook_event_name:"SessionEnd"}'
}
|
|
104
|
+
|
|
105
|
+
_retrieve_input() {
  # Print the UserPromptSubmit hook payload for prompt $1 issued from
  # session $2 (default: "current") inside $PROJECT_REPO.
  local prompt="$1"
  local sid="${2:-current}"
  jq -cn \
    --arg cwd "$PROJECT_REPO" \
    --arg sid "$sid" \
    --arg prompt "$prompt" \
    '{cwd:$cwd, session_id:$sid, prompt:$prompt, hook_event_name:"UserPromptSubmit"}'
}
|
|
110
|
+
|
|
111
|
+
_append_text_turn() {
  # Append one {role, content} JSON line to $TRANSCRIPT.
  #   $1 role  speaker written to `role`
  #   $2 text  message body written to `content`
  local role="$1"
  local text="$2"
  jq -cn \
    --arg role "$role" \
    --arg text "$text" \
    '{role:$role, content:$text}' >> "$TRANSCRIPT"
}
|
|
116
|
+
|
|
117
|
+
_index_session() {
  # Rewrite $TRANSCRIPT from the given turns and index it as session $1.
  #
  #   $1      session id passed to the SessionEnd hook
  #   $2...   turn texts, alternating user, assistant, user, ...
  #
  # Returns the exit status of the SessionEnd hook.
  local sid="$1"
  shift
  : > "$TRANSCRIPT"

  local role="user"
  while [ $# -gt 0 ]; do
    _append_text_turn "$role" "$1"
    shift
    if [ "$role" = "user" ]; then
      role="assistant"
    else
      role="user"
    fi
  done

  # Pipe the payload straight into the hook. Splicing it into a
  # single-quoted `bash -c` string breaks as soon as the JSON or the hook
  # path contains a single quote.
  printf '%s' "$(_index_input "$sid")" | "$INDEX_HOOK" >/dev/null
}
|
|
127
|
+
|
|
128
|
+
@test "retrieval no-op when historian is disabled" {
|
|
129
|
+
rm -f "${PROJECT_REPO}/.claude/settings.json"
|
|
130
|
+
run bash -c "printf '%s' '$(_retrieve_input "a prompt long enough to clear the floor and trigger retrieval but historian is off")' | '$RETRIEVE_HOOK'"
|
|
131
|
+
[ "$status" -eq 0 ]
|
|
132
|
+
echo "$output" | jq -e '.hookSpecificOutput.additionalContext == ""' >/dev/null
|
|
133
|
+
[ ! -f "$ONLOOKER_EVENTS_LOG" ] || ! grep -q '"historian.retrieval' "$ONLOOKER_EVENTS_LOG"
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
@test "retrieval skipped when prompt is shorter than min_prompt_chars" {
|
|
137
|
+
run bash -c "printf '%s' '$(_retrieve_input "tiny")' | '$RETRIEVE_HOOK'"
|
|
138
|
+
[ "$status" -eq 0 ]
|
|
139
|
+
echo "$output" | jq -e '.hookSpecificOutput.additionalContext == ""' >/dev/null
|
|
140
|
+
grep '"event_type":"historian.retrieval.complete"' "$ONLOOKER_EVENTS_LOG" \
|
|
141
|
+
| jq -e '.payload.outcome == "skipped" and .payload.skip_reason == "short_prompt"' >/dev/null
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
@test "indexing embeds chunks when ollama is up" {
|
|
145
|
+
_index_session "$SESSION_ID" \
|
|
146
|
+
"We are debugging a redash dashboard problem with timezone offsets and saved query parameters this morning." \
|
|
147
|
+
"Sure — the latest version always passes UTC because of a chart migration we did last week."
|
|
148
|
+
|
|
149
|
+
local jsonl="${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
|
|
150
|
+
[ -f "$jsonl" ]
|
|
151
|
+
jq -e '.embedding | type == "array" and length == 3' "$jsonl" >/dev/null
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
@test "retrieval surfaces a matching past chunk" {
|
|
155
|
+
# Index a past session containing a "redash" topic.
|
|
156
|
+
_index_session "past-1" \
|
|
157
|
+
"We are debugging a redash dashboard problem with timezone offsets and saved query parameters this morning." \
|
|
158
|
+
"Sure — the latest version always passes UTC because of a chart migration we did last week."
|
|
159
|
+
|
|
160
|
+
# New session, same project, query about redash → should match.
|
|
161
|
+
run bash -c "printf '%s' '$(_retrieve_input "Hitting another redash dashboard timezone issue on the same saved query parameters again today")' | '$RETRIEVE_HOOK'"
|
|
162
|
+
[ "$status" -eq 0 ]
|
|
163
|
+
|
|
164
|
+
local ctx
|
|
165
|
+
ctx=$(echo "$output" | jq -r '.hookSpecificOutput.additionalContext')
|
|
166
|
+
[[ "$ctx" == *"Historian: a past chunk looks similar"* ]]
|
|
167
|
+
[[ "$ctx" == *"redash"* ]]
|
|
168
|
+
|
|
169
|
+
grep '"event_type":"historian.retrieval.surfaced"' "$ONLOOKER_EVENTS_LOG" \
|
|
170
|
+
| jq -e '.payload.similarity >= 0.55 and .payload.source_session_id == "past-1"' >/dev/null
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
@test "retrieval returns empty when no chunk clears the similarity floor" {
|
|
174
|
+
# Past session about kafka — query about postgres falls below the
|
|
175
|
+
# 0.55 floor (the embedding vectors are orthogonal in the stub).
|
|
176
|
+
_index_session "past-2" \
|
|
177
|
+
"Investigating kafka consumer lag on the ingest pipeline today after the rebalance event yesterday." \
|
|
178
|
+
"Looks like the rebalance left a stale offset; manual reset cleared it."
|
|
179
|
+
|
|
180
|
+
run bash -c "printf '%s' '$(_retrieve_input "Working on a postgres migration plan today for our settings tables to add new columns safely")' | '$RETRIEVE_HOOK'"
|
|
181
|
+
[ "$status" -eq 0 ]
|
|
182
|
+
echo "$output" | jq -e '.hookSpecificOutput.additionalContext == ""' >/dev/null
|
|
183
|
+
|
|
184
|
+
grep '"event_type":"historian.retrieval.complete"' "$ONLOOKER_EVENTS_LOG" \
|
|
185
|
+
| jq -e '.payload.outcome == "empty"' >/dev/null
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
@test "retrieval skipped on cooldown" {
|
|
189
|
+
_index_session "past-3" \
|
|
190
|
+
"Yet another redash dashboard query that we had to fix the timezone on this morning to make the report run again." \
|
|
191
|
+
"ok"
|
|
192
|
+
|
|
193
|
+
# First retrieval surfaces something.
|
|
194
|
+
run bash -c "printf '%s' '$(_retrieve_input "redash dashboard timezone problem again on the saved query parameters this morning afternoon")' | '$RETRIEVE_HOOK'"
|
|
195
|
+
[ "$status" -eq 0 ]
|
|
196
|
+
grep -q '"event_type":"historian.retrieval.surfaced"' "$ONLOOKER_EVENTS_LOG"
|
|
197
|
+
|
|
198
|
+
rm -f "$ONLOOKER_EVENTS_LOG"
|
|
199
|
+
|
|
200
|
+
# Immediate second retrieval (same session) hits the cooldown gate
|
|
201
|
+
# (60s) and gets skipped without calling the embedder.
|
|
202
|
+
run bash -c "printf '%s' '$(_retrieve_input "redash dashboard timezone problem follow-up just a moment after the previous prompt cleared")' | '$RETRIEVE_HOOK'"
|
|
203
|
+
[ "$status" -eq 0 ]
|
|
204
|
+
grep '"event_type":"historian.retrieval.complete"' "$ONLOOKER_EVENTS_LOG" \
|
|
205
|
+
| jq -e '.payload.outcome == "skipped" and .payload.skip_reason == "cooldown"' >/dev/null
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
@test "retrieval skipped when the embedder is unreachable" {
|
|
209
|
+
_index_session "past-4" \
|
|
210
|
+
"Yet another redash dashboard query that we had to fix the timezone on this morning to make the report run again." \
|
|
211
|
+
"ok"
|
|
212
|
+
|
|
213
|
+
# Turn off the stub so the probe fails.
|
|
214
|
+
HISTORIAN_STUB_OLLAMA_AVAILABLE=0 \
|
|
215
|
+
run bash -c "printf '%s' '$(_retrieve_input "redash dashboard timezone problem long enough to clear the prompt floor for retrieval")' | '$RETRIEVE_HOOK'"
|
|
216
|
+
[ "$status" -eq 0 ]
|
|
217
|
+
echo "$output" | jq -e '.hookSpecificOutput.additionalContext == ""' >/dev/null
|
|
218
|
+
|
|
219
|
+
grep -q '"event_type":"historian.embedder.unavailable"' "$ONLOOKER_EVENTS_LOG"
|
|
220
|
+
grep '"event_type":"historian.retrieval.complete"' "$ONLOOKER_EVENTS_LOG" \
|
|
221
|
+
| jq -e '.payload.outcome == "skipped" and .payload.skip_reason == "embedder_unavailable"' >/dev/null
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
@test "retrieval excludes chunks from the current session id" {
|
|
225
|
+
# Index the same session id we'll then query from — should be excluded.
|
|
226
|
+
_index_session "current" \
|
|
227
|
+
"Working on a redash dashboard right now in this very session of the test framework that we are running." \
|
|
228
|
+
"ok"
|
|
229
|
+
|
|
230
|
+
run bash -c "printf '%s' '$(_retrieve_input "redash dashboard timezone trouble inside this very session of the test framework")' | '$RETRIEVE_HOOK'"
|
|
231
|
+
[ "$status" -eq 0 ]
|
|
232
|
+
|
|
233
|
+
echo "$output" | jq -e '.hookSpecificOutput.additionalContext == ""' >/dev/null
|
|
234
|
+
grep '"event_type":"historian.retrieval.complete"' "$ONLOOKER_EVENTS_LOG" \
|
|
235
|
+
| jq -e '.payload.outcome == "empty"' >/dev/null
|
|
236
|
+
}
|