@onlooker-community/ecosystem 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/.claude-plugin/marketplace.json +26 -0
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.release-please-manifest.json +4 -2
  4. package/CHANGELOG.md +14 -0
  5. package/docs/memory-architecture.md +102 -0
  6. package/package.json +3 -3
  7. package/plugins/curator/.claude-plugin/plugin.json +14 -0
  8. package/plugins/curator/CHANGELOG.md +10 -0
  9. package/plugins/curator/README.md +55 -0
  10. package/plugins/curator/config.json +41 -0
  11. package/plugins/curator/docs/adr/001-staleness-tiers.md +100 -0
  12. package/plugins/curator/docs/design.md +311 -0
  13. package/plugins/curator/hooks/hooks.json +15 -0
  14. package/plugins/curator/scripts/hooks/curator-session-start.sh +343 -0
  15. package/plugins/curator/scripts/lib/curator-checks.sh +155 -0
  16. package/plugins/curator/scripts/lib/curator-config.sh +67 -0
  17. package/plugins/curator/scripts/lib/curator-emit.sh +61 -0
  18. package/plugins/curator/scripts/lib/curator-memory-reader.sh +225 -0
  19. package/plugins/curator/scripts/lib/curator-project-key.sh +82 -0
  20. package/plugins/curator/scripts/lib/curator-storage.sh +176 -0
  21. package/plugins/curator/scripts/lib/curator-ulid.sh +43 -0
  22. package/plugins/historian/docs/adr/001-local-embeddings-only.md +96 -0
  23. package/plugins/historian/docs/design.md +317 -0
  24. package/plugins/librarian/.claude-plugin/plugin.json +14 -0
  25. package/plugins/librarian/CHANGELOG.md +10 -0
  26. package/plugins/librarian/README.md +51 -0
  27. package/plugins/librarian/config.json +52 -0
  28. package/plugins/librarian/docs/adr/001-propose-dont-auto-write.md +87 -0
  29. package/plugins/librarian/docs/design.md +301 -0
  30. package/plugins/librarian/hooks/hooks.json +26 -0
  31. package/plugins/librarian/scripts/hooks/librarian-session-end.sh +312 -0
  32. package/plugins/librarian/scripts/hooks/librarian-session-start.sh +103 -0
  33. package/plugins/librarian/scripts/lib/librarian-archivist-reader.sh +67 -0
  34. package/plugins/librarian/scripts/lib/librarian-classifier.sh +139 -0
  35. package/plugins/librarian/scripts/lib/librarian-config.sh +74 -0
  36. package/plugins/librarian/scripts/lib/librarian-durability.sh +77 -0
  37. package/plugins/librarian/scripts/lib/librarian-emit.sh +72 -0
  38. package/plugins/librarian/scripts/lib/librarian-project-key.sh +83 -0
  39. package/plugins/librarian/scripts/lib/librarian-storage.sh +222 -0
  40. package/plugins/librarian/scripts/lib/librarian-ulid.sh +50 -0
  41. package/release-please-config.json +32 -0
  42. package/test/bats/curator-session-start.bats +316 -0
  43. package/test/bats/librarian-session-end.bats +182 -0
  44. package/test/bats/librarian-session-start.bats +136 -0
@@ -0,0 +1,176 @@
1
+ #!/usr/bin/env bash
2
+ # Storage layout helpers for Curator.
3
+ #
4
+ # Layout (under $ONLOOKER_DIR/curator/<project-key>/):
5
+ # manifest.json project metadata (remote_url, repo_root, last_seen_at)
6
+ # last_cheap_scan.json watermark: when cheap-tier last ran
7
+ # last_llm_sweep.json watermark: when LLM sweep last ran
8
+ # findings/<ulid>.json one finding per file (open, acknowledged, or resolved)
9
+
10
+ # ============================================================================
11
+ # Path helpers
12
+ # ============================================================================
13
+
14
# Print the directory under which all Curator state lives.
# Uses $ONLOOKER_DIR when it is set and non-empty, otherwise $HOME/.onlooker.
# Output has no trailing newline.
curator_storage_root() {
  printf '%s/curator' "${ONLOOKER_DIR:-$HOME/.onlooker}"
}
18
+
19
# Print the per-project state directory: <storage root>/<project key>.
# Arguments: $1 - project key
curator_project_dir() {
  local root
  root=$(curator_storage_root)
  printf '%s/%s' "$root" "$1"
}
23
+
24
# Print the directory holding one JSON file per finding for a project.
# Arguments: $1 - project key
curator_findings_dir() {
  local project_dir
  project_dir=$(curator_project_dir "$1")
  printf '%s/findings' "$project_dir"
}
28
+
29
# Create the project directory and its findings/ subdirectory if missing.
# Arguments: $1 - project key
# Returns:   1 when the key is empty; otherwise the exit status of mkdir.
curator_storage_init() {
  local key="$1"
  if [[ -z "$key" ]]; then
    return 1
  fi
  # mkdir diagnostics are suppressed; callers only look at the status.
  mkdir -p "$(curator_project_dir "$key")/findings" 2>/dev/null
}
36
+
37
# Write (or refresh) manifest.json for a project.
#
# Arguments: $1 - project key (required)
#            $2 - remote URL; an empty string is stored as JSON null
#            $3 - repo root;  an empty string is stored as JSON null
# Returns:   0 on success; 1 if the key is empty, storage init fails, or the
#            manifest could not be produced.
#
# The JSON is rendered into a sibling temp file and renamed into place, so a
# failing jq (or a full disk) never truncates a manifest that already exists.
curator_storage_write_manifest() {
  local key="$1"
  local remote_url="$2"
  local repo_root="$3"
  [[ -z "$key" ]] && return 1

  curator_storage_init "$key" || return 1
  local manifest_path tmp_path now
  manifest_path="$(curator_project_dir "$key")/manifest.json"
  tmp_path="${manifest_path}.tmp.$$"
  now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  # The brace group lets 2>/dev/null also cover a failing output redirection.
  if {
    jq -n \
      --arg key "$key" \
      --arg remote "$remote_url" \
      --arg root "$repo_root" \
      --arg now "$now" \
      '{
        project_key: $key,
        remote_url: (if $remote == "" then null else $remote end),
        repo_root: (if $root == "" then null else $root end),
        last_seen_at: $now
      }' > "$tmp_path"
  } 2>/dev/null && mv -f -- "$tmp_path" "$manifest_path" 2>/dev/null; then
    return 0
  fi

  rm -f -- "$tmp_path" 2>/dev/null
  return 1
}
60
+
61
+ # ============================================================================
62
+ # Watermarks
63
+ # ============================================================================
64
+
65
# Print the path of the cheap-tier scan watermark file for a project.
# Arguments: $1 - project key
curator_last_cheap_scan_path() {
  local project_dir
  project_dir=$(curator_project_dir "$1")
  printf '%s/last_cheap_scan.json' "$project_dir"
}
68
+
69
# Print the path of the LLM-sweep watermark file for a project.
# Arguments: $1 - project key
curator_last_llm_sweep_path() {
  local project_dir
  project_dir=$(curator_project_dir "$1")
  printf '%s/last_llm_sweep.json' "$project_dir"
}
72
+
73
# Print the .scanned_at value stored in a watermark file.
# Arguments: $1 - path to the watermark JSON file
# Outputs:   the timestamp, or nothing when the field is absent/null
# Returns:   0 when the file does not exist (no watermark yet is not an
#            error); otherwise the exit status of jq.
curator_storage_read_watermark() {
  local path="$1"
  if [[ ! -f "$path" ]]; then
    return 0
  fi
  jq -r '.scanned_at // empty' "$path" 2>/dev/null
}
78
+
79
# Record "now" (UTC, ISO-8601) as the scanned_at watermark at the given path.
#
# Arguments: $1 - path of the watermark JSON file (required)
# Returns:   0 on success; 1 if the path is empty, the parent directory
#            cannot be created, or the file could not be written.
#
# The JSON is rendered into a sibling temp file and renamed into place, so a
# failing jq never replaces a valid watermark with an empty file.
curator_storage_write_watermark() {
  local path="$1"
  [[ -z "$path" ]] && return 1
  mkdir -p "$(dirname "$path")" 2>/dev/null || return 1

  local now tmp_path
  now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
  tmp_path="${path}.tmp.$$"

  # The brace group lets 2>/dev/null also cover a failing output redirection.
  if { jq -n --arg t "$now" '{ scanned_at: $t }' > "$tmp_path"; } 2>/dev/null \
    && mv -f -- "$tmp_path" "$path" 2>/dev/null; then
    return 0
  fi

  rm -f -- "$tmp_path" 2>/dev/null
  return 1
}
87
+
88
+ # ============================================================================
89
+ # Findings
90
+ # ============================================================================
91
+
92
# Write a finding to disk as findings/<ulid>.json.
#
# This function does NOT deduplicate: it writes whatever it is given. Callers
# that want a repeat scan to skip an already-recorded fact should check
# curator_storage_has_finding_with_hash before calling.
#
# Usage: curator_storage_write_finding <key> <ulid> <json>
# Outputs: the path of the written file (no trailing newline) on success
# Returns: 0 on success; 1 if any argument is empty, storage init fails, or
#          the file could not be written.
#
# The payload goes to a sibling temp file first and is renamed into place so
# a failed write never leaves a truncated <ulid>.json behind. The temp name
# does not end in .json, so a findings loader globbing *.json will not see it.
curator_storage_write_finding() {
  local key="$1"
  local id="$2"
  local json="$3"
  [[ -z "$key" || -z "$id" || -z "$json" ]] && return 1

  curator_storage_init "$key" || return 1
  local path tmp_path
  path="$(curator_findings_dir "$key")/${id}.json"
  tmp_path="${path}.tmp.$$"

  if { printf '%s\n' "$json" > "$tmp_path"; } 2>/dev/null \
    && mv -f -- "$tmp_path" "$path" 2>/dev/null; then
    printf '%s' "$path"
    return 0
  fi

  rm -f -- "$tmp_path" 2>/dev/null
  return 1
}
107
+
108
# Load all findings for a project key as a single JSON array on stdout.
#
# Arguments: $1 - project key
# Outputs:   a JSON array (no trailing newline); '[]' when the key is empty,
#            the findings directory is missing, or nothing valid was found.
# Returns:   0
#
# Files that fail to parse, or that are empty (e.g. left behind by an
# interrupted write), are skipped instead of poisoning the whole result.
# Each file is parsed once and the results are slurped by a single jq, rather
# than re-parsing the growing accumulator for every file.
curator_storage_load_findings() {
  local key="$1"
  [[ -z "$key" ]] && { echo '[]'; return 0; }
  local dir
  dir=$(curator_findings_dir "$key")
  [[ -d "$dir" ]] || { echo '[]'; return 0; }

  local file item all
  all=$(
    for file in "$dir"/*.json; do
      [[ -f "$file" ]] || continue
      item=$(jq -c '.' "$file" 2>/dev/null) || continue
      # A zero-byte file parses "successfully" to no values at all.
      [[ -n "$item" ]] || continue
      printf '%s\n' "$item"
    done | jq -s '.' 2>/dev/null
  )
  # Never hand an empty string to callers that pipe this into jq.
  printf '%s' "${all:-[]}"
}
125
+
126
# Test whether an open finding with the given dedup hash is already stored.
# A finding with no .status field counts as open.
#
# Arguments: $1 - project key
#            $2 - dedup hash to look for
# Returns:   0 when a matching open finding exists; non-zero otherwise
#            (including when either argument is empty).
curator_storage_has_finding_with_hash() {
  local key="$1"
  local hash="$2"
  if [[ -z "$key" || -z "$hash" ]]; then
    return 1
  fi
  # jq -e maps a false/null result to a non-zero exit status.
  curator_storage_load_findings "$key" | jq -e --arg h "$hash" '
    any(.[]; (.deduped_hash // "") == $h and (.status // "open") == "open")
  ' >/dev/null 2>&1
}
137
+
138
# Print the number of open findings for a project.
# A finding with no .status field counts as open.
# Arguments: $1 - project key
# Returns:   the exit status of jq
curator_storage_count_open() {
  local key="$1"
  curator_storage_load_findings "$key" \
    | jq '[.[] | select((.status // "open") == "open")] | length' 2>/dev/null
}
144
+
145
# Open-finding counts grouped by kind. Used by the surfacer to render a
# pointer like "2 path-broken, 1 date-decayed".
#
# Arguments: $1 - project key
# Outputs:   compact JSON array of { kind, count }, largest count first
#
# jq's group_by sorts its input by the grouping key before grouping, so every
# kind yields exactly one group regardless of input order; no separate
# sort_by(.kind) pass is needed. sort_by is stable, so kinds with equal counts
# stay in the kind order produced by group_by.
curator_storage_open_counts_by_kind() {
  local key="$1"
  local all
  all=$(curator_storage_load_findings "$key")
  printf '%s' "$all" | jq -c '
    [.[] | select((.status // "open") == "open")]
    | group_by(.kind)
    | map({ kind: .[0].kind, count: length })
    | sort_by(-.count)
  '
}
163
+
164
# Hash a finding's identity-relevant fields. The caller builds the raw
# identity string; two findings built from the same string share a hash.
# Plain SHA-256 over the raw input, no normalization.
#
# Arguments: $1 - raw identity string
# Outputs:   the first 16 hex characters of the SHA-256 digest, newline-ended
# Returns:   0 on success; 1 when no working hasher is available.
#
# The digest is validated before it is printed: the exit status of a
# "hasher | cut" pipeline is cut's, so a broken hasher would otherwise be
# reported as success with an empty hash. sha256sum is also tried when shasum
# is present but does not produce a digest.
curator_finding_hash() {
  local raw="$1"
  local tool digest
  for tool in shasum sha256sum; do
    command -v "$tool" >/dev/null 2>&1 || continue
    if [[ "$tool" == "shasum" ]]; then
      digest=$(printf '%s' "$raw" | shasum -a 256 2>/dev/null)
    else
      digest=$(printf '%s' "$raw" | sha256sum 2>/dev/null)
    fi
    digest="${digest:0:16}"
    if [[ "$digest" =~ ^[0-9a-f]{16}$ ]]; then
      printf '%s\n' "$digest"
      return 0
    fi
  done
  return 1
}
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env bash
2
+ # Minimal ULID generator for Curator finding IDs.
3
+ #
4
+ # Spec: https://github.com/ulid/spec — 48-bit timestamp + 80-bit randomness,
5
+ # lexicographically sortable, Crockford Base32. Monotonicity within a single
6
+ # millisecond is not required; findings are written infrequently.
7
+
8
# Crockford Base32 digits, indexed by value (I, L, O and U are omitted).
_CURATOR_ULID_ALPHABET="0123456789ABCDEFGHJKMNPQRSTVWXYZ"

# Encode a non-negative integer as a fixed-width Crockford Base32 string.
# Arguments: $1 - integer to encode
#            $2 - number of output characters
# Outputs:   exactly $2 characters, most significant digit first, zero-padded
#            on the left; bits that do not fit in the width are dropped.
_curator_ulid_encode() {
  local value="$1"
  local width="$2"
  local encoded=""
  local digit
  local remaining="$width"
  while ((remaining > 0)); do
    # Peel off the lowest 5 bits and prepend their symbol.
    digit=$((value % 32))
    encoded="${_CURATOR_ULID_ALPHABET:digit:1}${encoded}"
    value=$((value / 32))
    remaining=$((remaining - 1))
  done
  printf '%s' "$encoded"
}
21
+
22
# Generate a ULID: 10 Base32 chars of millisecond timestamp followed by
# 16 Base32 chars (80 bits) of randomness.
#
# Outputs: a 26-character ULID on stdout (no trailing newline)
#
# Timestamp source: python3 on Darwin, `date +%s%3N` elsewhere. Whatever the
# source returns is validated as all digits before use, because a date(1)
# without %N support prints the format literally and still exits 0; in that
# case (or when the command fails) the value falls back to whole seconds
# multiplied by 1000.
#
# Randomness: $RANDOM yields 15 bits per read, so each 40-bit half is built
# from three reads (45 bits) and masked down to 40.
curator_ulid() {
  local now_ms=""
  if [[ "$(uname)" == "Darwin" ]]; then
    now_ms=$(python3 -c 'import time; print(int(time.time() * 1000))' 2>/dev/null)
  else
    now_ms=$(date +%s%3N 2>/dev/null)
  fi
  if [[ ! "$now_ms" =~ ^[0-9]+$ ]]; then
    now_ms=$(($(date +%s) * 1000))
  fi

  local mask=$(((1 << 40) - 1))
  local rand_hi rand_lo
  rand_hi=$((((RANDOM << 30) | (RANDOM << 15) | RANDOM) & mask))
  rand_lo=$((((RANDOM << 30) | (RANDOM << 15) | RANDOM) & mask))

  local ts_part hi_part lo_part
  ts_part=$(_curator_ulid_encode "$now_ms" 10)
  hi_part=$(_curator_ulid_encode "$rand_hi" 8)
  lo_part=$(_curator_ulid_encode "$rand_lo" 8)

  printf '%s%s%s' "$ts_part" "$hi_part" "$lo_part"
}
@@ -0,0 +1,96 @@
1
+ # ADR-001: Historian Uses Local Embeddings by Default
2
+
3
+ - Status: Accepted
4
+ - Date: 2026-06-02
5
+ - Deciders: Meagan
6
+ - Tags: historian, embeddings, privacy, data-egress, local-first
7
+
8
+ ## Context and Problem Statement
9
+
10
+ Historian's central operation is embedding session transcripts. Two timeframes consume embeddings: indexing (once per session, in bulk) and retrieval (potentially every user prompt). The model choice — local vs. remote — drives privacy posture, cost shape, latency profile, retrieval quality, and dependency surface, all at once.
11
+
12
+ Remote embedding APIs (Voyage, OpenAI, Cohere, Anthropic's voyage models) are easier to set up, produce higher-quality embeddings, and impose no local compute requirement. They also mean every session's transcript content — including paths, identifiers, code snippets, internal terminology, and anything the user said in conversation — is sent over the wire to a third party, in the moment-by-moment course of normal use. Even a well-intentioned user with a benign API key may not realize how much they are streaming out, because the operation is invisible.
13
+
14
+ Local embedding models (`nomic-embed-text` via ollama, `bge-small` via fastembed) sacrifice some retrieval quality and impose a one-time local setup cost in exchange for keeping all transcript content on the user's machine. Modern small embedding models are good enough for the precedent-retrieval task historian is designed for.
15
+
16
+ This ADR records why local embedding is the architectural baseline, and what the remote opt-in path requires.
17
+
18
+ ## Decision Drivers
19
+
20
+ - **Transcript content is the most sensitive thing historian touches.** It contains code paths the user is actively working on, names of internal systems, mid-thought work product, mistakes, and side discussions. A user adopting historian for productivity gains should not, as a side effect, opt into per-prompt transcript egress.
21
+ - **Per-prompt latency matters.** Retrieval runs on `UserPromptSubmit` — a hot path. A remote round-trip introduces ~100–400ms of latency every time. A local embedding model adds ~20–60ms. Cumulatively across a working day this is material.
22
+ - **Per-prompt cost matters.** Even at $0.00002 per 1K tokens, retrieval that fires on every long-enough prompt is a non-trivial recurring cost. Local embedding has a one-time setup cost and is free thereafter.
23
+ - **Retrieval-quality budget.** Historian's surface ("here is one past chunk that looks similar") is informational, not load-bearing. The model can ignore a marginal match. Top-1 retrieval against `nomic-embed-text` is good enough for the "have we seen this before?" signal. The marginal quality gain from a frontier embedding model does not unlock new capabilities here.
24
+ - **Fail-soft requirement.** Plugins in the Onlooker ecosystem must not block sessions they were not invited to. A remote-by-default embedder means a failed network call is a failed retrieval — silently. A local embedder still works without a network.
25
+ - **Opt-in pattern for sensitive plugins.** Compass's `data_egress` block makes egress an explicit configuration decision. Historian inherits this pattern: the user must affirm both `embedder.backend: "remote"` AND `data_egress.allow_remote_embedding: true` to send transcript content off-machine. Two independent affirmations.
26
+
27
+ ## Considered Options
28
+
29
+ 1. **Remote-by-default, with a local fallback.** Easier first-run UX. The user gets best-in-class retrieval immediately. Local fallback covers offline scenarios but most users never see it.
30
+ 2. **Local-by-default, with a remote opt-in.** Privacy-by-default. Higher first-run friction (the user must install or already have ollama). Remote opt-in is gated by an explicit egress affirmation.
31
+ 3. **No embeddings at all — keyword search only.** Cheapest, simplest, no dependency. Retrieval quality is poor for paraphrased prompts ("the flaky test" vs. "tests timing out intermittently"). Misses the semantic-recall use case that motivated historian.
32
+ 4. **Hybrid: local for embedding, remote for reranking.** Embed everything locally; for the top-N candidates, call a remote reranker to break ties. Reduces remote egress dramatically while keeping high retrieval quality. More complex; defers to a future iteration.
33
+
34
+ ## Decision
35
+
36
+ We adopt **Option 2: local embedding (via ollama with `nomic-embed-text`) is the default backend. Remote embedding is opt-in and requires both `embedder.backend: "remote"` and `data_egress.allow_remote_embedding: true`.**
37
+
38
+ Backends, in preference order:
39
+
40
+ 1. **ollama with `nomic-embed-text`.** Recommended default. 768-dim embeddings, ~110MB model, fully local, good cosine-similarity behavior on prose and code-mixed content. Historian's `/historian setup` skill walks the user through `ollama pull nomic-embed-text`.
41
+ 2. **fastembed via Python sidecar.** Fallback for users without ollama. ONNX-based; slower startup but no separate runtime daemon.
42
+ 3. **Remote.** Opt-in only. Requires two-key affirmation. When enabled, historian still stores all embeddings and chunk bodies locally — only the embedding *computation* leaves the machine.
43
+
44
+ The two-key affirmation pattern (`embedder.backend: "remote"` AND `data_egress.allow_remote_embedding: true`) means a user who copies a config snippet that sets `backend: "remote"` without also allowing egress never gets silent egress: the mismatch is caught at startup, historian falls back to the local `ollama` backend for the session, and it logs `historian.config.warning` with the specific message "remote embedding configured but egress not allowed."
45
+
46
+ Option 1 is rejected because the user-experience benefit (no install step) is small relative to the privacy cost (silent transcript egress on every prompt). A user who genuinely wants remote-grade quality can flip the two-key opt-in.
47
+
48
+ Option 3 is rejected because keyword retrieval misses the use cases historian is designed for. The motivating examples (flaky-test deja-vu, "I tried this approach already and it didn't work") all involve paraphrase across sessions.
49
+
50
+ Option 4 (hybrid local + remote reranker) is deferred. It is appealing — most egress avoided, near-frontier retrieval quality — but it triples the configuration surface and requires both backends to be working. The simpler local-only default is the right starting point; hybrid is a natural future opt-in.
51
+
52
+ ## Consequences
53
+
54
+ ### Positive
55
+
56
+ - Transcript content stays on the user's machine by default. No silent egress.
57
+ - Per-prompt retrieval latency stays in the 30–100ms range with a warm ollama process. The hot path is not slowed by network.
58
+ - Retrieval cost is zero after model download.
59
+ - The plugin works offline.
60
+ - The two-key egress affirmation forces an explicit decision at configuration time, matching compass's pattern. A user who wants remote retrieval can opt in cleanly; a user who doesn't is never accidentally signed up.
61
+ - Embedding-storage is forward-compatible with future cross-project sharing (`source: "team:<id>"`) without inheriting "we already sent everything to a third party" as a starting condition.
62
+
63
+ ### Negative
64
+
65
+ - First-run UX requires installing ollama (~150MB runtime) and pulling the model (~110MB). The `/historian setup` skill streamlines this but it is friction relative to "edit a config and go."
66
+ - Retrieval quality is lower than frontier embedding models on edge cases (e.g., highly idiosyncratic terminology, very short prompts, code-heavy chunks). For most prose-shaped recall the gap is small.
67
+ - Local embedding consumes ~200MB of resident memory while the ollama process runs. On constrained machines this is noticeable.
68
+ - A user who wants remote retrieval has to flip two keys, not one. This is intentional but adds documentation surface.
69
+
70
+ ### Neutral
71
+
72
+ - Ollama as the default runtime ties historian to a specific local-LLM ecosystem. Fastembed as a fallback hedges this dependency. A pure-Rust backend (e.g., direct ONNX) is plausible if ollama proves to be the wrong bet.
73
+ - The decision to store chunk bodies locally alongside embeddings is independent of this ADR but mutually reinforcing — co-located chunk bodies mean retrieval results can render without any network round-trip.
74
+
75
+ ## Implementation Notes
76
+
77
+ - `embedder.backend: "ollama"` is checked first. If ollama is not on PATH or the configured host is unreachable, historian falls through to fastembed automatically. If fastembed is also unavailable, historian emits `historian.embedder.unavailable` and disables both indexing and retrieval for the session — no partial behavior.
78
+ - `embedder.backend: "remote"` does NOT auto-fall-through if the remote endpoint is unreachable. A misconfigured or down remote backend produces `historian.embedder.unavailable` and stops; this prevents silent fallback that might violate the user's expectation of where embeddings happen.
79
+ - The two-key affirmation check runs at `SessionStart`. A configuration with `backend: "remote"` and `allow_remote_embedding: false` logs a warning and forces `backend: "ollama"` for the session. The user is told their config has a mismatch.
80
+ - The `nomic-embed-text` cosine-similarity distribution on this corpus puts unrelated chunks around 0.30–0.45 and related chunks around 0.55–0.85. `min_similarity: 0.55` is the default floor. The `/historian calibrate` skill (mentioned as an open question in the design doc) is the per-project tuning surface.
81
+ - Remote backends, if enabled, must respect the same chunk-body redaction pipeline as local backends. Redaction is applied before any network call. Verified by an emitted `historian.chunk.sanitized` event preceding the embed.
82
+ - Re-embedding after a backend change is not automatic. The vector store has no `embedding_model` column today; mixing vectors from two different models silently degrades retrieval. A future migration adds the column and forces a re-index on backend change. Until then, the docs warn against changing `embedder.backend` after data is stored.
83
+
84
+ ## Validation
85
+
86
+ - A test session with a transcript of ~10K characters should index in ≤2 seconds end-to-end on ollama with `nomic-embed-text` running locally on a typical macOS dev laptop.
87
+ - A `UserPromptSubmit` retrieval against a vector store with 500 chunks should complete in under 100ms wall-clock.
88
+ - A misconfigured `backend: "remote"` with `allow_remote_embedding: false` must produce `historian.config.warning` at SessionStart and operate as if `backend: "ollama"` for the session. No network call may be made.
89
+ - A purge via `/historian purge all` must remove all chunks for the project key and leave no embeddings behind. Verified by `historian.purge.completed` followed by an empty `historian.stats` report.
90
+
91
+ ## References
92
+
93
+ - Compass design — precedent for explicit `data_egress` configuration blocks (`plugins/compass/docs/design.md#data-egress`)
94
+ - Compass `data_egress` discussion in the design doc — the "near-zero egress" mode template historian inherits
95
+ - Memory architecture overview (`docs/memory-architecture.md`)
96
+ - Historian design (`../design.md`)
@@ -0,0 +1,317 @@
1
+ # Historian — Plugin Design
2
+
3
+ **Plugin name:** `historian`
4
+ **Tagline:** *Recalls past sessions when they matter.*
5
+ **Status:** Design (pre-implementation)
6
+
7
+ Historian is the episodic memory layer. At `SessionEnd`, it chunks and embeds the session transcript and stores the vectors locally. At `UserPromptSubmit`, it computes the prompt's embedding, retrieves the most similar past chunks (above a similarity threshold), and surfaces them as `additionalContext` — "you worked on something like this in session X." The goal is precedent recall, not distillation: where librarian preserves the *conclusion* of a past session as a typed memory, historian preserves the *verbatim shape* of the conversation so future sessions can see what was actually tried, said, and rejected.
8
+
9
+ It sits in the [memory architecture](../../../docs/memory-architecture.md) parallel to librarian and curator. It operates on its own substrate (a local vector store) and does not write to the typed memory store. See [ADR-001](adr/001-local-embeddings-only.md) for the embeddings-locality decision — the design assumes a local embedding model and a local vector store.
10
+
11
+ ---
12
+
13
+ ## Failure Modes Historian Addresses
14
+
15
+ **A — "We've solved this exact bug before."** A user hits a flaky test and asks the model to investigate. The model debugs from scratch. Three months ago, the same flake was investigated in this repo, the root cause was identified, and the fix landed. Historian surfaces the past session's relevant chunks so the model can short-circuit to known-good context.
16
+
17
+ **B — "I tried X already and it didn't work."** A user begins exploring an approach; the model elaborates the approach in detail. Two weeks ago the same approach was tried and abandoned. Without historian, this is invisible — the typed memory store may have a `feedback` entry ("X doesn't work here") but the *why* is in the transcript. Historian retrieves the dead-end discussion, not just the conclusion.
18
+
19
+ **C — "What was the rationale we settled on?"** A code shape exists because of a past discussion. The commit message has "use the cached path" but no rationale. The typed memory store has "use cached path for hot writes" but no nuance. The original session has 40 turns of weighing tradeoffs. Historian retrieves the rationale-bearing chunks.
20
+
21
+ **D — "We were in a similar situation in the other repo."** Cross-repo recall — out of scope by default. Historian is per-project. Cross-project retrieval is an opt-in mode noted in [Open Questions](#open-questions).
22
+
23
+ ---
24
+
25
+ ## Architecture
26
+
27
+ ```
28
+ SessionEnd hook fires
29
+
30
+
31
+ ┌──────────────────────┐
32
+ │ Transcript Reader │ reads full session transcript JSONL
33
+ └─────────┬────────────┘
34
+
35
+
36
+ ┌──────────────────────┐
37
+ │ Chunker │ rolling-window chunks; preserves turn boundaries
38
+ │ │ default: 600-token chunks with 100-token overlap
39
+ └─────────┬────────────┘
40
+
41
+
42
+ ┌──────────────────────┐
43
+ │ Sanitizer │ redacts patterns: API keys, tokens, .env content
44
+ │ │ drops chunks marked [historian:skip] by the user
45
+ └─────────┬────────────┘
46
+
47
+
48
+ ┌──────────────────────┐
49
+ │ Local Embedder │ ollama (default model: nomic-embed-text)
50
+ │ │ fallback: fastembed via Python sidecar
51
+ └─────────┬────────────┘
52
+
53
+
54
+ ┌──────────────────────┐
55
+ │ Vector Store │ sqlite-vec at
56
+ │ │ ~/.onlooker/historian/<project-key>/vectors.db
57
+ └──────────────────────┘
58
+
59
+ UserPromptSubmit hook fires
60
+
61
+
62
+ ┌──────────────────────┐
63
+ │ Rate Gate │ per-turn budget; cooldown after recent retrieval
64
+ └─────────┬────────────┘
65
+
66
+
67
+ ┌──────────────────────┐
68
+ │ Query Embedder │ embed the current user prompt
69
+ └─────────┬────────────┘
70
+
71
+
72
+ ┌──────────────────────┐
73
+ │ ANN Lookup │ top-K candidates by cosine; filter by min_similarity
74
+ │ │ filter by age (configurable max_age_days)
75
+ └─────────┬────────────┘
76
+ │ ≥1 result
77
+
78
+ ┌──────────────────────┐
79
+ │ Surfacer │ emits additionalContext block with the top match
80
+ │ │ ("Similar past session 47d ago — excerpt + link")
81
+ └──────────────────────┘
82
+ ```
83
+
84
+ ### Transcript Reader
85
+
86
+ At `SessionEnd`, reads the full transcript from `transcript_path` in the hook payload (same field compass and tribunal use). Parses as JSONL. Filters to user and assistant messages only — tool calls and tool results are dropped at this stage to keep the embedded content semantically focused. The resulting message list is the input to the chunker.
87
+
88
+ If the transcript is shorter than `min_transcript_chars_to_index` (default: 1200), historian skips indexing — the session is too short to plausibly produce a useful precedent.
89
+
90
+ ### Chunker
91
+
92
+ Chunks the message list into overlapping windows:
93
+
94
+ - **Chunk size:** `chunk_target_tokens` (default: 600). Measured via the local tokenizer used by the embedding model.
95
+ - **Overlap:** `chunk_overlap_tokens` (default: 100). Ensures cross-chunk concepts aren't sliced apart.
96
+ - **Turn-boundary respect:** Chunks never split mid-turn. The chunker accumulates turns until adding the next would exceed the target; then it emits a chunk and starts a new one. If a single turn exceeds the target, it is emitted as-is and the next chunk begins after it (no mid-turn split).
97
+ - **Metadata per chunk:** `session_id`, `chunk_index`, `start_turn_id`, `end_turn_id`, `created_at`, `chunk_token_count`.
98
+
99
+ ### Sanitizer
100
+
101
+ Before embedding, each chunk is scanned for:
102
+
103
+ 1. **Secret patterns.** AWS-style keys (`AKIA...`), bearer tokens (`Bearer ey...`), Anthropic API keys, GitHub PATs (`ghp_...`), `.env`-style assignments (`SECRET_KEY=...`). Matches are replaced with `[REDACTED:secret]`.
104
+ 2. **`[historian:skip]` markers.** A chunk containing the literal string `[historian:skip]` is dropped entirely. This is the in-band escape for users to mark sensitive turns.
105
+ 3. **Path-aware redaction.** When a chunk references a path in `historian.never_index_paths`, the chunk is dropped. This is the path-level escape for "this directory's discussions should never be indexed."
106
+
107
+ Redacted chunks are still embedded (the surrounding content has value); dropped chunks are not. The two decisions are logged as `historian.chunk.sanitized` and `historian.chunk.dropped` events, respectively, each with a reason.
108
+
109
+ ### Local Embedder
110
+
111
+ Embedding runs locally to keep transcript content off the wire and avoid per-prompt API cost. See [ADR-001](adr/001-local-embeddings-only.md) for the full reasoning.
112
+
113
+ Backends, in preference order:
114
+
115
+ 1. **ollama with `nomic-embed-text`.** Ollama is the recommended runtime. The `nomic-embed-text` model (~110MB) produces 768-dim embeddings, works offline, and is cheap to install. Historian's setup skill walks users through `ollama pull nomic-embed-text` if needed.
116
+ 2. **fastembed via Python sidecar.** A Python subprocess hosting fastembed (ONNX-based). Slower startup, no external runtime dependency. Used when ollama is not on PATH.
117
+ 3. **Disabled.** If neither backend is available, historian emits `historian.embedder.unavailable` once per session and skips indexing. Retrieval also degrades to "no results."
118
+
119
+ Historian does not call a remote embedding API by default. Users who want a remote embedding model (e.g., for cross-repo retrieval or higher quality) can opt in via `embedder.backend: "remote"`, which gates on `data_egress.allow_remote_embedding: true` to make the egress decision explicit.
120
+
121
+ ### Vector Store
122
+
123
+ `sqlite-vec` at `~/.onlooker/historian/<project-key>/vectors.db`. Schema:
124
+
125
+ ```sql
126
+ CREATE TABLE chunks (
127
+ chunk_id TEXT PRIMARY KEY, -- ULID
128
+ session_id TEXT NOT NULL,
129
+ chunk_index INTEGER NOT NULL,
130
+ start_turn_id TEXT,
131
+ end_turn_id TEXT,
132
+ body_redacted TEXT NOT NULL, -- post-sanitizer text
133
+ created_at TEXT NOT NULL,
134
+ source TEXT NOT NULL -- "local" today; future "team:<id>" etc.
135
+ );
136
+ CREATE VIRTUAL TABLE chunks_vec USING vec0(
137
+ embedding FLOAT[768]
138
+ );
139
+ CREATE INDEX idx_chunks_session ON chunks(session_id);
140
+ CREATE INDEX idx_chunks_created ON chunks(created_at);
141
+ ```
142
+
143
+ The chunk body (`body_redacted`) is stored alongside the embedding so retrieval can return the actual text without re-reading the transcript. Storage cost is bounded by retention: chunks older than `retention_days` (default: 365) are pruned, with each day's pruning capped at `prune_daily_cap_chunks` (default: 5000) chunks.
144
+
145
+ ### Rate Gate
146
+
147
+ At `UserPromptSubmit`:
148
+
149
+ 1. Skip if `disabled` in settings.
150
+ 2. Skip if a retrieval has run in the last `cooldown_seconds` (default: 60) for this session. Avoids spamming retrieval on rapid-fire prompts.
151
+ 3. Skip if `max_retrievals_per_session` (default: 5) retrievals have already run this session. A precedent recall is most useful in the first handful of turns; later turns are usually deep in the current work and don't benefit.
152
+ 4. Skip if the prompt is shorter than `min_prompt_chars` (default: 60). Short prompts ("ok", "next", "do it") have no semantic signal.
153
+
154
+ Each skip emits `historian.retrieval.skipped` with the reason.
155
+
156
+ ### Query Embedder, ANN Lookup, and Surfacer
157
+
158
+ For retrievals that pass the rate gate:
159
+
160
+ 1. Embed the prompt using the same backend as indexing.
161
+ 2. `vec0` cosine-similarity lookup, top `retrieval_top_k` (default: 5).
162
+ 3. Filter to candidates with `similarity >= min_similarity` (default: 0.55, calibrated against `nomic-embed-text` cosine distribution).
163
+ 4. Apply the `max_age_days` window (default: 180). Chunks older than the window are deprioritized rather than dropped — they appear with a "long ago" hint in the surfaced context.
164
+ 5. Filter out chunks from the current session (a session retrieving itself is a degenerate case).
165
+
166
+ The surfacer emits `additionalContext` of the form:
167
+
168
+ > Historian: a prompt 47 days ago looked similar. Excerpt (session 01J…):
169
+ >
170
+ > > [chunk text, truncated to 400 chars]
171
+ >
172
+ > Full session: `~/.onlooker/historian/<project-key>/sessions/<session_id>/transcript.json` (preserved when `historian.session_archive.enabled` is `true`; otherwise transcript reference only).
173
+
174
+ Only the top result is surfaced inline. The skill `/historian recall <query>` lets the user inspect more candidates.
175
+
176
+ ---
177
+
178
+ ## Integration Points
179
+
180
+ **Archivist.** Independent. Archivist preserves session-level distillations; historian preserves raw chunks. Same source (transcript) but different storage and different retrieval semantics.
181
+
182
+ **Librarian.** Independent at runtime. A future enhancement: when librarian classifies an artifact as "session-only — don't promote," historian could weight its source chunks lower in retrieval (the user has already signaled they're not durable). Deferred.
183
+
184
+ **Curator.** Independent. Curator audits the typed memory store; historian's substrate is the vector DB.
185
+
186
+ **Ecosystem substrate.** Historian writes to its own sub-path under `~/.onlooker/` and emits events via `onlooker-event.mjs`. No new substrate dependencies.
187
+
188
+ **Compass / Tribunal / Echo / Warden / Governor.** No interaction.
189
+
190
+ ---
191
+
192
+ ## Configuration (`config.json`)
193
+
194
+ ```json
195
+ {
196
+ "plugin_name": "historian",
197
+ "storage_path": "${ONLOOKER_DIR:-$HOME/.onlooker}",
198
+ "historian": {
199
+ "enabled": false,
200
+ "indexing": {
201
+ "trigger": "SessionEnd",
202
+ "min_transcript_chars_to_index": 1200,
203
+ "chunk_target_tokens": 600,
204
+ "chunk_overlap_tokens": 100,
205
+ "retention_days": 365,
206
+ "prune_daily_cap_chunks": 5000
207
+ },
208
+ "embedder": {
209
+ "backend": "ollama",
210
+ "ollama": {
211
+ "model": "nomic-embed-text",
212
+ "host": "http://127.0.0.1:11434",
213
+ "request_timeout_seconds": 8
214
+ },
215
+ "fastembed": {
216
+ "model": "BAAI/bge-small-en-v1.5",
217
+ "sidecar_command": "python3 -m historian_fastembed"
218
+ },
219
+ "remote": {
220
+ "enabled": false,
221
+ "provider": "voyage",
222
+ "model": "voyage-3-lite",
223
+ "note": "Remote embedding sends transcript chunks to a third-party API. Requires data_egress.allow_remote_embedding: true to take effect."
224
+ }
225
+ },
226
+ "sanitization": {
227
+ "redact_secret_patterns": true,
228
+ "drop_skip_marker": true,
229
+ "never_index_paths": []
230
+ },
231
+ "retrieval": {
232
+ "trigger": "UserPromptSubmit",
233
+ "cooldown_seconds": 60,
234
+ "max_retrievals_per_session": 5,
235
+ "min_prompt_chars": 60,
236
+ "retrieval_top_k": 5,
237
+ "min_similarity": 0.55,
238
+ "max_age_days": 180
239
+ },
240
+ "surfacer": {
241
+ "surface_top_n": 1,
242
+ "excerpt_chars_max": 400,
243
+ "include_age_hint": true
244
+ },
245
+ "data_egress": {
246
+ "allow_remote_embedding": false,
247
+ "note": "When false, embedding stays local. When true, transcript chunks are sent to the configured remote provider for embedding only — chunks are not stored remotely."
248
+ },
249
+ "session_archive": {
250
+ "enabled": false,
251
+ "note": "When true, the full transcript at session end is copied to ~/.onlooker/historian/<key>/sessions/<session_id>/transcript.json so retrieval surfaces can link to the source. When false, only chunk bodies are retained."
252
+ }
253
+ }
254
+ }
255
+ ```
256
+
257
+ ---
258
+
259
+ ## Events
260
+
261
+ | Event | Trigger | Key payload fields |
262
+ |---|---|---|
263
+ | `historian.indexing.started` | SessionEnd indexing run begins | `session_id`, `transcript_chars` |
264
+ | `historian.indexing.completed` | Run succeeds | `chunks_indexed`, `chunks_dropped`, `duration_ms` |
265
+ | `historian.indexing.skipped` | Indexing skipped | `reason: too_short\|embedder_unavailable\|disabled` |
266
+ | `historian.chunk.sanitized` | Secret patterns redacted in a chunk | `chunk_id`, `redaction_count` |
267
+ | `historian.chunk.dropped` | Chunk dropped entirely | `reason: skip_marker\|never_index_path` |
268
+ | `historian.embedder.unavailable` | Backend unreachable | `backend`, `error_summary` |
269
+ | `historian.retrieval.started` | UserPromptSubmit retrieval begins | `prompt_chars` |
270
+ | `historian.retrieval.skipped` | Skipped by rate gate | `reason: cooldown\|budget\|short_prompt\|disabled` |
271
+ | `historian.retrieval.empty` | No candidates above similarity floor | `top_similarity`, `min_similarity` |
272
+ | `historian.retrieval.surfaced` | A precedent was surfaced as additionalContext | `chunk_id`, `similarity`, `age_days` |
273
+ | `historian.prune.completed` | Daily retention prune ran | `chunks_pruned`, `chunks_remaining` |
274
+ | `historian.purge.completed` | User-triggered purge ran | `scope: session\|date_range\|all`, `chunks_purged` |
275
+
276
+ ---
277
+
278
+ ## Skills
279
+
280
+ **`/historian setup`** — checks for ollama on PATH, offers to install it (or runs the equivalent setup for fastembed), pulls the embedding model, and writes a confirmation to the storage dir.
281
+
282
+ **`/historian recall <query>`** — runs an ad-hoc retrieval against the current project's vector store. Returns the top K matches with similarity scores and full chunk bodies. Useful for "remind me what I tried last time" queries that don't naturally surface during a session.
283
+
284
+ **`/historian purge`** — interactive purge with three scopes: `session <id>` (remove all chunks from one session), `before <date>` (remove all chunks older than a date), `all` (full reset for this project). Always requires explicit user confirmation.
285
+
286
+ **`/historian stats`** — reports vector store size, chunk count, oldest chunk date, embedding model, last index run, last retrieval, retrieval-hit rate over last 30 days.
287
+
288
+ ---
289
+
290
+ ## Open Questions
291
+
292
+ 1. **Cross-project retrieval.** A precedent in repo A may be relevant in repo B (e.g., the user solved a similar Postgres deadlock in two different services). Historian is per-project today. A `team:<id>` source mode could allow shared vector stores, but it introduces multi-user privacy concerns and a new substrate dependency. Deferred.
293
+
294
+ 2. **Retrieval-hit calibration.** `min_similarity = 0.55` is a guess based on `nomic-embed-text`'s typical cosine distribution. A `/historian calibrate` skill could label a small set of past prompt-chunk pairs as relevant/irrelevant and tune the threshold per-project.
295
+
296
+ 3. **Index-time vs. retrieval-time redaction.** Redaction at index time is permanent and safe. Retrieval-time redaction would let users tune redaction rules without re-indexing. The asymmetry: a secret indexed today can't be redacted later without a re-embed pass. The design picks index-time redaction as the default (irreversibility is the safer error) and leaves a re-embed path for the rare case of needing to update rules.
297
+
298
+ 4. **Chunk overlap policy on long single turns.** A 4000-token assistant turn becomes a single chunk (no mid-turn split). The next chunk begins after it, losing the trailing-context overlap. Acceptable today; a "soft split" mode that breaks at paragraph boundaries within long turns is a future option.
299
+
300
+ 5. **Session-archive storage cost.** With `session_archive.enabled: true`, the full transcript is preserved per session — typically tens of MB per active project per month. The retention cap applies to chunks, not archived transcripts; a separate `session_archive_retention_days` setting may be needed.
301
+
302
+ 6. **Embedding-model versioning.** Re-running indexing with a different embedding model produces vectors in a different geometric space. The vector store has no concept of model version today. Adding `embedding_model` and `embedding_dim` columns and filtering retrieval by model match is straightforward but not yet decided.
303
+
304
+ 7. **Coordination with `~/.onlooker/logs/onlooker-events.jsonl`.** The JSONL log itself contains a rich, structured record of past sessions. Historian could index event payloads (decisions, dead-ends, findings) alongside or instead of raw transcripts. The case against: the transcript already contains the conversation those events summarize; indexing both is duplication.
305
+
306
+ 8. **Interaction with compaction.** A long session that compacts mid-flow has a truncated transcript at `SessionEnd`. Historian only sees the post-compaction tail. If pre-compaction content was important, it lives only in archivist. Whether historian should also index archivist artifacts (as a complement to transcript chunks) is a deferred design question.
307
+
308
+ ---
309
+
310
+ ## Non-Goals
311
+
312
+ - Does not call remote embedding APIs by default — local embedding is the architectural baseline (see [ADR-001](adr/001-local-embeddings-only.md)).
313
+ - Does not write to the typed memory store — that is librarian's job.
314
+ - Does not distill or summarize past sessions — preservation of verbatim shape is the point.
315
+ - Does not perform cross-project retrieval by default.
316
+ - Does not block any tool call — surfacer is informational only.
317
+ - Does not retain transcripts beyond chunk bodies unless `session_archive.enabled` is explicitly set to `true`.
@@ -0,0 +1,14 @@
1
+ {
2
+ "name": "librarian",
3
+ "version": "0.1.0",
4
+ "description": "Consolidation layer between archivist's per-session artifacts and the user's durable typed memory store. Detects which session decisions, dead-ends, and open questions deserve to live across sessions, classifies them into the user/feedback/project/reference types, and queues them as proposals for explicit confirmation. Auto-promotion is opt-in. Builds on the Onlooker ecosystem plugin.",
5
+ "author": {
6
+ "name": "Onlooker Community",
7
+ "url": "https://onlooker.dev"
8
+ },
9
+ "homepage": "https://onlooker.dev",
10
+ "repository": "https://github.com/onlooker-community/ecosystem",
11
+ "license": "MIT",
12
+ "skills": [],
13
+ "agents": []
14
+ }