@onlooker-community/ecosystem 0.21.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/.claude-plugin/marketplace.json +13 -0
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.release-please-manifest.json +3 -2
  4. package/CHANGELOG.md +15 -0
  5. package/hooks/hooks.json +4 -0
  6. package/package.json +2 -2
  7. package/plugins/historian/.claude-plugin/plugin.json +14 -0
  8. package/plugins/historian/CHANGELOG.md +17 -0
  9. package/plugins/historian/README.md +84 -0
  10. package/plugins/historian/config.json +46 -0
  11. package/plugins/historian/hooks/hooks.json +26 -0
  12. package/plugins/historian/scripts/hooks/historian-prompt-submit.sh +269 -0
  13. package/plugins/historian/scripts/hooks/historian-session-end.sh +235 -0
  14. package/plugins/historian/scripts/lib/historian-chunker.sh +129 -0
  15. package/plugins/historian/scripts/lib/historian-config.sh +66 -0
  16. package/plugins/historian/scripts/lib/historian-embedder.sh +126 -0
  17. package/plugins/historian/scripts/lib/historian-emit.sh +61 -0
  18. package/plugins/historian/scripts/lib/historian-project-key.sh +80 -0
  19. package/plugins/historian/scripts/lib/historian-retriever.sh +191 -0
  20. package/plugins/historian/scripts/lib/historian-sanitizer.sh +123 -0
  21. package/plugins/historian/scripts/lib/historian-storage.sh +157 -0
  22. package/plugins/historian/scripts/lib/historian-transcript.sh +83 -0
  23. package/plugins/historian/scripts/lib/historian-ulid.sh +43 -0
  24. package/release-please-config.json +16 -0
  25. package/scripts/hooks/memory-recall-tracker.sh +206 -0
  26. package/test/bats/historian-prompt-submit.bats +236 -0
  27. package/test/bats/historian-session-end.bats +296 -0
  28. package/test/bats/memory-recall-tracker.bats +189 -0
@@ -150,6 +150,19 @@
150
150
  "license": "MIT",
151
151
  "keywords": ["memory", "audit", "staleness", "findings", "auto-memory", "decay"],
152
152
  "tags": ["memory", "context-engineering"]
153
+ },
154
+ {
155
+ "name": "historian",
156
+ "source": "./plugins/historian",
157
+ "description": "Episodic memory layer for past Claude Code sessions. At SessionEnd, reads the session transcript, drops tool calls and tool results, chunks the remaining user + assistant turns at turn boundaries with overlap, redacts secret-shaped substrings (AWS keys, GitHub PATs, Anthropic API keys, KEY=value env assignments), embeds each surviving chunk via a local Ollama daemon, and appends one JSONL line per chunk to ~/.onlooker/historian/<project-key>/sessions/<session-id>.jsonl. At UserPromptSubmit, a rate-gated retrieval step embeds the prompt and surfaces the most similar past chunk as additional context. Requires the ecosystem plugin.",
158
+ "author": {
159
+ "name": "Onlooker Community"
160
+ },
161
+ "homepage": "https://onlooker.dev",
162
+ "repository": "https://github.com/onlooker-community/ecosystem",
163
+ "license": "MIT",
164
+ "keywords": ["memory", "episodic", "transcript", "indexing", "session", "retrieval"],
165
+ "tags": ["memory", "context-engineering"]
153
166
  }
154
167
  ]
155
168
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ecosystem",
3
- "version": "0.21.0",
3
+ "version": "0.23.0",
4
4
  "description": "Observability substrate for Claude Code. Provides the shared ~/.onlooker/ storage root, canonical schema-validated event emission, session and tool tracking hooks, and prompt rules. Required by all other Onlooker plugins.",
5
5
  "author": {
6
6
  "name": "Onlooker Community",
@@ -1,5 +1,5 @@
1
1
  {
2
- ".": "0.21.0",
2
+ ".": "0.23.0",
3
3
  "plugins/archivist": "0.1.0",
4
4
  "plugins/tribunal": "1.0.1",
5
5
  "plugins/echo": "0.2.0",
@@ -10,5 +10,6 @@
10
10
  "plugins/counsel": "0.2.0",
11
11
  "plugins/warden": "0.2.0",
12
12
  "plugins/librarian": "0.1.0",
13
- "plugins/curator": "0.1.0"
13
+ "plugins/curator": "0.1.0",
14
+ "plugins/historian": "0.2.0"
14
15
  }
package/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.23.0](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.22.0...ecosystem-v0.23.0) (2026-06-04)
4
+
5
+
6
+ ### Features
7
+
8
+ * **ecosystem:** emit memory.recalled at SessionStart :link: ([#62](https://github.com/onlooker-community/ecosystem/issues/62)) ([d5876f9](https://github.com/onlooker-community/ecosystem/commit/d5876f9f819165cc07d691d733662b549863b7f5))
9
+ * **historian:** retrieval pipeline + ollama embedder :telescope: ([#61](https://github.com/onlooker-community/ecosystem/issues/61)) ([7eae752](https://github.com/onlooker-community/ecosystem/commit/7eae752a288c4678ab093042469f2e65d428f0d9))
10
+
11
+ ## [0.22.0](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.21.0...ecosystem-v0.22.0) (2026-06-04)
12
+
13
+
14
+ ### Features
15
+
16
+ * **historian:** introduce SessionEnd indexing :spiral_notepad: ([#59](https://github.com/onlooker-community/ecosystem/issues/59)) ([dd6c7f6](https://github.com/onlooker-community/ecosystem/commit/dd6c7f6ea872437cab6b16de50838dfc72750c7b))
17
+
3
18
  ## [0.21.0](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.20.0...ecosystem-v0.21.0) (2026-06-04)
4
19
 
5
20
 
package/hooks/hooks.json CHANGED
@@ -87,6 +87,10 @@
87
87
  {
88
88
  "type": "command",
89
89
  "command": "\"$CLAUDE_PLUGIN_ROOT\"/scripts/hooks/session-start-tracker.sh"
90
+ },
91
+ {
92
+ "type": "command",
93
+ "command": "\"$CLAUDE_PLUGIN_ROOT\"/scripts/hooks/memory-recall-tracker.sh"
90
94
  }
91
95
  ]
92
96
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@onlooker-community/ecosystem",
3
- "version": "0.21.0",
3
+ "version": "0.23.0",
4
4
  "description": "Agents, skills, hooks, commands, rules, and MCP configurations that power [Onlooker](https://onlooker.dev)",
5
5
  "author": {
6
6
  "name": "Onlooker Community",
@@ -26,7 +26,7 @@
26
26
  "test": "npm run test:bats && npm run test:schema",
27
27
  "test:bats": "bats test/bats",
28
28
  "test:schema": "node --test test/node/*.test.mjs",
29
- "test:shellcheck": "shellcheck -S error -x install.sh scripts/common.sh scripts/hooks/*.sh scripts/lib/*.sh plugins/archivist/scripts/hooks/*.sh plugins/archivist/scripts/lib/*.sh plugins/tribunal/scripts/hooks/*.sh plugins/tribunal/scripts/lib/*.sh plugins/echo/scripts/hooks/*.sh plugins/echo/scripts/lib/*.sh plugins/governor/scripts/hooks/*.sh plugins/governor/scripts/lib/*.sh plugins/compass/scripts/hooks/*.sh plugins/compass/scripts/lib/*.sh plugins/scribe/scripts/hooks/*.sh plugins/scribe/scripts/lib/*.sh plugins/counsel/scripts/hooks/*.sh plugins/counsel/scripts/lib/*.sh plugins/warden/scripts/hooks/*.sh plugins/warden/scripts/lib/*.sh plugins/librarian/scripts/hooks/*.sh plugins/librarian/scripts/lib/*.sh plugins/curator/scripts/hooks/*.sh plugins/curator/scripts/lib/*.sh",
29
+ "test:shellcheck": "shellcheck -S error -x install.sh scripts/common.sh scripts/hooks/*.sh scripts/lib/*.sh plugins/archivist/scripts/hooks/*.sh plugins/archivist/scripts/lib/*.sh plugins/tribunal/scripts/hooks/*.sh plugins/tribunal/scripts/lib/*.sh plugins/echo/scripts/hooks/*.sh plugins/echo/scripts/lib/*.sh plugins/governor/scripts/hooks/*.sh plugins/governor/scripts/lib/*.sh plugins/compass/scripts/hooks/*.sh plugins/compass/scripts/lib/*.sh plugins/scribe/scripts/hooks/*.sh plugins/scribe/scripts/lib/*.sh plugins/counsel/scripts/hooks/*.sh plugins/counsel/scripts/lib/*.sh plugins/warden/scripts/hooks/*.sh plugins/warden/scripts/lib/*.sh plugins/librarian/scripts/hooks/*.sh plugins/librarian/scripts/lib/*.sh plugins/curator/scripts/hooks/*.sh plugins/curator/scripts/lib/*.sh plugins/historian/scripts/hooks/*.sh plugins/historian/scripts/lib/*.sh",
30
30
  "lint:references": "node scripts/lint/check-references.mjs",
31
31
  "lint:manifests": "node scripts/lint/check-manifests.mjs",
32
32
  "coverage:node": "node scripts/coverage/run-coverage.mjs",
@@ -0,0 +1,14 @@
1
+ {
2
+ "name": "historian",
3
+ "version": "0.2.0",
4
+ "description": "Episodic memory layer. At SessionEnd, chunks and sanitizes the session transcript and stores chunks under $ONLOOKER_DIR/historian/<project-key>/sessions/ (default $HOME/.onlooker). On UserPromptSubmit, embeds the prompt and performs similarity retrieval over stored chunks to surface relevant past context. Builds on the Onlooker ecosystem plugin.",
5
+ "author": {
6
+ "name": "Onlooker Community",
7
+ "url": "https://onlooker.dev"
8
+ },
9
+ "homepage": "https://onlooker.dev",
10
+ "repository": "https://github.com/onlooker-community/ecosystem",
11
+ "license": "MIT",
12
+ "skills": [],
13
+ "agents": []
14
+ }
@@ -0,0 +1,17 @@
1
+ # Changelog
2
+
3
+ ## [0.2.0](https://github.com/onlooker-community/ecosystem/compare/historian-v0.1.0...historian-v0.2.0) (2026-06-04)
4
+
5
+
6
+ ### Features
7
+
8
+ * **historian:** retrieval pipeline + ollama embedder :telescope: ([#61](https://github.com/onlooker-community/ecosystem/issues/61)) ([7eae752](https://github.com/onlooker-community/ecosystem/commit/7eae752a288c4678ab093042469f2e65d428f0d9))
9
+
10
+ ## [0.1.0](https://github.com/onlooker-community/ecosystem/compare/historian-v0.0.1...historian-v0.1.0) (2026-06-04)
11
+
12
+
13
+ ### Features
14
+
15
+ * **historian:** introduce SessionEnd indexing :spiral_notepad: ([#59](https://github.com/onlooker-community/ecosystem/issues/59)) ([dd6c7f6](https://github.com/onlooker-community/ecosystem/commit/dd6c7f6ea872437cab6b16de50838dfc72750c7b))
16
+
17
+ ## Changelog
@@ -0,0 +1,84 @@
1
+ # Historian
2
+
3
+ Episodic memory layer for past Claude Code sessions.
4
+
5
+ When enabled, Historian runs at two points. At every `SessionEnd`, it reads the session transcript, drops tool calls and tool results, splits the remaining user and assistant turns into overlapping chunks at turn boundaries, redacts secret-shaped substrings, embeds each chunk via a local Ollama daemon, and persists the chunks under `~/.onlooker/historian/<project-key>/sessions/<session-id>.jsonl`. On `UserPromptSubmit`, once the rate gate clears (minimum prompt length, cooldown window, per-session cap), it embeds the prompt and retrieves the most similar past chunk (within a similarity floor and freshness window), then injects an `additionalContext` block whose first line is a "looks similar" pointer and whose body is a multi-line excerpt of the matched chunk. If no chunk clears the similarity floor, nothing is surfaced.
6
+
7
+ Historian is a sibling plugin to [`ecosystem`](../../) and assumes the Onlooker observability substrate (`~/.onlooker/`) is present. It is parallel to [`librarian`](../librarian) (which consolidates session decisions into the typed memory store) — both turn session-scoped material into something queryable across sessions, but at different levels of distillation. Librarian distills; historian preserves verbatim.
8
+
9
+ See [`docs/design.md`](docs/design.md) and [ADR-001](docs/adr/001-local-embeddings-only.md) for the full design, including the local-embeddings-by-default decision.
10
+
11
+ ## How it works
12
+
13
+ | Hook | What Historian does |
14
+ |------|---------------------|
15
+ | `SessionEnd` | Reads the transcript at `transcript_path`, drops tool calls and tool results (keeps user + assistant messages), chunks at turn boundaries inside the configured character target with overlap, runs the sanitizer (secret redaction + `[historian:skip]` markers + path-deny list), embeds each surviving chunk via the configured backend, and appends one JSONL line per chunk to the session's file. Emits `historian.indexing.*`, `historian.chunk.*`, and `historian.embedder.unavailable` events along the way. |
16
+ | `UserPromptSubmit` | Rate-gated retrieval: short prompts, cooldown windows, and per-session caps short-circuit before the embedder runs. Otherwise embeds the prompt, streams every JSONL chunk for the project, and injects an `additionalContext` block — a header pointer line plus a multi-line excerpt — for the top cosine-similarity match above the floor. Excludes chunks from the current session id (a session retrieving its own chunks is the degenerate case). Emits `historian.retrieval.started` when the rate gate clears, `historian.retrieval.surfaced` on the surfaced outcome, and `historian.retrieval.complete` with `outcome: surfaced\|empty\|skipped` and a `skip_reason` enum for skipped runs. |
17
+
18
+ ## Activation
19
+
20
+ Historian is **off by default**. Enable per-project in `.claude/settings.json`:
21
+
22
+ ```json
23
+ {
24
+ "historian": {
25
+ "enabled": true
26
+ }
27
+ }
28
+ ```
29
+
30
+ See [`config.json`](config.json) for the full set of tunable defaults.
31
+
32
+ ## Storage layout
33
+
34
+ ```text
35
+ ~/.onlooker/historian/<project-key>/
36
+ ├── manifest.json # project metadata
37
+ ├── retrieval-state/<session-id>.json # rate-gate state: count + last_ms
38
+ └── sessions/<session-id>.jsonl # one chunk per line, append-only
39
+ ```
40
+
41
+ Each chunk line:
42
+
43
+ ```json
44
+ {
45
+ "chunk_id": "01J...",
46
+ "session_id": "...",
47
+ "chunk_index": 0,
48
+ "start_turn_index": 0,
49
+ "end_turn_index": 3,
50
+ "body_redacted": "...",
51
+ "body_chars": 2103,
52
+ "created_at": "2026-06-04T...",
53
+ "source": "local",
54
+ "redaction_count": 0,
55
+ "embedding": [0.123, 0.456, ...]
56
+ }
57
+ ```
58
+
59
+ The `embedding` field is present iff the embedder was available at indexing time. Chunks indexed without an embedder are still readable but invisible to similarity retrieval until they are re-indexed.
60
+
61
+ ## Embedder
62
+
63
+ Default backend is local **Ollama** with the `nomic-embed-text` model. Set up:
64
+
65
+ ```bash
66
+ ollama pull nomic-embed-text
67
+ ollama serve # run as a background service; the historian client expects 127.0.0.1:11434
68
+ ```
69
+
70
+ Override the host or model in `.claude/settings.json` under `historian.embedder.ollama.{host,model,request_timeout_seconds}`. Set `historian.embedder.backend: "none"` to disable embedding entirely — chunks index without vectors and retrieval no-ops.
71
+
72
+ ## Status
73
+
74
+ This plugin ships **scaffolding + the SessionEnd indexing pipeline + the UserPromptSubmit retrieval pipeline + Ollama embedder integration**. Deferred to follow-up landings:
75
+
76
+ - **fastembed sidecar and remote embedder backends** — opt-in via the two-key egress affirmation from [ADR-001](docs/adr/001-local-embeddings-only.md).
77
+ - **Prune (retention sweep) and purge (manual)** skills.
78
+ - **`/historian recall`, `/historian setup`, `/historian stats`, `/historian purge`** slash commands.
79
+
80
+ ## Requirements
81
+
82
+ - The `ecosystem` plugin installed (for `~/.onlooker/` substrate).
83
+ - `jq` for JSON manipulation.
84
+ - `python3` for chunking and sanitization (no extra packages — stdlib only).
@@ -0,0 +1,46 @@
1
+ {
2
+ "plugin_name": "historian",
3
+ "storage_path": "~/.onlooker",
4
+ "historian": {
5
+ "enabled": false,
6
+ "indexing": {
7
+ "trigger": "SessionEnd",
8
+ "min_transcript_chars_to_index": 1200,
9
+ "chunk_target_chars": 2400,
10
+ "chunk_overlap_chars": 400,
11
+ "retention_days": 365
12
+ },
13
+ "sanitization": {
14
+ "redact_secret_patterns": true,
15
+ "drop_skip_marker": true,
16
+ "never_index_paths": []
17
+ },
18
+ "session_archive": {
19
+ "enabled": false,
20
+ "_note": "When true, the full transcript at SessionEnd is copied alongside the chunks so retrieval can link to the source. When false, only chunk bodies are retained."
21
+ },
22
+ "embedder": {
23
+ "backend": "ollama",
24
+ "_note": "Backend selector. `ollama` (default) talks to a local Ollama daemon. `none` skips embedding entirely; retrieval no-ops without vectors. `fastembed` and `remote` are reserved for a future landing.",
25
+ "ollama": {
26
+ "host": "http://127.0.0.1:11434",
27
+ "model": "nomic-embed-text",
28
+ "request_timeout_seconds": 8
29
+ }
30
+ },
31
+ "retrieval": {
32
+ "enabled": true,
33
+ "cooldown_seconds": 60,
34
+ "max_retrievals_per_session": 5,
35
+ "min_prompt_chars": 60,
36
+ "retrieval_top_k": 5,
37
+ "min_similarity": 0.55,
38
+ "max_age_days": 180
39
+ },
40
+ "surfacer": {
41
+ "excerpt_chars_max": 400,
42
+ "include_age_hint": true,
43
+ "_note": "Retrieval always surfaces top-1 (per the design's deliberate top-1 inject). Multi-result surfacing is reserved for a future `/historian recall` skill rather than the UserPromptSubmit inject path."
44
+ }
45
+ }
46
+ }
@@ -0,0 +1,26 @@
1
+ {
2
+ "hooks": {
3
+ "SessionEnd": [
4
+ {
5
+ "matcher": "*",
6
+ "hooks": [
7
+ {
8
+ "type": "command",
9
+ "command": "\"$CLAUDE_PLUGIN_ROOT\"/scripts/hooks/historian-session-end.sh"
10
+ }
11
+ ]
12
+ }
13
+ ],
14
+ "UserPromptSubmit": [
15
+ {
16
+ "matcher": "*",
17
+ "hooks": [
18
+ {
19
+ "type": "command",
20
+ "command": "\"$CLAUDE_PLUGIN_ROOT\"/scripts/hooks/historian-prompt-submit.sh"
21
+ }
22
+ ]
23
+ }
24
+ ]
25
+ }
26
+ }
@@ -0,0 +1,269 @@
1
+ #!/usr/bin/env bash
2
+ # Historian UserPromptSubmit retrieval pipeline.
3
+ #
4
+ # Flow:
5
+ # 1. Rate gate (cooldown_seconds, max_retrievals_per_session,
6
+ # min_prompt_chars). Each ungated invocation costs one ollama
7
+ # embedding round-trip; the gates keep the cost bounded.
8
+ # 2. Embed the prompt via the configured backend.
9
+ # 3. Stream every chunk record for the project from disk one line at
10
+ # a time, cosine-search against the query vector, filter by
11
+ # min_similarity and max_age_days.
12
+ # 4. Emit one historian.retrieval.surfaced event for the top match
13
+ # and inject an `additionalContext` block whose first line is a
14
+ # "looks similar" pointer and whose body is a multi-line excerpt
15
+ # of the matched chunk.
16
+ #
17
+ # Hook contract:
18
+ # - Always exits 0. Never blocks the prompt.
19
+ # - Emits valid hookSpecificOutput JSON even when nothing to inject.
20
+ # - No-ops when historian.enabled is not true OR retrieval is disabled.
21
+ # - Lifecycle events: historian.retrieval.started fires when the rate
22
+ # gate clears and we are about to embed. All outcomes flow through
23
+ # historian.retrieval.complete with `outcome: surfaced | empty |
24
+ # skipped` and (on skipped) a `skip_reason` enum (short_prompt,
25
+ # cooldown, budget, embedder_unavailable). The surfaced case also
26
+ # emits historian.retrieval.surfaced with the matched chunk's
27
+ # chunk_id, similarity, age_days, and source_session_id.
28
+
29
+ set -uo pipefail
30
+
31
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
32
+ PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
33
+
34
+ _ECOSYSTEM_ROOT="${ONLOOKER_ECOSYSTEM_ROOT:-}"
35
+ if [[ -z "$_ECOSYSTEM_ROOT" ]]; then
36
+ _candidate="$(cd "${PLUGIN_ROOT}/../.." 2>/dev/null && pwd)"
37
+ if [[ -f "${_candidate}/scripts/lib/validate-path.sh" ]]; then
38
+ _ECOSYSTEM_ROOT="$_candidate"
39
+ fi
40
+ fi
41
+ if [[ -n "$_ECOSYSTEM_ROOT" && -f "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh" ]]; then
42
+ # shellcheck disable=SC1091
43
+ CLAUDE_PLUGIN_ROOT="$_ECOSYSTEM_ROOT" source "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh"
44
+ fi
45
+
46
+ # shellcheck source=../lib/historian-config.sh
47
+ source "${PLUGIN_ROOT}/scripts/lib/historian-config.sh"
48
+ # shellcheck source=../lib/historian-project-key.sh
49
+ source "${PLUGIN_ROOT}/scripts/lib/historian-project-key.sh"
50
+ # shellcheck source=../lib/historian-storage.sh
51
+ source "${PLUGIN_ROOT}/scripts/lib/historian-storage.sh"
52
+ # shellcheck source=../lib/historian-emit.sh
53
+ source "${PLUGIN_ROOT}/scripts/lib/historian-emit.sh"
54
+ # shellcheck source=../lib/historian-embedder.sh
55
+ source "${PLUGIN_ROOT}/scripts/lib/historian-embedder.sh"
56
+ # shellcheck source=../lib/historian-retriever.sh
57
+ source "${PLUGIN_ROOT}/scripts/lib/historian-retriever.sh"
58
+
59
+ _emit_context() {
60
+ local context="${1:-}"
61
+ jq -cn --arg ctx "$context" '{
62
+ hookSpecificOutput: {
63
+ hookEventName: "UserPromptSubmit",
64
+ additionalContext: $ctx
65
+ }
66
+ }'
67
+ }
68
+
69
+ _now_ms() {
70
+ python3 -c 'import time; print(int(time.time() * 1000))' 2>/dev/null \
71
+ || echo "$(( $(date +%s) * 1000 ))"
72
+ }
73
+
74
+ INPUT=$(cat 2>/dev/null || true)
75
+ CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // ""' 2>/dev/null) || CWD=""
76
+ SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // ""' 2>/dev/null) || SESSION_ID=""
77
+ PROMPT=$(printf '%s' "$INPUT" | jq -r '.prompt // .user_message // .message // ""' 2>/dev/null) || PROMPT=""
78
+ [[ -z "$CWD" ]] && CWD="$(pwd)"
79
+ [[ -z "$SESSION_ID" ]] && SESSION_ID="unknown"
80
+
81
+ REPO_ROOT=$(historian_project_repo_root "$CWD")
82
+ historian_config_load "$REPO_ROOT"
83
+
84
+ if ! historian_config_enabled; then
85
+ _emit_context ""
86
+ exit 0
87
+ fi
88
+
89
+ RETRIEVAL_ENABLED=$(historian_config_get '.historian.retrieval.enabled')
90
+ if [[ "$RETRIEVAL_ENABLED" == "false" ]]; then
91
+ _emit_context ""
92
+ exit 0
93
+ fi
94
+
95
+ PROJECT_KEY=$(historian_project_key "$CWD")
96
+ if [[ -z "$PROJECT_KEY" ]]; then
97
+ _emit_context ""
98
+ exit 0
99
+ fi
100
+
101
+ # ----------------------------------------------------------------------------
102
+ # Rate gate.
103
+ #
104
+ # Skipped paths emit historian.retrieval.complete with outcome:"skipped"
105
+ # and a skip_reason, matching the schema's lifecycle-event shape. There
106
+ # is no separate retrieval.skipped event in the schema; the outcome
107
+ # field carries that signal.
108
+ # ----------------------------------------------------------------------------
109
+
110
+ RETRIEVAL_STARTED_MS=$(_now_ms)
111
+
112
+ _emit_complete_skipped() {
113
+ local reason="$1"
114
+ local now duration
115
+ now=$(_now_ms)
116
+ duration=$((now - RETRIEVAL_STARTED_MS))
117
+ historian_emit "historian.retrieval.complete" "$SESSION_ID" "$(jq -cn \
118
+ --arg outcome "skipped" \
119
+ --arg skip_reason "$reason" \
120
+ --argjson duration_ms "$duration" \
121
+ '{ outcome: $outcome, skip_reason: $skip_reason, duration_ms: $duration_ms }')"
122
+ }
123
+
124
+ MIN_PROMPT_CHARS=$(historian_config_get '.historian.retrieval.min_prompt_chars')
125
+ [[ -z "$MIN_PROMPT_CHARS" || "$MIN_PROMPT_CHARS" == "null" ]] && MIN_PROMPT_CHARS=60
126
+
127
+ PROMPT_LEN=${#PROMPT}
128
+ if (( PROMPT_LEN < MIN_PROMPT_CHARS )); then
129
+ _emit_complete_skipped "short_prompt"
130
+ _emit_context ""
131
+ exit 0
132
+ fi
133
+
134
+ COOLDOWN_SECONDS=$(historian_config_get '.historian.retrieval.cooldown_seconds')
135
+ [[ -z "$COOLDOWN_SECONDS" || "$COOLDOWN_SECONDS" == "null" ]] && COOLDOWN_SECONDS=60
136
+ MAX_RETRIEVALS=$(historian_config_get '.historian.retrieval.max_retrievals_per_session')
137
+ [[ -z "$MAX_RETRIEVALS" || "$MAX_RETRIEVALS" == "null" ]] && MAX_RETRIEVALS=5
138
+
139
+ STATE=$(historian_retrieval_state_read "$PROJECT_KEY" "$SESSION_ID")
140
+ PREV_COUNT=$(printf '%s' "$STATE" | jq -r '.count // 0')
141
+ PREV_LAST_MS=$(printf '%s' "$STATE" | jq -r '.last_ms // 0')
142
+
143
+ NOW_MS=$(_now_ms)
144
+ ELAPSED_MS=$((NOW_MS - PREV_LAST_MS))
145
+ COOLDOWN_MS=$((COOLDOWN_SECONDS * 1000))
146
+
147
+ if (( PREV_LAST_MS > 0 && ELAPSED_MS < COOLDOWN_MS )); then
148
+ _emit_complete_skipped "cooldown"
149
+ _emit_context ""
150
+ exit 0
151
+ fi
152
+
153
+ if (( PREV_COUNT >= MAX_RETRIEVALS )); then
154
+ _emit_complete_skipped "budget"
155
+ _emit_context ""
156
+ exit 0
157
+ fi
158
+
159
+ # ----------------------------------------------------------------------------
160
+ # Embed the prompt + search.
161
+ # ----------------------------------------------------------------------------
162
+
163
+ historian_emit "historian.retrieval.started" "$SESSION_ID" "$(jq -cn \
164
+ --argjson prompt_chars "$PROMPT_LEN" \
165
+ '{ prompt_chars: $prompt_chars }')"
166
+
167
+ if ! historian_embedder_available; then
168
+ BACKEND=$(historian_config_get '.historian.embedder.backend')
169
+ [[ -z "$BACKEND" || "$BACKEND" == "null" ]] && BACKEND="none"
170
+ historian_emit "historian.embedder.unavailable" "$SESSION_ID" "$(jq -cn \
171
+ --arg backend "$BACKEND" '{ backend: $backend }')"
172
+ _emit_complete_skipped "embedder_unavailable"
173
+ _emit_context ""
174
+ exit 0
175
+ fi
176
+
177
+ QUERY_EMBEDDING=$(historian_embedder_embed "$PROMPT")
178
+ if [[ -z "$QUERY_EMBEDDING" ]]; then
179
+ _emit_complete_skipped "embedder_unavailable"
180
+ _emit_context ""
181
+ exit 0
182
+ fi
183
+
184
+ TOP_K=$(historian_config_get '.historian.retrieval.retrieval_top_k')
185
+ [[ -z "$TOP_K" || "$TOP_K" == "null" ]] && TOP_K=5
186
+ MIN_SIMILARITY=$(historian_config_get '.historian.retrieval.min_similarity')
187
+ [[ -z "$MIN_SIMILARITY" || "$MIN_SIMILARITY" == "null" ]] && MIN_SIMILARITY="0.55"
188
+ MAX_AGE=$(historian_config_get '.historian.retrieval.max_age_days')
189
+ [[ -z "$MAX_AGE" || "$MAX_AGE" == "null" ]] && MAX_AGE=180
190
+
191
+ SESSIONS_DIR=$(historian_sessions_dir "$PROJECT_KEY")
192
+ RESULTS=$(historian_retriever_search "$SESSIONS_DIR" "$QUERY_EMBEDDING" "$TOP_K" \
193
+ "$MIN_SIMILARITY" "$MAX_AGE" "$SESSION_ID")
194
+ RESULT_COUNT=$(printf '%s' "$RESULTS" | jq 'length' 2>/dev/null) || RESULT_COUNT=0
195
+
196
+ # Bump the rate-gate state for any non-skipped run (we paid for the
197
+ # embedding regardless of whether anything matched).
198
+ historian_retrieval_state_write "$PROJECT_KEY" "$SESSION_ID" \
199
+ "$((PREV_COUNT + 1))" "$NOW_MS" || true
200
+
201
+ if [[ "$RESULT_COUNT" == "0" ]]; then
202
+ NOW=$(_now_ms)
203
+ DURATION_MS=$((NOW - RETRIEVAL_STARTED_MS))
204
+ historian_emit "historian.retrieval.complete" "$SESSION_ID" "$(jq -cn \
205
+ --arg outcome "empty" \
206
+ --argjson duration_ms "$DURATION_MS" \
207
+ '{ outcome: $outcome, duration_ms: $duration_ms }')"
208
+ _emit_context ""
209
+ exit 0
210
+ fi
211
+
212
+ # ----------------------------------------------------------------------------
213
+ # Surfacer.
214
+ # ----------------------------------------------------------------------------
215
+
216
+ EXCERPT_MAX=$(historian_config_get '.historian.surfacer.excerpt_chars_max')
217
+ [[ -z "$EXCERPT_MAX" || "$EXCERPT_MAX" == "null" ]] && EXCERPT_MAX=400
218
+ INCLUDE_AGE=$(historian_config_get '.historian.surfacer.include_age_hint')
219
+ [[ -z "$INCLUDE_AGE" || "$INCLUDE_AGE" == "null" ]] && INCLUDE_AGE="true"
220
+
221
+ TOP=$(printf '%s' "$RESULTS" | jq -c '.[0]')
222
+ TOP_CHUNK_ID=$(printf '%s' "$TOP" | jq -r '.chunk_id // ""')
223
+ TOP_SIM=$(printf '%s' "$TOP" | jq -r '.similarity // 0')
224
+ TOP_AGE=$(printf '%s' "$TOP" | jq -r '.age_days // 0')
225
+ TOP_SESSION=$(printf '%s' "$TOP" | jq -r '.session_id // ""')
226
+ TOP_BODY=$(printf '%s' "$TOP" | jq -r '.body_redacted // ""')
227
+
228
+ EXCERPT="$TOP_BODY"
229
+ if (( ${#EXCERPT} > EXCERPT_MAX )); then
230
+ EXCERPT="${EXCERPT:0:EXCERPT_MAX}…"
231
+ fi
232
+
233
+ if [[ "$INCLUDE_AGE" == "true" ]]; then
234
+ AGE_HINT=" (${TOP_AGE}d ago, session ${TOP_SESSION})"
235
+ else
236
+ AGE_HINT=""
237
+ fi
238
+
239
+ CONTEXT=$(printf 'Historian: a past chunk looks similar%s. Excerpt:\n\n> %s\n' \
240
+ "$AGE_HINT" "$EXCERPT")
241
+
242
+ historian_emit "historian.retrieval.surfaced" "$SESSION_ID" "$(jq -cn \
243
+ --arg chunk_id "$TOP_CHUNK_ID" \
244
+ --argjson similarity "$TOP_SIM" \
245
+ --argjson age_days "$TOP_AGE" \
246
+ --arg source_session_id "$TOP_SESSION" \
247
+ '{
248
+ chunk_id: $chunk_id,
249
+ similarity: $similarity,
250
+ age_days: $age_days,
251
+ source_session_id: $source_session_id
252
+ }')"
253
+
254
+ NOW=$(_now_ms)
255
+ DURATION_MS=$((NOW - RETRIEVAL_STARTED_MS))
256
+ historian_emit "historian.retrieval.complete" "$SESSION_ID" "$(jq -cn \
257
+ --arg outcome "surfaced" \
258
+ --argjson top_similarity "$TOP_SIM" \
259
+ --argjson candidates_above_floor "$RESULT_COUNT" \
260
+ --argjson duration_ms "$DURATION_MS" \
261
+ '{
262
+ outcome: $outcome,
263
+ top_similarity: $top_similarity,
264
+ candidates_above_floor: $candidates_above_floor,
265
+ duration_ms: $duration_ms
266
+ }')"
267
+
268
+ _emit_context "$CONTEXT"
269
+ exit 0