@onlooker-community/ecosystem 0.21.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +13 -0
- package/.claude-plugin/plugin.json +1 -1
- package/.release-please-manifest.json +3 -2
- package/CHANGELOG.md +15 -0
- package/hooks/hooks.json +4 -0
- package/package.json +2 -2
- package/plugins/historian/.claude-plugin/plugin.json +14 -0
- package/plugins/historian/CHANGELOG.md +17 -0
- package/plugins/historian/README.md +84 -0
- package/plugins/historian/config.json +46 -0
- package/plugins/historian/hooks/hooks.json +26 -0
- package/plugins/historian/scripts/hooks/historian-prompt-submit.sh +269 -0
- package/plugins/historian/scripts/hooks/historian-session-end.sh +235 -0
- package/plugins/historian/scripts/lib/historian-chunker.sh +129 -0
- package/plugins/historian/scripts/lib/historian-config.sh +66 -0
- package/plugins/historian/scripts/lib/historian-embedder.sh +126 -0
- package/plugins/historian/scripts/lib/historian-emit.sh +61 -0
- package/plugins/historian/scripts/lib/historian-project-key.sh +80 -0
- package/plugins/historian/scripts/lib/historian-retriever.sh +191 -0
- package/plugins/historian/scripts/lib/historian-sanitizer.sh +123 -0
- package/plugins/historian/scripts/lib/historian-storage.sh +157 -0
- package/plugins/historian/scripts/lib/historian-transcript.sh +83 -0
- package/plugins/historian/scripts/lib/historian-ulid.sh +43 -0
- package/release-please-config.json +16 -0
- package/scripts/hooks/memory-recall-tracker.sh +206 -0
- package/test/bats/historian-prompt-submit.bats +236 -0
- package/test/bats/historian-session-end.bats +296 -0
- package/test/bats/memory-recall-tracker.bats +189 -0
|
@@ -150,6 +150,19 @@
|
|
|
150
150
|
"license": "MIT",
|
|
151
151
|
"keywords": ["memory", "audit", "staleness", "findings", "auto-memory", "decay"],
|
|
152
152
|
"tags": ["memory", "context-engineering"]
|
|
153
|
+
},
|
|
154
|
+
{
|
|
155
|
+
"name": "historian",
|
|
156
|
+
"source": "./plugins/historian",
|
|
157
|
+
"description": "Episodic memory layer for past Claude Code sessions. At SessionEnd, reads the session transcript, drops tool calls and tool results, chunks the remaining user + assistant turns at turn boundaries with overlap, redacts secret-shaped substrings (AWS keys, GitHub PATs, Anthropic API keys, KEY=value env assignments), embeds each surviving chunk via a local Ollama daemon, and appends one JSONL line per chunk to ~/.onlooker/historian/<project-key>/sessions/<session-id>.jsonl. On UserPromptSubmit, embeds the prompt and surfaces the most similar past chunk as additionalContext. Requires the ecosystem plugin.",
|
|
158
|
+
"author": {
|
|
159
|
+
"name": "Onlooker Community"
|
|
160
|
+
},
|
|
161
|
+
"homepage": "https://onlooker.dev",
|
|
162
|
+
"repository": "https://github.com/onlooker-community/ecosystem",
|
|
163
|
+
"license": "MIT",
|
|
164
|
+
"keywords": ["memory", "episodic", "transcript", "indexing", "session", "retrieval"],
|
|
165
|
+
"tags": ["memory", "context-engineering"]
|
|
153
166
|
}
|
|
154
167
|
]
|
|
155
168
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ecosystem",
|
|
3
|
-
"version": "0.21.0",
|
|
3
|
+
"version": "0.23.0",
|
|
4
4
|
"description": "Observability substrate for Claude Code. Provides the shared ~/.onlooker/ storage root, canonical schema-validated event emission, session and tool tracking hooks, and prompt rules. Required by all other Onlooker plugins.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Onlooker Community",
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
-
".": "0.21.0",
|
|
2
|
+
".": "0.23.0",
|
|
3
3
|
"plugins/archivist": "0.1.0",
|
|
4
4
|
"plugins/tribunal": "1.0.1",
|
|
5
5
|
"plugins/echo": "0.2.0",
|
|
@@ -10,5 +10,6 @@
|
|
|
10
10
|
"plugins/counsel": "0.2.0",
|
|
11
11
|
"plugins/warden": "0.2.0",
|
|
12
12
|
"plugins/librarian": "0.1.0",
|
|
13
|
-
"plugins/curator": "0.1.0"
|
|
13
|
+
"plugins/curator": "0.1.0",
|
|
14
|
+
"plugins/historian": "0.2.0"
|
|
14
15
|
}
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,20 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.23.0](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.22.0...ecosystem-v0.23.0) (2026-06-04)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Features
|
|
7
|
+
|
|
8
|
+
* **ecosystem:** emit memory.recalled at SessionStart :link: ([#62](https://github.com/onlooker-community/ecosystem/issues/62)) ([d5876f9](https://github.com/onlooker-community/ecosystem/commit/d5876f9f819165cc07d691d733662b549863b7f5))
|
|
9
|
+
* **historian:** retrieval pipeline + ollama embedder :telescope: ([#61](https://github.com/onlooker-community/ecosystem/issues/61)) ([7eae752](https://github.com/onlooker-community/ecosystem/commit/7eae752a288c4678ab093042469f2e65d428f0d9))
|
|
10
|
+
|
|
11
|
+
## [0.22.0](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.21.0...ecosystem-v0.22.0) (2026-06-04)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
### Features
|
|
15
|
+
|
|
16
|
+
* **historian:** introduce SessionEnd indexing :spiral_notepad: ([#59](https://github.com/onlooker-community/ecosystem/issues/59)) ([dd6c7f6](https://github.com/onlooker-community/ecosystem/commit/dd6c7f6ea872437cab6b16de50838dfc72750c7b))
|
|
17
|
+
|
|
3
18
|
## [0.21.0](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.20.0...ecosystem-v0.21.0) (2026-06-04)
|
|
4
19
|
|
|
5
20
|
|
package/hooks/hooks.json
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@onlooker-community/ecosystem",
|
|
3
|
-
"version": "0.21.0",
|
|
3
|
+
"version": "0.23.0",
|
|
4
4
|
"description": "Agents, skills, hooks, commands, rules, and MCP configurations that power [Onlooker](https://onlooker.dev)",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Onlooker Community",
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"test": "npm run test:bats && npm run test:schema",
|
|
27
27
|
"test:bats": "bats test/bats",
|
|
28
28
|
"test:schema": "node --test test/node/*.test.mjs",
|
|
29
|
-
"test:shellcheck": "shellcheck -S error -x install.sh scripts/common.sh scripts/hooks/*.sh scripts/lib/*.sh plugins/archivist/scripts/hooks/*.sh plugins/archivist/scripts/lib/*.sh plugins/tribunal/scripts/hooks/*.sh plugins/tribunal/scripts/lib/*.sh plugins/echo/scripts/hooks/*.sh plugins/echo/scripts/lib/*.sh plugins/governor/scripts/hooks/*.sh plugins/governor/scripts/lib/*.sh plugins/compass/scripts/hooks/*.sh plugins/compass/scripts/lib/*.sh plugins/scribe/scripts/hooks/*.sh plugins/scribe/scripts/lib/*.sh plugins/counsel/scripts/hooks/*.sh plugins/counsel/scripts/lib/*.sh plugins/warden/scripts/hooks/*.sh plugins/warden/scripts/lib/*.sh plugins/librarian/scripts/hooks/*.sh plugins/librarian/scripts/lib/*.sh plugins/curator/scripts/hooks/*.sh plugins/curator/scripts/lib/*.sh",
|
|
29
|
+
"test:shellcheck": "shellcheck -S error -x install.sh scripts/common.sh scripts/hooks/*.sh scripts/lib/*.sh plugins/archivist/scripts/hooks/*.sh plugins/archivist/scripts/lib/*.sh plugins/tribunal/scripts/hooks/*.sh plugins/tribunal/scripts/lib/*.sh plugins/echo/scripts/hooks/*.sh plugins/echo/scripts/lib/*.sh plugins/governor/scripts/hooks/*.sh plugins/governor/scripts/lib/*.sh plugins/compass/scripts/hooks/*.sh plugins/compass/scripts/lib/*.sh plugins/scribe/scripts/hooks/*.sh plugins/scribe/scripts/lib/*.sh plugins/counsel/scripts/hooks/*.sh plugins/counsel/scripts/lib/*.sh plugins/warden/scripts/hooks/*.sh plugins/warden/scripts/lib/*.sh plugins/librarian/scripts/hooks/*.sh plugins/librarian/scripts/lib/*.sh plugins/curator/scripts/hooks/*.sh plugins/curator/scripts/lib/*.sh plugins/historian/scripts/hooks/*.sh plugins/historian/scripts/lib/*.sh",
|
|
30
30
|
"lint:references": "node scripts/lint/check-references.mjs",
|
|
31
31
|
"lint:manifests": "node scripts/lint/check-manifests.mjs",
|
|
32
32
|
"coverage:node": "node scripts/coverage/run-coverage.mjs",
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "historian",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "Episodic memory layer. At SessionEnd, chunks and sanitizes the session transcript and stores chunks under $ONLOOKER_DIR/historian/<project-key>/sessions/ (default $HOME/.onlooker). On UserPromptSubmit, embeds the prompt and performs similarity retrieval over stored chunks to surface relevant past context. Builds on the Onlooker ecosystem plugin.",
|
|
5
|
+
"author": {
|
|
6
|
+
"name": "Onlooker Community",
|
|
7
|
+
"url": "https://onlooker.dev"
|
|
8
|
+
},
|
|
9
|
+
"homepage": "https://onlooker.dev",
|
|
10
|
+
"repository": "https://github.com/onlooker-community/ecosystem",
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"skills": [],
|
|
13
|
+
"agents": []
|
|
14
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [0.2.0](https://github.com/onlooker-community/ecosystem/compare/historian-v0.1.0...historian-v0.2.0) (2026-06-04)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Features
|
|
7
|
+
|
|
8
|
+
* **historian:** retrieval pipeline + ollama embedder :telescope: ([#61](https://github.com/onlooker-community/ecosystem/issues/61)) ([7eae752](https://github.com/onlooker-community/ecosystem/commit/7eae752a288c4678ab093042469f2e65d428f0d9))
|
|
9
|
+
|
|
10
|
+
## [0.1.0](https://github.com/onlooker-community/ecosystem/compare/historian-v0.0.1...historian-v0.1.0) (2026-06-04)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
### Features
|
|
14
|
+
|
|
15
|
+
* **historian:** introduce SessionEnd indexing :spiral_notepad: ([#59](https://github.com/onlooker-community/ecosystem/issues/59)) ([dd6c7f6](https://github.com/onlooker-community/ecosystem/commit/dd6c7f6ea872437cab6b16de50838dfc72750c7b))
|
|
16
|
+
|
|
17
|
+
## Changelog
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Historian
|
|
2
|
+
|
|
3
|
+
Episodic memory layer for past Claude Code sessions.
|
|
4
|
+
|
|
5
|
+
At every `SessionEnd`, Historian reads the session transcript, splits it into overlapping chunks at turn boundaries, redacts secret-shaped substrings, embeds each chunk via a local Ollama daemon, and persists the chunks under `~/.onlooker/historian/<project-key>/sessions/<session-id>.jsonl`. At every `UserPromptSubmit`, Historian embeds the prompt and retrieves the most similar past chunk (within a similarity floor and freshness window), then injects an `additionalContext` block whose first line is a "looks similar" pointer and whose body is a multi-line excerpt of the matched chunk.
|
|
6
|
+
|
|
7
|
+
Historian is a sibling plugin to [`ecosystem`](../../) and assumes the Onlooker observability substrate (`~/.onlooker/`) is present. It is parallel to [`librarian`](../librarian) (which consolidates session decisions into the typed memory store) — both turn session-scoped material into something queryable across sessions, but at different levels of distillation. Librarian distills; historian preserves verbatim.
|
|
8
|
+
|
|
9
|
+
See [`docs/design.md`](docs/design.md) and [ADR-001](docs/adr/001-local-embeddings-only.md) for the full design, including the local-embeddings-by-default decision.
|
|
10
|
+
|
|
11
|
+
## How it works
|
|
12
|
+
|
|
13
|
+
| Hook | What Historian does |
|
|
14
|
+
|------|---------------------|
|
|
15
|
+
| `SessionEnd` | Reads the transcript at `transcript_path`, drops tool calls and tool results (keeps user + assistant messages), chunks at turn boundaries inside the configured character target with overlap, runs the sanitizer (secret redaction + `[historian:skip]` markers + path-deny list), embeds each surviving chunk via the configured backend, and appends one JSONL line per chunk to the session's file. Emits `historian.indexing.*`, `historian.chunk.*`, and `historian.embedder.unavailable` events along the way. |
|
|
16
|
+
| `UserPromptSubmit` | Rate-gated retrieval: short prompts, cooldown windows, and per-session caps short-circuit before the embedder runs. Otherwise embeds the prompt, streams every JSONL chunk for the project, and injects an `additionalContext` block — a header pointer line plus a multi-line excerpt — for the top cosine-similarity match above the floor. Excludes chunks from the current session id (a session retrieving its own chunks is the degenerate case). Emits `historian.retrieval.started` when the rate gate clears, `historian.retrieval.surfaced` on the surfaced outcome, and `historian.retrieval.complete` with `outcome: surfaced\|empty\|skipped` and a `skip_reason` enum for skipped runs. |
|
|
17
|
+
|
|
18
|
+
## Activation
|
|
19
|
+
|
|
20
|
+
Historian is **off by default**. Enable per-project in `.claude/settings.json`:
|
|
21
|
+
|
|
22
|
+
```json
|
|
23
|
+
{
|
|
24
|
+
"historian": {
|
|
25
|
+
"enabled": true
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
See [`config.json`](config.json) for the full set of tunable defaults.
|
|
31
|
+
|
|
32
|
+
## Storage layout
|
|
33
|
+
|
|
34
|
+
```text
|
|
35
|
+
~/.onlooker/historian/<project-key>/
|
|
36
|
+
├── manifest.json # project metadata
|
|
37
|
+
├── retrieval-state/<session-id>.json # rate-gate state: count + last_ms
|
|
38
|
+
└── sessions/<session-id>.jsonl # one chunk per line, append-only
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Each chunk line:
|
|
42
|
+
|
|
43
|
+
```json
|
|
44
|
+
{
|
|
45
|
+
"chunk_id": "01J...",
|
|
46
|
+
"session_id": "...",
|
|
47
|
+
"chunk_index": 0,
|
|
48
|
+
"start_turn_index": 0,
|
|
49
|
+
"end_turn_index": 3,
|
|
50
|
+
"body_redacted": "...",
|
|
51
|
+
"body_chars": 2103,
|
|
52
|
+
"created_at": "2026-06-04T...",
|
|
53
|
+
"source": "local",
|
|
54
|
+
"redaction_count": 0,
|
|
55
|
+
"embedding": [0.123, 0.456, ...]
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
The `embedding` field is present iff the embedder was available at indexing time. Chunks indexed without an embedder are still readable but invisible to similarity retrieval until they are re-indexed.
|
|
60
|
+
|
|
61
|
+
## Embedder
|
|
62
|
+
|
|
63
|
+
Default backend is local **Ollama** with the `nomic-embed-text` model. Set up:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
ollama pull nomic-embed-text
|
|
67
|
+
ollama serve # run as a background service; the historian client expects 127.0.0.1:11434
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Override the host or model in `.claude/settings.json` under `historian.embedder.ollama.{host,model,request_timeout_seconds}`. Set `historian.embedder.backend: "none"` to disable embedding entirely — chunks index without vectors and retrieval no-ops.
|
|
71
|
+
|
|
72
|
+
## Status
|
|
73
|
+
|
|
74
|
+
This plugin ships **scaffolding + the SessionEnd indexing pipeline + the UserPromptSubmit retrieval pipeline + Ollama embedder integration**. Deferred to follow-up landings:
|
|
75
|
+
|
|
76
|
+
- **fastembed sidecar and remote embedder backends** — opt-in via the two-key egress affirmation from [ADR-001](docs/adr/001-local-embeddings-only.md).
|
|
77
|
+
- **Prune (retention sweep) and purge (manual)** skills.
|
|
78
|
+
- **`/historian recall`, `/historian setup`, `/historian stats`, `/historian purge`** slash commands.
|
|
79
|
+
|
|
80
|
+
## Requirements
|
|
81
|
+
|
|
82
|
+
- The `ecosystem` plugin installed (for `~/.onlooker/` substrate).
|
|
83
|
+
- `jq` for JSON manipulation.
|
|
84
|
+
- `python3` for chunking and sanitization (no extra packages — stdlib only).
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"plugin_name": "historian",
|
|
3
|
+
"storage_path": "~/.onlooker",
|
|
4
|
+
"historian": {
|
|
5
|
+
"enabled": false,
|
|
6
|
+
"indexing": {
|
|
7
|
+
"trigger": "SessionEnd",
|
|
8
|
+
"min_transcript_chars_to_index": 1200,
|
|
9
|
+
"chunk_target_chars": 2400,
|
|
10
|
+
"chunk_overlap_chars": 400,
|
|
11
|
+
"retention_days": 365
|
|
12
|
+
},
|
|
13
|
+
"sanitization": {
|
|
14
|
+
"redact_secret_patterns": true,
|
|
15
|
+
"drop_skip_marker": true,
|
|
16
|
+
"never_index_paths": []
|
|
17
|
+
},
|
|
18
|
+
"session_archive": {
|
|
19
|
+
"enabled": false,
|
|
20
|
+
"_note": "When true, the full transcript at SessionEnd is copied alongside the chunks so retrieval can link to the source. When false, only chunk bodies are retained."
|
|
21
|
+
},
|
|
22
|
+
"embedder": {
|
|
23
|
+
"backend": "ollama",
|
|
24
|
+
"_note": "Backend selector. `ollama` (default) talks to a local Ollama daemon. `none` skips embedding entirely; retrieval no-ops without vectors. `fastembed` and `remote` are reserved for a future landing.",
|
|
25
|
+
"ollama": {
|
|
26
|
+
"host": "http://127.0.0.1:11434",
|
|
27
|
+
"model": "nomic-embed-text",
|
|
28
|
+
"request_timeout_seconds": 8
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"retrieval": {
|
|
32
|
+
"enabled": true,
|
|
33
|
+
"cooldown_seconds": 60,
|
|
34
|
+
"max_retrievals_per_session": 5,
|
|
35
|
+
"min_prompt_chars": 60,
|
|
36
|
+
"retrieval_top_k": 5,
|
|
37
|
+
"min_similarity": 0.55,
|
|
38
|
+
"max_age_days": 180
|
|
39
|
+
},
|
|
40
|
+
"surfacer": {
|
|
41
|
+
"excerpt_chars_max": 400,
|
|
42
|
+
"include_age_hint": true,
|
|
43
|
+
"_note": "Retrieval always surfaces top-1 (per the design's deliberate top-1 inject). Multi-result surfacing is reserved for a future `/historian recall` skill rather than the UserPromptSubmit inject path."
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"hooks": {
|
|
3
|
+
"SessionEnd": [
|
|
4
|
+
{
|
|
5
|
+
"matcher": "*",
|
|
6
|
+
"hooks": [
|
|
7
|
+
{
|
|
8
|
+
"type": "command",
|
|
9
|
+
"command": "\"$CLAUDE_PLUGIN_ROOT\"/scripts/hooks/historian-session-end.sh"
|
|
10
|
+
}
|
|
11
|
+
]
|
|
12
|
+
}
|
|
13
|
+
],
|
|
14
|
+
"UserPromptSubmit": [
|
|
15
|
+
{
|
|
16
|
+
"matcher": "*",
|
|
17
|
+
"hooks": [
|
|
18
|
+
{
|
|
19
|
+
"type": "command",
|
|
20
|
+
"command": "\"$CLAUDE_PLUGIN_ROOT\"/scripts/hooks/historian-prompt-submit.sh"
|
|
21
|
+
}
|
|
22
|
+
]
|
|
23
|
+
}
|
|
24
|
+
]
|
|
25
|
+
}
|
|
26
|
+
}
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Historian UserPromptSubmit retrieval pipeline.
|
|
3
|
+
#
|
|
4
|
+
# Flow:
|
|
5
|
+
# 1. Rate gate (cooldown_seconds, max_retrievals_per_session,
|
|
6
|
+
# min_prompt_chars). Each ungated invocation costs one ollama
|
|
7
|
+
# embedding round-trip; the gates keep the cost bounded.
|
|
8
|
+
# 2. Embed the prompt via the configured backend.
|
|
9
|
+
# 3. Stream every chunk record for the project from disk one line at
|
|
10
|
+
# a time, cosine-search against the query vector, filter by
|
|
11
|
+
# min_similarity and max_age_days.
|
|
12
|
+
# 4. Emit one historian.retrieval.surfaced event for the top match
|
|
13
|
+
# and inject an `additionalContext` block whose first line is a
|
|
14
|
+
# "looks similar" pointer and whose body is a multi-line excerpt
|
|
15
|
+
# of the matched chunk.
|
|
16
|
+
#
|
|
17
|
+
# Hook contract:
|
|
18
|
+
# - Always exits 0. Never blocks the prompt.
|
|
19
|
+
# - Emits valid hookSpecificOutput JSON even when nothing to inject.
|
|
20
|
+
# - No-ops when historian.enabled is not true OR retrieval is disabled.
|
|
21
|
+
# - Lifecycle events: historian.retrieval.started fires when the rate
|
|
22
|
+
# gate clears and we are about to embed. All outcomes flow through
|
|
23
|
+
# historian.retrieval.complete with `outcome: surfaced | empty |
|
|
24
|
+
# skipped` and (on skipped) a `skip_reason` enum (short_prompt,
|
|
25
|
+
# cooldown, budget, embedder_unavailable). The surfaced case also
|
|
26
|
+
# emits historian.retrieval.surfaced with the matched chunk's
|
|
27
|
+
# chunk_id, similarity, age_days, and source_session_id.
|
|
28
|
+
|
|
29
|
+
set -uo pipefail
|
|
30
|
+
|
|
31
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
32
|
+
PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
|
|
33
|
+
|
|
34
|
+
_ECOSYSTEM_ROOT="${ONLOOKER_ECOSYSTEM_ROOT:-}"
|
|
35
|
+
if [[ -z "$_ECOSYSTEM_ROOT" ]]; then
|
|
36
|
+
_candidate="$(cd "${PLUGIN_ROOT}/../.." 2>/dev/null && pwd)"
|
|
37
|
+
if [[ -f "${_candidate}/scripts/lib/validate-path.sh" ]]; then
|
|
38
|
+
_ECOSYSTEM_ROOT="$_candidate"
|
|
39
|
+
fi
|
|
40
|
+
fi
|
|
41
|
+
if [[ -n "$_ECOSYSTEM_ROOT" && -f "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh" ]]; then
|
|
42
|
+
# shellcheck disable=SC1091
|
|
43
|
+
CLAUDE_PLUGIN_ROOT="$_ECOSYSTEM_ROOT" source "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh"
|
|
44
|
+
fi
|
|
45
|
+
|
|
46
|
+
# shellcheck source=../lib/historian-config.sh
|
|
47
|
+
source "${PLUGIN_ROOT}/scripts/lib/historian-config.sh"
|
|
48
|
+
# shellcheck source=../lib/historian-project-key.sh
|
|
49
|
+
source "${PLUGIN_ROOT}/scripts/lib/historian-project-key.sh"
|
|
50
|
+
# shellcheck source=../lib/historian-storage.sh
|
|
51
|
+
source "${PLUGIN_ROOT}/scripts/lib/historian-storage.sh"
|
|
52
|
+
# shellcheck source=../lib/historian-emit.sh
|
|
53
|
+
source "${PLUGIN_ROOT}/scripts/lib/historian-emit.sh"
|
|
54
|
+
# shellcheck source=../lib/historian-embedder.sh
|
|
55
|
+
source "${PLUGIN_ROOT}/scripts/lib/historian-embedder.sh"
|
|
56
|
+
# shellcheck source=../lib/historian-retriever.sh
|
|
57
|
+
source "${PLUGIN_ROOT}/scripts/lib/historian-retriever.sh"
|
|
58
|
+
|
|
59
|
+
_emit_context() {
|
|
60
|
+
local context="${1:-}"
|
|
61
|
+
jq -cn --arg ctx "$context" '{
|
|
62
|
+
hookSpecificOutput: {
|
|
63
|
+
hookEventName: "UserPromptSubmit",
|
|
64
|
+
additionalContext: $ctx
|
|
65
|
+
}
|
|
66
|
+
}'
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
_now_ms() {
|
|
70
|
+
python3 -c 'import time; print(int(time.time() * 1000))' 2>/dev/null \
|
|
71
|
+
|| echo "$(( $(date +%s) * 1000 ))"
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
INPUT=$(cat 2>/dev/null || true)
|
|
75
|
+
CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // ""' 2>/dev/null) || CWD=""
|
|
76
|
+
SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // ""' 2>/dev/null) || SESSION_ID=""
|
|
77
|
+
PROMPT=$(printf '%s' "$INPUT" | jq -r '.prompt // .user_message // .message // ""' 2>/dev/null) || PROMPT=""
|
|
78
|
+
[[ -z "$CWD" ]] && CWD="$(pwd)"
|
|
79
|
+
[[ -z "$SESSION_ID" ]] && SESSION_ID="unknown"
|
|
80
|
+
|
|
81
|
+
REPO_ROOT=$(historian_project_repo_root "$CWD")
|
|
82
|
+
historian_config_load "$REPO_ROOT"
|
|
83
|
+
|
|
84
|
+
if ! historian_config_enabled; then
|
|
85
|
+
_emit_context ""
|
|
86
|
+
exit 0
|
|
87
|
+
fi
|
|
88
|
+
|
|
89
|
+
RETRIEVAL_ENABLED=$(historian_config_get '.historian.retrieval.enabled')
|
|
90
|
+
if [[ "$RETRIEVAL_ENABLED" == "false" ]]; then
|
|
91
|
+
_emit_context ""
|
|
92
|
+
exit 0
|
|
93
|
+
fi
|
|
94
|
+
|
|
95
|
+
PROJECT_KEY=$(historian_project_key "$CWD")
|
|
96
|
+
if [[ -z "$PROJECT_KEY" ]]; then
|
|
97
|
+
_emit_context ""
|
|
98
|
+
exit 0
|
|
99
|
+
fi
|
|
100
|
+
|
|
101
|
+
# ----------------------------------------------------------------------------
|
|
102
|
+
# Rate gate.
|
|
103
|
+
#
|
|
104
|
+
# Skipped paths emit historian.retrieval.complete with outcome:"skipped"
|
|
105
|
+
# and a skip_reason, matching the schema's lifecycle-event shape. There
|
|
106
|
+
# is no separate retrieval.skipped event in the schema; the outcome
|
|
107
|
+
# field carries that signal.
|
|
108
|
+
# ----------------------------------------------------------------------------
|
|
109
|
+
|
|
110
|
+
RETRIEVAL_STARTED_MS=$(_now_ms)
|
|
111
|
+
|
|
112
|
+
_emit_complete_skipped() {
|
|
113
|
+
local reason="$1"
|
|
114
|
+
local now duration
|
|
115
|
+
now=$(_now_ms)
|
|
116
|
+
duration=$((now - RETRIEVAL_STARTED_MS))
|
|
117
|
+
historian_emit "historian.retrieval.complete" "$SESSION_ID" "$(jq -cn \
|
|
118
|
+
--arg outcome "skipped" \
|
|
119
|
+
--arg skip_reason "$reason" \
|
|
120
|
+
--argjson duration_ms "$duration" \
|
|
121
|
+
'{ outcome: $outcome, skip_reason: $skip_reason, duration_ms: $duration_ms }')"
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
MIN_PROMPT_CHARS=$(historian_config_get '.historian.retrieval.min_prompt_chars')
|
|
125
|
+
[[ -z "$MIN_PROMPT_CHARS" || "$MIN_PROMPT_CHARS" == "null" ]] && MIN_PROMPT_CHARS=60
|
|
126
|
+
|
|
127
|
+
PROMPT_LEN=${#PROMPT}
|
|
128
|
+
if (( PROMPT_LEN < MIN_PROMPT_CHARS )); then
|
|
129
|
+
_emit_complete_skipped "short_prompt"
|
|
130
|
+
_emit_context ""
|
|
131
|
+
exit 0
|
|
132
|
+
fi
|
|
133
|
+
|
|
134
|
+
COOLDOWN_SECONDS=$(historian_config_get '.historian.retrieval.cooldown_seconds')
|
|
135
|
+
[[ -z "$COOLDOWN_SECONDS" || "$COOLDOWN_SECONDS" == "null" ]] && COOLDOWN_SECONDS=60
|
|
136
|
+
MAX_RETRIEVALS=$(historian_config_get '.historian.retrieval.max_retrievals_per_session')
|
|
137
|
+
[[ -z "$MAX_RETRIEVALS" || "$MAX_RETRIEVALS" == "null" ]] && MAX_RETRIEVALS=5
|
|
138
|
+
|
|
139
|
+
STATE=$(historian_retrieval_state_read "$PROJECT_KEY" "$SESSION_ID")
|
|
140
|
+
PREV_COUNT=$(printf '%s' "$STATE" | jq -r '.count // 0')
|
|
141
|
+
PREV_LAST_MS=$(printf '%s' "$STATE" | jq -r '.last_ms // 0')
|
|
142
|
+
|
|
143
|
+
NOW_MS=$(_now_ms)
|
|
144
|
+
ELAPSED_MS=$((NOW_MS - PREV_LAST_MS))
|
|
145
|
+
COOLDOWN_MS=$((COOLDOWN_SECONDS * 1000))
|
|
146
|
+
|
|
147
|
+
if (( PREV_LAST_MS > 0 && ELAPSED_MS < COOLDOWN_MS )); then
|
|
148
|
+
_emit_complete_skipped "cooldown"
|
|
149
|
+
_emit_context ""
|
|
150
|
+
exit 0
|
|
151
|
+
fi
|
|
152
|
+
|
|
153
|
+
if (( PREV_COUNT >= MAX_RETRIEVALS )); then
|
|
154
|
+
_emit_complete_skipped "budget"
|
|
155
|
+
_emit_context ""
|
|
156
|
+
exit 0
|
|
157
|
+
fi
|
|
158
|
+
|
|
159
|
+
# ----------------------------------------------------------------------------
|
|
160
|
+
# Embed the prompt + search.
|
|
161
|
+
# ----------------------------------------------------------------------------
|
|
162
|
+
|
|
163
|
+
historian_emit "historian.retrieval.started" "$SESSION_ID" "$(jq -cn \
|
|
164
|
+
--argjson prompt_chars "$PROMPT_LEN" \
|
|
165
|
+
'{ prompt_chars: $prompt_chars }')"
|
|
166
|
+
|
|
167
|
+
if ! historian_embedder_available; then
|
|
168
|
+
BACKEND=$(historian_config_get '.historian.embedder.backend')
|
|
169
|
+
[[ -z "$BACKEND" || "$BACKEND" == "null" ]] && BACKEND="none"
|
|
170
|
+
historian_emit "historian.embedder.unavailable" "$SESSION_ID" "$(jq -cn \
|
|
171
|
+
--arg backend "$BACKEND" '{ backend: $backend }')"
|
|
172
|
+
_emit_complete_skipped "embedder_unavailable"
|
|
173
|
+
_emit_context ""
|
|
174
|
+
exit 0
|
|
175
|
+
fi
|
|
176
|
+
|
|
177
|
+
QUERY_EMBEDDING=$(historian_embedder_embed "$PROMPT")
|
|
178
|
+
if [[ -z "$QUERY_EMBEDDING" ]]; then
|
|
179
|
+
_emit_complete_skipped "embedder_unavailable"
|
|
180
|
+
_emit_context ""
|
|
181
|
+
exit 0
|
|
182
|
+
fi
|
|
183
|
+
|
|
184
|
+
TOP_K=$(historian_config_get '.historian.retrieval.retrieval_top_k')
|
|
185
|
+
[[ -z "$TOP_K" || "$TOP_K" == "null" ]] && TOP_K=5
|
|
186
|
+
MIN_SIMILARITY=$(historian_config_get '.historian.retrieval.min_similarity')
|
|
187
|
+
[[ -z "$MIN_SIMILARITY" || "$MIN_SIMILARITY" == "null" ]] && MIN_SIMILARITY="0.55"
|
|
188
|
+
MAX_AGE=$(historian_config_get '.historian.retrieval.max_age_days')
|
|
189
|
+
[[ -z "$MAX_AGE" || "$MAX_AGE" == "null" ]] && MAX_AGE=180
|
|
190
|
+
|
|
191
|
+
SESSIONS_DIR=$(historian_sessions_dir "$PROJECT_KEY")
|
|
192
|
+
RESULTS=$(historian_retriever_search "$SESSIONS_DIR" "$QUERY_EMBEDDING" "$TOP_K" \
|
|
193
|
+
"$MIN_SIMILARITY" "$MAX_AGE" "$SESSION_ID")
|
|
194
|
+
RESULT_COUNT=$(printf '%s' "$RESULTS" | jq 'length' 2>/dev/null) || RESULT_COUNT=0
|
|
195
|
+
|
|
196
|
+
# Bump the rate-gate state for any non-skipped run (we paid for the
|
|
197
|
+
# embedding regardless of whether anything matched).
|
|
198
|
+
historian_retrieval_state_write "$PROJECT_KEY" "$SESSION_ID" \
|
|
199
|
+
"$((PREV_COUNT + 1))" "$NOW_MS" || true
|
|
200
|
+
|
|
201
|
+
if [[ "$RESULT_COUNT" == "0" ]]; then
|
|
202
|
+
NOW=$(_now_ms)
|
|
203
|
+
DURATION_MS=$((NOW - RETRIEVAL_STARTED_MS))
|
|
204
|
+
historian_emit "historian.retrieval.complete" "$SESSION_ID" "$(jq -cn \
|
|
205
|
+
--arg outcome "empty" \
|
|
206
|
+
--argjson duration_ms "$DURATION_MS" \
|
|
207
|
+
'{ outcome: $outcome, duration_ms: $duration_ms }')"
|
|
208
|
+
_emit_context ""
|
|
209
|
+
exit 0
|
|
210
|
+
fi
|
|
211
|
+
|
|
212
|
+
# ----------------------------------------------------------------------------
|
|
213
|
+
# Surfacer.
|
|
214
|
+
# ----------------------------------------------------------------------------
|
|
215
|
+
|
|
216
|
+
EXCERPT_MAX=$(historian_config_get '.historian.surfacer.excerpt_chars_max')
|
|
217
|
+
[[ -z "$EXCERPT_MAX" || "$EXCERPT_MAX" == "null" ]] && EXCERPT_MAX=400
|
|
218
|
+
INCLUDE_AGE=$(historian_config_get '.historian.surfacer.include_age_hint')
|
|
219
|
+
[[ -z "$INCLUDE_AGE" || "$INCLUDE_AGE" == "null" ]] && INCLUDE_AGE="true"
|
|
220
|
+
|
|
221
|
+
TOP=$(printf '%s' "$RESULTS" | jq -c '.[0]')
|
|
222
|
+
TOP_CHUNK_ID=$(printf '%s' "$TOP" | jq -r '.chunk_id // ""')
|
|
223
|
+
TOP_SIM=$(printf '%s' "$TOP" | jq -r '.similarity // 0')
|
|
224
|
+
TOP_AGE=$(printf '%s' "$TOP" | jq -r '.age_days // 0')
|
|
225
|
+
TOP_SESSION=$(printf '%s' "$TOP" | jq -r '.session_id // ""')
|
|
226
|
+
TOP_BODY=$(printf '%s' "$TOP" | jq -r '.body_redacted // ""')
|
|
227
|
+
|
|
228
|
+
EXCERPT="$TOP_BODY"
|
|
229
|
+
if (( ${#EXCERPT} > EXCERPT_MAX )); then
|
|
230
|
+
EXCERPT="${EXCERPT:0:EXCERPT_MAX}…"
|
|
231
|
+
fi
|
|
232
|
+
|
|
233
|
+
if [[ "$INCLUDE_AGE" == "true" ]]; then
|
|
234
|
+
AGE_HINT=" (${TOP_AGE}d ago, session ${TOP_SESSION})"
|
|
235
|
+
else
|
|
236
|
+
AGE_HINT=""
|
|
237
|
+
fi
|
|
238
|
+
|
|
239
|
+
CONTEXT=$(printf 'Historian: a past chunk looks similar%s. Excerpt:\n\n> %s\n' \
|
|
240
|
+
"$AGE_HINT" "$EXCERPT")
|
|
241
|
+
|
|
242
|
+
historian_emit "historian.retrieval.surfaced" "$SESSION_ID" "$(jq -cn \
|
|
243
|
+
--arg chunk_id "$TOP_CHUNK_ID" \
|
|
244
|
+
--argjson similarity "$TOP_SIM" \
|
|
245
|
+
--argjson age_days "$TOP_AGE" \
|
|
246
|
+
--arg source_session_id "$TOP_SESSION" \
|
|
247
|
+
'{
|
|
248
|
+
chunk_id: $chunk_id,
|
|
249
|
+
similarity: $similarity,
|
|
250
|
+
age_days: $age_days,
|
|
251
|
+
source_session_id: $source_session_id
|
|
252
|
+
}')"
|
|
253
|
+
|
|
254
|
+
NOW=$(_now_ms)
|
|
255
|
+
DURATION_MS=$((NOW - RETRIEVAL_STARTED_MS))
|
|
256
|
+
historian_emit "historian.retrieval.complete" "$SESSION_ID" "$(jq -cn \
|
|
257
|
+
--arg outcome "surfaced" \
|
|
258
|
+
--argjson top_similarity "$TOP_SIM" \
|
|
259
|
+
--argjson candidates_above_floor "$RESULT_COUNT" \
|
|
260
|
+
--argjson duration_ms "$DURATION_MS" \
|
|
261
|
+
'{
|
|
262
|
+
outcome: $outcome,
|
|
263
|
+
top_similarity: $top_similarity,
|
|
264
|
+
candidates_above_floor: $candidates_above_floor,
|
|
265
|
+
duration_ms: $duration_ms
|
|
266
|
+
}')"
|
|
267
|
+
|
|
268
|
+
_emit_context "$CONTEXT"
|
|
269
|
+
exit 0
|