@onlooker-community/ecosystem 0.21.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -150,6 +150,19 @@
150
150
  "license": "MIT",
151
151
  "keywords": ["memory", "audit", "staleness", "findings", "auto-memory", "decay"],
152
152
  "tags": ["memory", "context-engineering"]
153
+ },
154
+ {
155
+ "name": "historian",
156
+ "source": "./plugins/historian",
157
+ "description": "Episodic memory layer for past Claude Code sessions. At SessionEnd, reads the session transcript, drops tool calls and tool results, chunks the remaining user + assistant turns at turn boundaries with overlap, redacts secret-shaped substrings (AWS keys, GitHub PATs, Anthropic API keys, KEY=value env assignments), and appends one JSONL line per surviving chunk to ~/.onlooker/historian/<project-key>/sessions/<session-id>.jsonl. Future-tense retrieval (vector embeddings + UserPromptSubmit similarity surfacer) lands in a follow-up; this version ships the indexing pipeline only. Requires the ecosystem plugin.",
158
+ "author": {
159
+ "name": "Onlooker Community"
160
+ },
161
+ "homepage": "https://onlooker.dev",
162
+ "repository": "https://github.com/onlooker-community/ecosystem",
163
+ "license": "MIT",
164
+ "keywords": ["memory", "episodic", "transcript", "indexing", "session", "retrieval"],
165
+ "tags": ["memory", "context-engineering"]
153
166
  }
154
167
  ]
155
168
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ecosystem",
3
- "version": "0.21.0",
3
+ "version": "0.22.0",
4
4
  "description": "Observability substrate for Claude Code. Provides the shared ~/.onlooker/ storage root, canonical schema-validated event emission, session and tool tracking hooks, and prompt rules. Required by all other Onlooker plugins.",
5
5
  "author": {
6
6
  "name": "Onlooker Community",
@@ -1,5 +1,5 @@
1
1
  {
2
- ".": "0.21.0",
2
+ ".": "0.22.0",
3
3
  "plugins/archivist": "0.1.0",
4
4
  "plugins/tribunal": "1.0.1",
5
5
  "plugins/echo": "0.2.0",
@@ -10,5 +10,6 @@
10
10
  "plugins/counsel": "0.2.0",
11
11
  "plugins/warden": "0.2.0",
12
12
  "plugins/librarian": "0.1.0",
13
- "plugins/curator": "0.1.0"
13
+ "plugins/curator": "0.1.0",
14
+ "plugins/historian": "0.1.0"
14
15
  }
package/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.22.0](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.21.0...ecosystem-v0.22.0) (2026-06-04)
4
+
5
+
6
+ ### Features
7
+
8
+ * **historian:** introduce SessionEnd indexing :spiral_notepad: ([#59](https://github.com/onlooker-community/ecosystem/issues/59)) ([dd6c7f6](https://github.com/onlooker-community/ecosystem/commit/dd6c7f6ea872437cab6b16de50838dfc72750c7b))
9
+
3
10
  ## [0.21.0](https://github.com/onlooker-community/ecosystem/compare/ecosystem-v0.20.0...ecosystem-v0.21.0) (2026-06-04)
4
11
 
5
12
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@onlooker-community/ecosystem",
3
- "version": "0.21.0",
3
+ "version": "0.22.0",
4
4
  "description": "Agents, skills, hooks, commands, rules, and MCP configurations that power [Onlooker](https://onlooker.dev)",
5
5
  "author": {
6
6
  "name": "Onlooker Community",
@@ -26,7 +26,7 @@
26
26
  "test": "npm run test:bats && npm run test:schema",
27
27
  "test:bats": "bats test/bats",
28
28
  "test:schema": "node --test test/node/*.test.mjs",
29
- "test:shellcheck": "shellcheck -S error -x install.sh scripts/common.sh scripts/hooks/*.sh scripts/lib/*.sh plugins/archivist/scripts/hooks/*.sh plugins/archivist/scripts/lib/*.sh plugins/tribunal/scripts/hooks/*.sh plugins/tribunal/scripts/lib/*.sh plugins/echo/scripts/hooks/*.sh plugins/echo/scripts/lib/*.sh plugins/governor/scripts/hooks/*.sh plugins/governor/scripts/lib/*.sh plugins/compass/scripts/hooks/*.sh plugins/compass/scripts/lib/*.sh plugins/scribe/scripts/hooks/*.sh plugins/scribe/scripts/lib/*.sh plugins/counsel/scripts/hooks/*.sh plugins/counsel/scripts/lib/*.sh plugins/warden/scripts/hooks/*.sh plugins/warden/scripts/lib/*.sh plugins/librarian/scripts/hooks/*.sh plugins/librarian/scripts/lib/*.sh plugins/curator/scripts/hooks/*.sh plugins/curator/scripts/lib/*.sh",
29
+ "test:shellcheck": "shellcheck -S error -x install.sh scripts/common.sh scripts/hooks/*.sh scripts/lib/*.sh plugins/archivist/scripts/hooks/*.sh plugins/archivist/scripts/lib/*.sh plugins/tribunal/scripts/hooks/*.sh plugins/tribunal/scripts/lib/*.sh plugins/echo/scripts/hooks/*.sh plugins/echo/scripts/lib/*.sh plugins/governor/scripts/hooks/*.sh plugins/governor/scripts/lib/*.sh plugins/compass/scripts/hooks/*.sh plugins/compass/scripts/lib/*.sh plugins/scribe/scripts/hooks/*.sh plugins/scribe/scripts/lib/*.sh plugins/counsel/scripts/hooks/*.sh plugins/counsel/scripts/lib/*.sh plugins/warden/scripts/hooks/*.sh plugins/warden/scripts/lib/*.sh plugins/librarian/scripts/hooks/*.sh plugins/librarian/scripts/lib/*.sh plugins/curator/scripts/hooks/*.sh plugins/curator/scripts/lib/*.sh plugins/historian/scripts/hooks/*.sh plugins/historian/scripts/lib/*.sh",
30
30
  "lint:references": "node scripts/lint/check-references.mjs",
31
31
  "lint:manifests": "node scripts/lint/check-manifests.mjs",
32
32
  "coverage:node": "node scripts/coverage/run-coverage.mjs",
@@ -0,0 +1,14 @@
1
+ {
2
+ "name": "historian",
3
+ "version": "0.1.0",
4
+ "description": "Episodic memory layer. At SessionEnd, chunks and sanitizes the session transcript and stores the chunks locally under ~/.onlooker/historian/<project-key>/sessions/. Future-tense retrieval (vector embeddings + UserPromptSubmit similarity surfacer) lands in a follow-up; this version ships the indexing pipeline only. Builds on the Onlooker ecosystem plugin.",
5
+ "author": {
6
+ "name": "Onlooker Community",
7
+ "url": "https://onlooker.dev"
8
+ },
9
+ "homepage": "https://onlooker.dev",
10
+ "repository": "https://github.com/onlooker-community/ecosystem",
11
+ "license": "MIT",
12
+ "skills": [],
13
+ "agents": []
14
+ }
@@ -0,0 +1,10 @@
1
+ # Changelog
2
+
3
+ ## [0.1.0](https://github.com/onlooker-community/ecosystem/compare/historian-v0.0.1...historian-v0.1.0) (2026-06-04)
4
+
5
+
6
+ ### Features
7
+
8
+ * **historian:** introduce SessionEnd indexing :spiral_notepad: ([#59](https://github.com/onlooker-community/ecosystem/issues/59)) ([dd6c7f6](https://github.com/onlooker-community/ecosystem/commit/dd6c7f6ea872437cab6b16de50838dfc72750c7b))
9
+
10
+ ## Changelog
@@ -0,0 +1,70 @@
1
+ # Historian
2
+
3
+ Episodic memory layer for past Claude Code sessions.
4
+
5
+ At every `SessionEnd`, Historian reads the session transcript, splits it into overlapping chunks at turn boundaries, redacts secret-shaped substrings, and persists the chunks under `~/.onlooker/historian/<project-key>/sessions/<session-id>.jsonl`. Retrieval is not implemented yet (see [Status](#status)); once it lands, future sessions will be able to retrieve relevant past chunks when the user starts on a similar problem.
6
+
7
+ Historian is a sibling plugin to [`ecosystem`](../../) and assumes the Onlooker observability substrate (`~/.onlooker/`) is present. It is parallel to [`librarian`](../librarian) (which consolidates session decisions into the typed memory store) — both turn session-scoped material into something queryable across sessions, but at different levels of distillation. Librarian distills; historian preserves verbatim.
8
+
9
+ See [`docs/design.md`](docs/design.md) and [ADR-001](docs/adr/001-local-embeddings-only.md) for the full design, including the local-embeddings-by-default decision.
10
+
11
+ ## How it works
12
+
13
+ | Hook | What Historian does |
14
+ |------|---------------------|
15
+ | `SessionEnd` | Reads the transcript at `transcript_path`, drops tool calls and tool results (keeps user + assistant messages), chunks at turn boundaries inside the configured character target with overlap, runs the sanitizer (secret redaction + `[historian:skip]` markers + path-deny list), and appends one JSONL line per chunk to the session's file. Emits `historian.indexing.*` and `historian.chunk.*` events along the way. |
16
+ | `UserPromptSubmit` | No-op in this version — the rate gate, query embedder, ANN lookup, and surfacer are deferred to a follow-up that ships the retrieval pipeline alongside the first embedder backend. |
17
+
18
+ ## Activation
19
+
20
+ Historian is **off by default**. Enable per-project in `.claude/settings.json`:
21
+
22
+ ```json
23
+ {
24
+ "historian": {
25
+ "enabled": true
26
+ }
27
+ }
28
+ ```
29
+
30
+ See [`config.json`](config.json) for the full set of tunable defaults.
31
+
32
+ ## Storage layout
33
+
34
+ ```text
35
+ ~/.onlooker/historian/<project-key>/
36
+ ├── manifest.json # project metadata
37
+ └── sessions/<session-id>.jsonl # one chunk per line, append-only
38
+ ```
39
+
40
+ Each chunk line:
41
+
42
+ ```json
43
+ {
44
+ "chunk_id": "01J...",
45
+ "session_id": "...",
46
+ "chunk_index": 0,
47
+ "start_turn_index": 0,
48
+ "end_turn_index": 3,
49
+ "body_redacted": "...",
50
+ "body_chars": 2103,
51
+ "created_at": "2026-06-04T...",
52
+ "source": "local",
53
+ "redaction_count": 0
54
+ }
55
+ ```
56
+
57
+ ## Status
58
+
59
+ This plugin ships **scaffolding + the SessionEnd indexing pipeline (transcript reader → chunker → sanitizer → JSONL store)**. Deferred to follow-up landings:
60
+
61
+ - **Retrieval and surfacer** — `UserPromptSubmit` rate gate, query embedding, ANN lookup, and `additionalContext` injection of the top match.
62
+ - **Embedder backends** — ollama (`nomic-embed-text`), fastembed sidecar, and remote (opt-in via the two-key egress affirmation from [ADR-001](docs/adr/001-local-embeddings-only.md)). Chunks are indexed without vectors today; the JSONL records make adding embeddings a future column-add, not a re-index.
63
+ - **Prune (retention sweep) and purge (manual)** skills.
64
+ - **`/historian recall`, `/historian setup`, `/historian stats`, `/historian purge`** slash commands.
65
+
66
+ ## Requirements
67
+
68
+ - The `ecosystem` plugin installed (for `~/.onlooker/` substrate).
69
+ - `jq` for JSON manipulation.
70
+ - `python3` for chunking and sanitization (no extra packages — stdlib only).
@@ -0,0 +1,30 @@
1
+ {
2
+ "plugin_name": "historian",
3
+ "storage_path": "~/.onlooker",
4
+ "historian": {
5
+ "enabled": false,
6
+ "indexing": {
7
+ "trigger": "SessionEnd",
8
+ "min_transcript_chars_to_index": 1200,
9
+ "chunk_target_chars": 2400,
10
+ "chunk_overlap_chars": 400,
11
+ "retention_days": 365
12
+ },
13
+ "sanitization": {
14
+ "redact_secret_patterns": true,
15
+ "drop_skip_marker": true,
16
+ "never_index_paths": []
17
+ },
18
+ "session_archive": {
19
+ "enabled": false,
20
+ "_note": "When true, the full transcript at SessionEnd is copied alongside the chunks so retrieval can link to the source. When false, only chunk bodies are retained."
21
+ },
22
+ "embedder": {
23
+ "backend": "none",
24
+ "_note": "Embedder backends (ollama, fastembed, remote) are deferred to a follow-up. The current 'none' value indexes chunks without vectors; retrieval is also deferred."
25
+ },
26
+ "retrieval": {
27
+ "_note": "UserPromptSubmit retrieval and surfacer are deferred to a follow-up commit; the hook currently no-ops."
28
+ }
29
+ }
30
+ }
@@ -0,0 +1,26 @@
1
+ {
2
+ "hooks": {
3
+ "SessionEnd": [
4
+ {
5
+ "matcher": "*",
6
+ "hooks": [
7
+ {
8
+ "type": "command",
9
+ "command": "\"$CLAUDE_PLUGIN_ROOT\"/scripts/hooks/historian-session-end.sh"
10
+ }
11
+ ]
12
+ }
13
+ ],
14
+ "UserPromptSubmit": [
15
+ {
16
+ "matcher": "*",
17
+ "hooks": [
18
+ {
19
+ "type": "command",
20
+ "command": "\"$CLAUDE_PLUGIN_ROOT\"/scripts/hooks/historian-prompt-submit.sh"
21
+ }
22
+ ]
23
+ }
24
+ ]
25
+ }
26
+ }
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env bash
2
+ # Historian UserPromptSubmit hook — STUB.
3
+ #
4
+ # The full retrieval pipeline (rate gate → query embedder → ANN lookup →
5
+ # additionalContext surfacer) is deferred to a follow-up landing that ships
6
+ # the first embedder backend. Today the hook is intentionally a no-op so
7
+ # the plugin can be installed and indexing can run without retrieval.
8
+ #
9
+ # Hook contract:
10
+ # - Always exits 0.
11
+ # - Never produces additionalContext while the retrieval pipeline is
12
+ # unimplemented.
13
+
14
+ set -uo pipefail
15
+ exit 0
@@ -0,0 +1,204 @@
1
+ #!/usr/bin/env bash
2
+ # Historian SessionEnd indexing pipeline.
3
+ #
4
+ # Reads the session transcript, drops tool calls / tool results, chunks
5
+ # the remaining user + assistant turns at turn boundaries, redacts
6
+ # secret-shaped substrings, and appends one JSONL line per surviving
7
+ # chunk to ~/.onlooker/historian/<project-key>/sessions/<session-id>.jsonl.
8
+ #
9
+ # Hook contract:
10
+ # - Always exits 0. Never blocks session shutdown.
11
+ # - No-ops when historian.enabled is not true.
12
+ # - No-ops when there is no project key, no transcript path, or the
13
+ # transcript is shorter than min_transcript_chars_to_index.
14
+ # - Indexing failures are fail-soft: an emitted historian.indexing.complete
15
+ # with outcome "skipped" + a skip_reason is the worst case.
16
+
17
+ set -uo pipefail
18
+
19
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
20
+ PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
21
+
22
+ _ECOSYSTEM_ROOT="${ONLOOKER_ECOSYSTEM_ROOT:-}"
23
+ if [[ -z "$_ECOSYSTEM_ROOT" ]]; then
24
+ _candidate="$(cd "${PLUGIN_ROOT}/../.." 2>/dev/null && pwd)"
25
+ if [[ -f "${_candidate}/scripts/lib/validate-path.sh" ]]; then
26
+ _ECOSYSTEM_ROOT="$_candidate"
27
+ fi
28
+ fi
29
+ if [[ -n "$_ECOSYSTEM_ROOT" && -f "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh" ]]; then
30
+ # shellcheck disable=SC1091
31
+ CLAUDE_PLUGIN_ROOT="$_ECOSYSTEM_ROOT" source "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh"
32
+ fi
33
+
34
+ # shellcheck source=../lib/historian-config.sh
35
+ source "${PLUGIN_ROOT}/scripts/lib/historian-config.sh"
36
+ # shellcheck source=../lib/historian-project-key.sh
37
+ source "${PLUGIN_ROOT}/scripts/lib/historian-project-key.sh"
38
+ # shellcheck source=../lib/historian-ulid.sh
39
+ source "${PLUGIN_ROOT}/scripts/lib/historian-ulid.sh"
40
+ # shellcheck source=../lib/historian-storage.sh
41
+ source "${PLUGIN_ROOT}/scripts/lib/historian-storage.sh"
42
+ # shellcheck source=../lib/historian-emit.sh
43
+ source "${PLUGIN_ROOT}/scripts/lib/historian-emit.sh"
44
+ # shellcheck source=../lib/historian-transcript.sh
45
+ source "${PLUGIN_ROOT}/scripts/lib/historian-transcript.sh"
46
+ # shellcheck source=../lib/historian-chunker.sh
47
+ source "${PLUGIN_ROOT}/scripts/lib/historian-chunker.sh"
48
+ # shellcheck source=../lib/historian-sanitizer.sh
49
+ source "${PLUGIN_ROOT}/scripts/lib/historian-sanitizer.sh"
50
+
51
+ INPUT=$(cat 2>/dev/null || true)
52
+ CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // ""' 2>/dev/null) || CWD=""
53
+ SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // ""' 2>/dev/null) || SESSION_ID=""
54
+ TRANSCRIPT_PATH=$(printf '%s' "$INPUT" | jq -r '.transcript_path // ""' 2>/dev/null) || TRANSCRIPT_PATH=""
55
+ [[ -z "$CWD" ]] && CWD="$(pwd)"
56
+ [[ -z "$SESSION_ID" ]] && SESSION_ID="unknown"
57
+
58
+ REPO_ROOT=$(historian_project_repo_root "$CWD")
59
+ historian_config_load "$REPO_ROOT"
60
+ historian_config_enabled || exit 0
61
+
62
+ PROJECT_KEY=$(historian_project_key "$CWD")
63
+ [[ -z "$PROJECT_KEY" ]] && exit 0
64
+
65
+ historian_storage_init "$PROJECT_KEY" || exit 0
66
+ REMOTE_URL=$(historian_project_remote_url "$CWD")
67
+ historian_storage_write_manifest "$PROJECT_KEY" "$REMOTE_URL" "$REPO_ROOT" || true
68
+
69
+ # ----------------------------------------------------------------------------
70
+ # Transcript-availability check first — emit no started event for the
71
+ # transcript_unavailable path, just a complete-with-skip so the timeline
72
+ # reads cleanly. Once we have a real char count, emit started with that
73
+ # count (the schema requires transcript_chars on started, so emitting
74
+ # zero before the read produced misleading telemetry).
75
+ # ----------------------------------------------------------------------------
76
+
77
+ SCAN_START_MS=$(python3 -c 'import time; print(int(time.time() * 1000))' 2>/dev/null) \
78
+ || SCAN_START_MS=$(($(date +%s) * 1000))
79
+
80
# Emit a historian.indexing.complete event with outcome "skipped".
#
# Globals:   SCAN_START_MS (read), SESSION_ID (read)
# Arguments: $1 - skip reason recorded as skip_reason
# Returns:   the exit status of historian_emit
_emit_skip() {
  local reason="$1"
  local finished_ms elapsed_ms payload

  # Prefer python3 for millisecond resolution; fall back to whole seconds
  # from date(1) scaled to milliseconds when python3 is unavailable.
  if ! finished_ms=$(python3 -c 'import time; print(int(time.time() * 1000))' 2>/dev/null); then
    finished_ms=$(($(date +%s) * 1000))
  fi
  elapsed_ms=$((finished_ms - SCAN_START_MS))

  payload="$(jq -cn \
    --arg outcome "skipped" \
    --arg skip_reason "$reason" \
    --argjson duration_ms "$elapsed_ms" \
    '{ outcome: $outcome, skip_reason: $skip_reason, duration_ms: $duration_ms }')"

  historian_emit "historian.indexing.complete" "$SESSION_ID" "$payload"
}
92
+
93
+ if [[ -z "$TRANSCRIPT_PATH" || ! -f "$TRANSCRIPT_PATH" ]]; then
94
+ _emit_skip "transcript_unavailable"
95
+ exit 0
96
+ fi
97
+
98
+ MIN_CHARS=$(historian_config_get '.historian.indexing.min_transcript_chars_to_index')
99
+ [[ -z "$MIN_CHARS" || "$MIN_CHARS" == "null" ]] && MIN_CHARS=1200
100
+
101
+ TURNS=$(historian_transcript_load "$TRANSCRIPT_PATH")
102
+ TRANSCRIPT_CHARS=$(historian_transcript_char_count "$TURNS")
103
+ [[ -z "$TRANSCRIPT_CHARS" || "$TRANSCRIPT_CHARS" == "null" ]] && TRANSCRIPT_CHARS=0
104
+
105
+ historian_emit "historian.indexing.started" "$SESSION_ID" "$(jq -cn \
106
+ --arg session_id "$SESSION_ID" \
107
+ --argjson transcript_chars "$TRANSCRIPT_CHARS" \
108
+ '{ session_id: $session_id, transcript_chars: $transcript_chars }')"
109
+
110
+ if (( TRANSCRIPT_CHARS < MIN_CHARS )); then
111
+ _emit_skip "too_short"
112
+ exit 0
113
+ fi
114
+
115
+ # ----------------------------------------------------------------------------
116
+ # Chunker → sanitizer → JSONL store.
117
+ # ----------------------------------------------------------------------------
118
+
119
+ TARGET_CHARS=$(historian_config_get '.historian.indexing.chunk_target_chars')
120
+ [[ -z "$TARGET_CHARS" || "$TARGET_CHARS" == "null" ]] && TARGET_CHARS=2400
121
+ OVERLAP_CHARS=$(historian_config_get '.historian.indexing.chunk_overlap_chars')
122
+ [[ -z "$OVERLAP_CHARS" || "$OVERLAP_CHARS" == "null" ]] && OVERLAP_CHARS=400
123
+
124
+ CHUNKS=$(historian_chunker_split "$TURNS" "$TARGET_CHARS" "$OVERLAP_CHARS")
125
+ NEVER_INDEX_PATHS=$(historian_config_get '.historian.sanitization.never_index_paths | tojson')
126
+ [[ -z "$NEVER_INDEX_PATHS" || "$NEVER_INDEX_PATHS" == "null" ]] && NEVER_INDEX_PATHS='[]'
127
+
128
+ # Honor the two on/off knobs from the config block.
129
+ REDACT_SECRETS=$(historian_config_get '.historian.sanitization.redact_secret_patterns')
130
+ [[ -z "$REDACT_SECRETS" || "$REDACT_SECRETS" == "null" ]] && REDACT_SECRETS="true"
131
+ DROP_SKIP=$(historian_config_get '.historian.sanitization.drop_skip_marker')
132
+ [[ -z "$DROP_SKIP" || "$DROP_SKIP" == "null" ]] && DROP_SKIP="true"
133
+
134
+ SANITIZED=$(historian_sanitizer_run "$CHUNKS" "$NEVER_INDEX_PATHS" "$REDACT_SECRETS" "$DROP_SKIP")
135
+ KEPT=$(printf '%s' "$SANITIZED" | jq '.kept')
136
+ DROPPED=$(printf '%s' "$SANITIZED" | jq '.dropped')
137
+
138
+ # Re-indexing replaces the existing session file rather than appending,
139
+ # so SessionEnd is safely idempotent if re-fired against the same id.
140
+ historian_storage_reset_session "$PROJECT_KEY" "$SESSION_ID"
141
+
142
+ NOW_TS=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
143
+ CHUNKS_INDEXED=0
144
+ KEPT_COUNT=$(printf '%s' "$KEPT" | jq 'length' 2>/dev/null) || KEPT_COUNT=0
145
+
146
+ for ((i = 0; i < KEPT_COUNT; i++)); do
147
+ CHUNK=$(printf '%s' "$KEPT" | jq -c ".[$i]")
148
+ [[ -z "$CHUNK" || "$CHUNK" == "null" ]] && continue
149
+
150
+ CHUNK_ID=$(historian_ulid)
151
+ REDACTION_COUNT=$(printf '%s' "$CHUNK" | jq -r '.redaction_count // 0')
152
+
153
+ RECORD=$(jq -cn \
154
+ --arg chunk_id "$CHUNK_ID" \
155
+ --arg session_id "$SESSION_ID" \
156
+ --argjson chunk_input "$CHUNK" \
157
+ --arg created_at "$NOW_TS" \
158
+ --arg source "local" \
159
+ '$chunk_input + {
160
+ chunk_id: $chunk_id,
161
+ session_id: $session_id,
162
+ created_at: $created_at,
163
+ source: $source
164
+ }')
165
+
166
+ if historian_storage_append_chunk "$PROJECT_KEY" "$SESSION_ID" "$RECORD"; then
167
+ CHUNKS_INDEXED=$((CHUNKS_INDEXED + 1))
168
+ if (( REDACTION_COUNT > 0 )); then
169
+ historian_emit "historian.chunk.sanitized" "$SESSION_ID" "$(jq -cn \
170
+ --arg chunk_id "$CHUNK_ID" \
171
+ --argjson redaction_count "$REDACTION_COUNT" \
172
+ '{ chunk_id: $chunk_id, redaction_count: $redaction_count }')"
173
+ fi
174
+ fi
175
+ done
176
+
177
# Emit one historian.chunk.dropped event per distinct drop reason rather
# than one per dropped chunk (per-chunk emission would spam the log).
#
# DROPPED_COUNT is normalised to a non-negative integer: jq exits 0 with no
# output when $DROPPED is empty, which would otherwise leave the variable
# blank and break both the arithmetic test below and the later
# `--argjson chunks_dropped` in the completion event.
DROPPED_COUNT=$(printf '%s' "$DROPPED" | jq 'length' 2>/dev/null) || DROPPED_COUNT=0
[[ "$DROPPED_COUNT" =~ ^[0-9]+$ ]] || DROPPED_COUNT=0
if (( DROPPED_COUNT > 0 )); then
  # Read reasons line by line instead of word-splitting a command
  # substitution, so a reason containing spaces or glob characters stays
  # one event. The list is fed on fd 3 so nothing inside the loop body can
  # consume it by reading stdin.
  while IFS= read -r -u 3 reason; do
    [[ -n "$reason" ]] || continue
    historian_emit "historian.chunk.dropped" "$SESSION_ID" "$(jq -cn \
      --arg reason "$reason" \
      '{ reason: $reason }')"
  done 3< <(printf '%s' "$DROPPED" | jq -r '.[].reason' 2>/dev/null | sort -u)
fi
187
+
188
+ NOW_MS=$(python3 -c 'import time; print(int(time.time() * 1000))' 2>/dev/null) \
189
+ || NOW_MS=$(($(date +%s) * 1000))
190
+ DURATION_MS=$((NOW_MS - SCAN_START_MS))
191
+
192
+ historian_emit "historian.indexing.complete" "$SESSION_ID" "$(jq -cn \
193
+ --arg outcome "ok" \
194
+ --argjson chunks_indexed "$CHUNKS_INDEXED" \
195
+ --argjson chunks_dropped "$DROPPED_COUNT" \
196
+ --argjson duration_ms "$DURATION_MS" \
197
+ '{
198
+ outcome: $outcome,
199
+ chunks_indexed: $chunks_indexed,
200
+ chunks_dropped: $chunks_dropped,
201
+ duration_ms: $duration_ms
202
+ }')"
203
+
204
+ exit 0
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env bash
2
+ # Chunker for Historian.
3
+ #
4
+ # Given a JSON array of normalized turns (from historian-transcript.sh),
5
+ # produces a JSON array of chunk records. Each chunk:
6
+ # - Respects turn boundaries (no mid-turn splits)
7
+ # - Targets `target_chars` characters with `overlap_chars` overlap
8
+ # (carrying the last N chars of one chunk's content as the start of
9
+ # the next)
10
+ # - Records start_turn_index, end_turn_index, body_chars
11
+ #
12
+ # Character-based chunking instead of token-based: tokenizers vary by
13
+ # embedder, and the chunker shouldn't have to know which embedder will
14
+ # run downstream. Char counts approximate token counts at ~4 chars / token
15
+ # for English-ish prose; configs are tunable.
16
+
17
# Usage: historian_chunker_split <turns_json> <target_chars> <overlap_chars>
#
# Arguments:
#   $1 - JSON array of turns; each turn is read for "role", "content" and
#        "turn_index" (default: [])
#   $2 - target chunk size in characters (default: 2400)
#   $3 - characters of the previous chunk carried into the next (default: 400)
# Output: JSON array of chunks on stdout, each with chunk_index,
#         start_turn_index, end_turn_index, body and body_chars.
#
# The turns JSON is handed to Python on file descriptor 3 rather than as a
# command-line argument, so large transcripts cannot hit the kernel's
# argument-size limit. Stdin stays reserved for the Python program itself.
historian_chunker_split() {
  local turns="${1:-[]}"
  local target_chars="${2:-2400}"
  local overlap_chars="${3:-400}"

  python3 - "$target_chars" "$overlap_chars" 3<<<"$turns" <<'PY'
import json
import sys

target = int(sys.argv[1])
overlap = max(0, int(sys.argv[2]))

# Turns arrive on fd 3 (see the shell wrapper); blank input means no turns.
with open(3, encoding="utf-8") as turns_fh:
    turns = json.loads(turns_fh.read().strip() or "[]")

chunks = []
buf_parts = []
buf_chars = 0
buf_start = None
buf_end = None

# Tail of the most recently emitted chunk. It seeds the next chunk's body
# but is not attributed a turn (the overlap is purely textual continuity).
pending_overlap = ""


def tail(body):
    """Return the last `overlap` characters of body ("" when overlap is 0)."""
    return body[-overlap:] if overlap > 0 else ""


def emit(body, start, end):
    """Append body as a chunk and remember its tail as the next overlap.

    Whitespace-only bodies are skipped and leave the pending overlap alone.
    """
    global pending_overlap
    if not body.strip():
        return
    chunks.append({
        "chunk_index": len(chunks),
        "start_turn_index": start,
        "end_turn_index": end,
        "body": body,
        "body_chars": len(body),
    })
    # Taken from the chunk just emitted, never from an older one.
    pending_overlap = tail(body)


def flush_buffer():
    """Emit the buffered turns (if any) as one chunk and reset the buffer."""
    global buf_parts, buf_chars, buf_start, buf_end
    if buf_parts:
        emit("\n\n".join(buf_parts), buf_start, buf_end)
    buf_parts = []
    buf_chars = 0
    buf_start = None
    buf_end = None


for turn in turns:
    content = turn.get("content", "")
    if not content:
        continue
    rendered = f"{turn.get('role', '')}: {content}"
    rendered_len = len(rendered)
    turn_index = turn["turn_index"]

    # A single turn larger than the target is never split: flush whatever
    # is buffered, then emit the turn as a standalone chunk prefixed with
    # the overlap carried from the chunk before it.
    if rendered_len > target:
        flush_buffer()
        prefix = pending_overlap + "\n\n" if pending_overlap else ""
        emit(prefix + rendered, turn_index, turn_index)
        continue

    # Adding this turn would overshoot the target: close the current chunk.
    separator = 2 if buf_parts else 0  # 2 for the "\n\n" joiner
    if buf_parts and buf_chars + rendered_len + separator > target:
        flush_buffer()

    # A fresh buffer starts with the overlap carried from the last chunk.
    if not buf_parts and pending_overlap:
        buf_parts.append(pending_overlap)
        buf_chars += len(pending_overlap)
        pending_overlap = ""

    buf_parts.append(rendered)
    buf_chars += rendered_len + (2 if len(buf_parts) > 1 else 0)
    if buf_start is None:
        buf_start = turn_index
    buf_end = turn_index

# Final flush.
flush_buffer()

print(json.dumps(chunks))
PY
}
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env bash
2
+ # Config resolution for Historian.
3
+ #
4
+ # Reads three layers, latest wins:
5
+ # 1. plugins/historian/config.json (defaults shipped with the plugin)
6
+ # 2. ~/.claude/settings.json
7
+ # 3. <repo>/.claude/settings.json
8
+ #
9
+ # Exposes:
10
+ # historian_config_load <repo_root> # populates _HISTORIAN_CONFIG (JSON)
11
+ # historian_config_get <jq-path> # echoes string value (empty if unset)
12
+ # historian_config_enabled # 0 if historian.enabled is true
13
+ #
14
+ # Settings overlay only touches the `historian.*` subtree of settings.json.
15
+
16
+ _HISTORIAN_CONFIG="{}"
17
+
18
# Load and merge the Historian config layers into _HISTORIAN_CONFIG.
#
# Layers, later wins:
#   1. ${CLAUDE_PLUGIN_ROOT}/config.json      (whole file)
#   2. ${HOME}/.claude/settings.json          (only the .historian subtree)
#   3. <repo_root>/.claude/settings.json      (only the .historian subtree)
#
# Globals:   CLAUDE_PLUGIN_ROOT, HOME (read); _HISTORIAN_CONFIG (written)
# Arguments: $1 - repository root; may be empty, in which case layer 3 is
#                 skipped
# Returns:   0 always; unreadable or invalid layers are ignored.
historian_config_load() {
  local repo_root="${1:-}"
  local plugin_root="${CLAUDE_PLUGIN_ROOT:-}"
  local home_dir="${HOME:-}"

  local merged="{}"
  local file root defaults overlay candidate

  # Layer 1: plugin defaults. Skipped when the plugin root is unknown so an
  # unrelated /config.json is never picked up.
  if [[ -n "$plugin_root" && -f "${plugin_root}/config.json" ]]; then
    file="${plugin_root}/config.json"
    if defaults=$(jq '.' "$file" 2>/dev/null) && [[ -n "$defaults" ]]; then
      # Only adopt the result when the merge succeeded; otherwise keep "{}".
      if candidate=$(jq -n --argjson a "$merged" --argjson b "$defaults" '$a * $b' 2>/dev/null) \
        && [[ -n "$candidate" ]]; then
        merged="$candidate"
      fi
    fi
  fi

  # Layers 2 and 3: settings overlays. An empty root is skipped instead of
  # being expanded to /.claude/settings.json.
  for root in "$home_dir" "$repo_root"; do
    [[ -n "$root" ]] || continue
    file="${root}/.claude/settings.json"
    [[ -f "$file" ]] || continue

    overlay=$(jq '{ historian: (.historian // {}) }' "$file" 2>/dev/null) || continue
    [[ -n "$overlay" ]] || continue

    # Merge into a scratch variable first: assigning the command
    # substitution straight to `merged` would blank the accumulated config
    # whenever jq fails.
    if candidate=$(jq -n --argjson a "$merged" --argjson b "$overlay" '
      def deepmerge($a; $b):
        if ($a|type) == "object" and ($b|type) == "object" then
          reduce (($a|keys) + ($b|keys) | unique)[] as $k
            ({}; .[$k] = deepmerge($a[$k]; $b[$k]))
        elif $b == null then $a
        else $b end;
      deepmerge($a; $b)
    ' 2>/dev/null) && [[ -n "$candidate" ]]; then
      merged="$candidate"
    fi
  done

  _HISTORIAN_CONFIG="$merged"
}
52
+
53
# Look up one value in the loaded config (_HISTORIAN_CONFIG).
#
# Arguments: $1 - jq path expression, e.g. '.historian.enabled'
# Outputs:   the raw value on stdout; nothing when the value is null/unset
#
# null is filtered with an explicit comparison rather than `// empty`,
# because `// empty` would also swallow a boolean false and hide settings
# that were explicitly disabled.
historian_config_get() {
  local jq_path="$1"
  local filter="${jq_path} | if . == null then empty else . end"
  jq -r "$filter" 2>/dev/null <<<"$_HISTORIAN_CONFIG"
}
61
+
62
# Report whether Historian is switched on.
#
# Returns: 0 when historian.enabled in the loaded config is exactly true,
#          1 for any other value (false, unset, or anything else).
historian_config_enabled() {
  [[ "$(historian_config_get '.historian.enabled')" == "true" ]]
}