@onlooker-community/ecosystem 0.21.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/.claude-plugin/marketplace.json +13 -0
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.release-please-manifest.json +3 -2
  4. package/CHANGELOG.md +15 -0
  5. package/hooks/hooks.json +4 -0
  6. package/package.json +2 -2
  7. package/plugins/historian/.claude-plugin/plugin.json +14 -0
  8. package/plugins/historian/CHANGELOG.md +17 -0
  9. package/plugins/historian/README.md +84 -0
  10. package/plugins/historian/config.json +46 -0
  11. package/plugins/historian/hooks/hooks.json +26 -0
  12. package/plugins/historian/scripts/hooks/historian-prompt-submit.sh +269 -0
  13. package/plugins/historian/scripts/hooks/historian-session-end.sh +235 -0
  14. package/plugins/historian/scripts/lib/historian-chunker.sh +129 -0
  15. package/plugins/historian/scripts/lib/historian-config.sh +66 -0
  16. package/plugins/historian/scripts/lib/historian-embedder.sh +126 -0
  17. package/plugins/historian/scripts/lib/historian-emit.sh +61 -0
  18. package/plugins/historian/scripts/lib/historian-project-key.sh +80 -0
  19. package/plugins/historian/scripts/lib/historian-retriever.sh +191 -0
  20. package/plugins/historian/scripts/lib/historian-sanitizer.sh +123 -0
  21. package/plugins/historian/scripts/lib/historian-storage.sh +157 -0
  22. package/plugins/historian/scripts/lib/historian-transcript.sh +83 -0
  23. package/plugins/historian/scripts/lib/historian-ulid.sh +43 -0
  24. package/release-please-config.json +16 -0
  25. package/scripts/hooks/memory-recall-tracker.sh +206 -0
  26. package/test/bats/historian-prompt-submit.bats +236 -0
  27. package/test/bats/historian-session-end.bats +296 -0
  28. package/test/bats/memory-recall-tracker.bats +189 -0
@@ -0,0 +1,235 @@
1
+ #!/usr/bin/env bash
2
+ # Historian SessionEnd indexing pipeline.
3
+ #
4
+ # Reads the session transcript, drops tool calls / tool results, chunks
5
+ # the remaining user + assistant turns at turn boundaries, redacts
6
+ # secret-shaped substrings, and appends one JSONL line per surviving
7
+ # chunk to ~/.onlooker/historian/<project-key>/sessions/<session-id>.jsonl.
8
+ #
9
+ # Hook contract:
10
+ # - Always exits 0. Never blocks session shutdown.
11
+ # - No-ops when historian.enabled is not true.
12
+ # - No-ops when there is no project key, no transcript path, or the
13
+ # transcript is shorter than min_transcript_chars_to_index.
14
+ # - Indexing failures are fail-soft: an emitted historian.indexing.complete
15
+ # with outcome "skipped" + a skip_reason is the worst case.
16
+
17
+ set -uo pipefail
18
+
19
# Locate this hook script and the historian plugin root (two levels up).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Resolve the ecosystem root: an explicit ONLOOKER_ECOSYSTEM_ROOT wins,
# otherwise probe two levels above the plugin for the shared
# validate-path.sh library.
_ECOSYSTEM_ROOT="${ONLOOKER_ECOSYSTEM_ROOT:-}"
if [[ -z "$_ECOSYSTEM_ROOT" ]]; then
  _candidate="$(cd "${PLUGIN_ROOT}/../.." 2>/dev/null && pwd)"
  [[ -f "${_candidate}/scripts/lib/validate-path.sh" ]] && _ECOSYSTEM_ROOT="$_candidate"
fi

# Source the shared path validator only when it actually exists.
# CLAUDE_PLUGIN_ROOT is overridden just for the duration of the source.
if [[ -n "$_ECOSYSTEM_ROOT" ]]; then
  if [[ -f "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh" ]]; then
    # shellcheck disable=SC1091
    CLAUDE_PLUGIN_ROOT="$_ECOSYSTEM_ROOT" source "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh"
  fi
fi
33
+
34
+ # shellcheck source=../lib/historian-config.sh
35
+ source "${PLUGIN_ROOT}/scripts/lib/historian-config.sh"
36
+ # shellcheck source=../lib/historian-project-key.sh
37
+ source "${PLUGIN_ROOT}/scripts/lib/historian-project-key.sh"
38
+ # shellcheck source=../lib/historian-ulid.sh
39
+ source "${PLUGIN_ROOT}/scripts/lib/historian-ulid.sh"
40
+ # shellcheck source=../lib/historian-storage.sh
41
+ source "${PLUGIN_ROOT}/scripts/lib/historian-storage.sh"
42
+ # shellcheck source=../lib/historian-emit.sh
43
+ source "${PLUGIN_ROOT}/scripts/lib/historian-emit.sh"
44
+ # shellcheck source=../lib/historian-transcript.sh
45
+ source "${PLUGIN_ROOT}/scripts/lib/historian-transcript.sh"
46
+ # shellcheck source=../lib/historian-chunker.sh
47
+ source "${PLUGIN_ROOT}/scripts/lib/historian-chunker.sh"
48
+ # shellcheck source=../lib/historian-sanitizer.sh
49
+ source "${PLUGIN_ROOT}/scripts/lib/historian-sanitizer.sh"
50
+ # shellcheck source=../lib/historian-embedder.sh
51
+ source "${PLUGIN_ROOT}/scripts/lib/historian-embedder.sh"
52
+
53
# Read the hook payload from stdin and pull out the fields we need.
# Any jq failure degrades to an empty value, never an abort.
INPUT=$(cat 2>/dev/null || true)
CWD=$(jq -r '.cwd // ""' 2>/dev/null <<<"$INPUT") || CWD=""
SESSION_ID=$(jq -r '.session_id // ""' 2>/dev/null <<<"$INPUT") || SESSION_ID=""
TRANSCRIPT_PATH=$(jq -r '.transcript_path // ""' 2>/dev/null <<<"$INPUT") || TRANSCRIPT_PATH=""
CWD="${CWD:-$(pwd)}"
SESSION_ID="${SESSION_ID:-unknown}"

# Load layered config for this repo; bail out quietly unless historian
# is explicitly enabled.
REPO_ROOT=$(historian_project_repo_root "$CWD")
historian_config_load "$REPO_ROOT"
if ! historian_config_enabled; then
  exit 0
fi

# No project key means there is nowhere to file the chunks.
PROJECT_KEY=$(historian_project_key "$CWD")
if [[ -z "$PROJECT_KEY" ]]; then
  exit 0
fi

# Storage must initialise; the manifest write is best-effort.
if ! historian_storage_init "$PROJECT_KEY"; then
  exit 0
fi
REMOTE_URL=$(historian_project_remote_url "$CWD")
historian_storage_write_manifest "$PROJECT_KEY" "$REMOTE_URL" "$REPO_ROOT" || true
70
+
71
+ # ----------------------------------------------------------------------------
72
+ # Transcript-availability check first — emit no started event for the
73
+ # transcript_unavailable path, just a complete-with-skip so the timeline
74
+ # reads cleanly. Once we have a real char count, emit started with that
75
+ # count (the schema requires transcript_chars on started, so emitting
76
+ # zero before the read produced misleading telemetry).
77
+ # ----------------------------------------------------------------------------
78
+
79
# Current wall-clock time in milliseconds. Uses python3 for sub-second
# resolution and falls back to whole seconds from `date` when python3
# is unavailable or fails.
_historian_now_ms() {
  python3 -c 'import time; print(int(time.time() * 1000))' 2>/dev/null \
    || printf '%s' "$(($(date +%s) * 1000))"
}

SCAN_START_MS=$(_historian_now_ms)

# Emit historian.indexing.complete with outcome "skipped".
# Globals:   SCAN_START_MS (read), SESSION_ID (read)
# Arguments: $1 - skip reason recorded as skip_reason
_emit_skip() {
  local reason="$1"
  local finished_ms elapsed_ms skip_payload
  finished_ms=$(_historian_now_ms)
  elapsed_ms=$((finished_ms - SCAN_START_MS))
  skip_payload="$(jq -cn \
    --arg outcome "skipped" \
    --arg skip_reason "$reason" \
    --argjson duration_ms "$elapsed_ms" \
    '{ outcome: $outcome, skip_reason: $skip_reason, duration_ms: $duration_ms }')"
  historian_emit "historian.indexing.complete" "$SESSION_ID" "$skip_payload"
}
94
+
95
# No transcript on disk: report the skip and stop.
if [[ -z "$TRANSCRIPT_PATH" || ! -f "$TRANSCRIPT_PATH" ]]; then
  _emit_skip "transcript_unavailable"
  exit 0
fi

# MIN_CHARS comes from layered config (including the repo's own
# .claude/settings.json), so it must be validated before it reaches an
# arithmetic context: bash evaluates variable *values* as expressions
# inside (( )), which would abort under `set -u` on a stray word or run
# embedded command substitutions. Anything that is not a plain
# non-negative integer falls back to the default.
MIN_CHARS=$(historian_config_get '.historian.indexing.min_transcript_chars_to_index')
if [[ "$MIN_CHARS" =~ ^[0-9]+$ ]]; then
  MIN_CHARS=$((10#$MIN_CHARS)) # force base 10 so "0800" is not read as octal
else
  MIN_CHARS=1200
fi

TURNS=$(historian_transcript_load "$TRANSCRIPT_PATH")
TRANSCRIPT_CHARS=$(historian_transcript_char_count "$TURNS")
if [[ "$TRANSCRIPT_CHARS" =~ ^[0-9]+$ ]]; then
  TRANSCRIPT_CHARS=$((10#$TRANSCRIPT_CHARS))
else
  # Empty, "null", or otherwise unusable count: treat as an empty transcript.
  TRANSCRIPT_CHARS=0
fi

# The started event carries the real character count.
historian_emit "historian.indexing.started" "$SESSION_ID" "$(jq -cn \
  --arg session_id "$SESSION_ID" \
  --argjson transcript_chars "$TRANSCRIPT_CHARS" \
  '{ session_id: $session_id, transcript_chars: $transcript_chars }')"

if ((TRANSCRIPT_CHARS < MIN_CHARS)); then
  _emit_skip "too_short"
  exit 0
fi
116
+
117
+ # ----------------------------------------------------------------------------
118
+ # Chunker → sanitizer → JSONL store.
119
+ # ----------------------------------------------------------------------------
120
+
121
# Chunk sizing knobs; unset or null config values fall back to defaults.
TARGET_CHARS=$(historian_config_get '.historian.indexing.chunk_target_chars')
case "$TARGET_CHARS" in
  '' | null) TARGET_CHARS=2400 ;;
esac
OVERLAP_CHARS=$(historian_config_get '.historian.indexing.chunk_overlap_chars')
case "$OVERLAP_CHARS" in
  '' | null) OVERLAP_CHARS=400 ;;
esac

CHUNKS=$(historian_chunker_split "$TURNS" "$TARGET_CHARS" "$OVERLAP_CHARS")

# Paths that must never be indexed, serialised as a JSON array.
NEVER_INDEX_PATHS=$(historian_config_get '.historian.sanitization.never_index_paths | tojson')
case "$NEVER_INDEX_PATHS" in
  '' | null) NEVER_INDEX_PATHS='[]' ;;
esac

# The two sanitizer on/off switches both default to "true".
REDACT_SECRETS=$(historian_config_get '.historian.sanitization.redact_secret_patterns')
case "$REDACT_SECRETS" in
  '' | null) REDACT_SECRETS="true" ;;
esac
DROP_SKIP=$(historian_config_get '.historian.sanitization.drop_skip_marker')
case "$DROP_SKIP" in
  '' | null) DROP_SKIP="true" ;;
esac

# Run the sanitizer and split its result into kept / dropped lists.
SANITIZED=$(historian_sanitizer_run "$CHUNKS" "$NEVER_INDEX_PATHS" "$REDACT_SECRETS" "$DROP_SKIP")
KEPT=$(jq '.kept' <<<"$SANITIZED")
DROPPED=$(jq '.dropped' <<<"$SANITIZED")
139
+
140
# Check the embedder a single time up front rather than per chunk. When
# it is not reachable, chunks are still written, just without an
# `embedding` field. Because the current retriever is embedding-only,
# such chunks stay invisible to retrieval until a later re-embed pass;
# their bodies are stored intact, so no re-chunking is needed then.
EMBEDDER_READY=0
EMBEDDER_BACKEND=$(historian_config_get '.historian.embedder.backend')
case "$EMBEDDER_BACKEND" in
  '' | null) EMBEDDER_BACKEND="none" ;;
esac

if [[ "$EMBEDDER_BACKEND" == "none" ]]; then
  : # embeddings are switched off; nothing to probe
elif historian_embedder_available; then
  EMBEDDER_READY=1
else
  historian_emit "historian.embedder.unavailable" "$SESSION_ID" "$(jq -cn \
    --arg backend "$EMBEDDER_BACKEND" \
    '{ backend: $backend }')"
fi
158
+
159
# Re-indexing replaces the existing session file rather than appending,
# so SessionEnd is safely idempotent if re-fired against the same id.
historian_storage_reset_session "$PROJECT_KEY" "$SESSION_ID"

NOW_TS=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
CHUNKS_INDEXED=0
KEPT_COUNT=$(printf '%s' "$KEPT" | jq 'length' 2>/dev/null) || KEPT_COUNT=0
[[ "$KEPT_COUNT" =~ ^[0-9]+$ ]] || KEPT_COUNT=0

# Stream the kept chunks one compact JSON object per line. This parses
# $KEPT once instead of once per chunk. The stream is read on fd 3 so
# that helpers called inside the loop can never consume it via stdin.
while IFS= read -r -u 3 CHUNK; do
  [[ -z "$CHUNK" || "$CHUNK" == "null" ]] && continue

  CHUNK_ID=$(historian_ulid)
  REDACTION_COUNT=$(printf '%s' "$CHUNK" | jq -r '.redaction_count // 0')
  # Validate before the value reaches (( )) and --argjson below.
  [[ "$REDACTION_COUNT" =~ ^[0-9]+$ ]] || REDACTION_COUNT=0
  BODY=$(printf '%s' "$CHUNK" | jq -r '.body_redacted // ""')

  # Build the base record. The chunk is fed on stdin rather than via
  # --argjson: a single oversized turn can exceed the kernel's
  # per-argument size limit and make the jq exec fail outright.
  RECORD=$(printf '%s' "$CHUNK" | jq -c \
    --arg chunk_id "$CHUNK_ID" \
    --arg session_id "$SESSION_ID" \
    --arg created_at "$NOW_TS" \
    --arg source "local" \
    '. + {
      chunk_id: $chunk_id,
      session_id: $session_id,
      created_at: $created_at,
      source: $source
    }')
  # Never append an empty line when record construction failed.
  [[ -z "$RECORD" ]] && continue

  # Attach the embedding when the embedder answered; an empty reply
  # leaves the record vector-less (fail-soft).
  if ((EMBEDDER_READY == 1)) && [[ -n "$BODY" ]]; then
    EMBEDDING=$(historian_embedder_embed "$BODY")
    if [[ -n "$EMBEDDING" ]]; then
      RECORD=$(printf '%s' "$RECORD" | jq -c --argjson v "$EMBEDDING" \
        '. + { embedding: $v }')
    fi
  fi

  if historian_storage_append_chunk "$PROJECT_KEY" "$SESSION_ID" "$RECORD"; then
    CHUNKS_INDEXED=$((CHUNKS_INDEXED + 1))
    if ((REDACTION_COUNT > 0)); then
      historian_emit "historian.chunk.sanitized" "$SESSION_ID" "$(jq -cn \
        --arg chunk_id "$CHUNK_ID" \
        --argjson redaction_count "$REDACTION_COUNT" \
        '{ chunk_id: $chunk_id, redaction_count: $redaction_count }')"
    fi
  fi
done 3< <(printf '%s' "$KEPT" | jq -c 'if type == "array" then .[] else empty end' 2>/dev/null)
207
+
208
# Emit one chunk.dropped event per unique skip reason (per-chunk
# emission would spam the log).
DROPPED_COUNT=$(printf '%s' "$DROPPED" | jq 'length' 2>/dev/null) || DROPPED_COUNT=0
# An empty or non-numeric count (e.g. the sanitizer produced no output)
# is treated as "nothing dropped".
[[ "$DROPPED_COUNT" =~ ^[0-9]+$ ]] || DROPPED_COUNT=0

if ((DROPPED_COUNT > 0)); then
  # Read reasons line by line: iterating over an unquoted $(...) would
  # split a reason containing spaces into several bogus events and
  # glob-expand any wildcard characters in it.
  while IFS= read -r -u 3 reason; do
    [[ -z "$reason" ]] && continue
    historian_emit "historian.chunk.dropped" "$SESSION_ID" "$(jq -cn \
      --arg reason "$reason" \
      '{ reason: $reason }')"
  done 3< <(printf '%s' "$DROPPED" | jq -r '.[].reason' 2>/dev/null | sort -u)
fi
218
+
219
# Total wall-clock duration of the indexing pass (python3 for
# millisecond resolution, whole seconds from `date` as a fallback).
NOW_MS=$(python3 -c 'import time; print(int(time.time() * 1000))' 2>/dev/null) \
  || NOW_MS=$(($(date +%s) * 1000))
DURATION_MS=$((NOW_MS - SCAN_START_MS))

# --argjson rejects an empty or non-numeric value, which would make the
# payload construction fail and send the complete event with an empty
# payload. Normalise the counters first.
[[ "${CHUNKS_INDEXED:-}" =~ ^[0-9]+$ ]] || CHUNKS_INDEXED=0
[[ "${DROPPED_COUNT:-}" =~ ^[0-9]+$ ]] || DROPPED_COUNT=0

historian_emit "historian.indexing.complete" "$SESSION_ID" "$(jq -cn \
  --arg outcome "ok" \
  --argjson chunks_indexed "$CHUNKS_INDEXED" \
  --argjson chunks_dropped "$DROPPED_COUNT" \
  --argjson duration_ms "$DURATION_MS" \
  '{
    outcome: $outcome,
    chunks_indexed: $chunks_indexed,
    chunks_dropped: $chunks_dropped,
    duration_ms: $duration_ms
  }')"

# Hook contract: never block session shutdown.
exit 0
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env bash
2
+ # Chunker for Historian.
3
+ #
4
+ # Given a JSON array of normalized turns (from historian-transcript.sh),
5
+ # produces a JSON array of chunk records. Each chunk:
6
+ # - Respects turn boundaries (no mid-turn splits)
7
+ # - Targets `target_chars` characters with `overlap_chars` overlap
8
+ # (carrying the last N chars of one chunk's content as the start of
9
+ # the next)
10
+ # - Records start_turn_index, end_turn_index, body_chars
11
+ #
12
+ # Character-based chunking instead of token-based: tokenizers vary by
13
+ # embedder, and the chunker shouldn't have to know which embedder will
14
+ # run downstream. Char counts approximate token counts at ~4 chars / token
15
+ # for English-ish prose; configs are tunable.
16
+
17
# Split normalized turns into overlapping, turn-aligned chunks.
#
# Usage:  historian_chunker_split <turns_json> <target_chars> <overlap_chars>
# Output: JSON array of chunks on stdout, each with chunk_index,
#         start_turn_index, end_turn_index, body and body_chars.
#
# The turns JSON is handed to python on file descriptor 3 rather than on
# the command line: a single argv string is capped by the kernel (about
# 128 KiB on Linux), which long transcripts routinely exceed.
historian_chunker_split() {
  local turns="${1:-[]}"
  local target_chars="${2:-2400}"
  local overlap_chars="${3:-400}"

  python3 - "$target_chars" "$overlap_chars" 3<<<"$turns" <<'PY'
import json, os, sys

target = int(sys.argv[1])
overlap = max(0, int(sys.argv[2]))

# Turns arrive on fd 3 (stdin is occupied by this script).
with os.fdopen(3, "rb") as fh:
    raw = fh.read().decode("utf-8", errors="replace")
turns = json.loads(raw if raw.strip() else "[]")

chunks = []
buf_parts = []
buf_chars = 0
buf_start = None
buf_end = None

# Overlap text carried from the previously emitted chunk. It seeds the
# next chunk's body but is not attributed to a turn (it is purely textual
# continuity for the embedder).
pending_overlap = ""


def flush(force_text=None):
    """Emit the current buffer as a chunk and reset the buffer.

    force_text overrides the accumulated body and is used when a single
    turn exceeds the target. Returns the body that was emitted, or "" when
    nothing was emitted (empty buffer or whitespace-only body).
    """
    global buf_parts, buf_chars, buf_start, buf_end
    if force_text is None:
        if not buf_parts:
            return ""
        body = "\n\n".join(buf_parts)
    else:
        body = force_text

    emitted = ""
    if body.strip():
        chunks.append({
            "chunk_index": len(chunks),
            "start_turn_index": buf_start,
            "end_turn_index": buf_end,
            "body": body,
            "body_chars": len(body),
        })
        emitted = body

    buf_parts = []
    buf_chars = 0
    buf_start = None
    buf_end = None
    return emitted


def tail(text):
    """Last `overlap` characters of text, or "" when overlap is disabled."""
    return text[-overlap:] if overlap > 0 and text else ""


for turn in turns:
    role = turn.get("role", "")
    content = turn.get("content", "")
    if not content:
        continue
    rendered = f"{role}: {content}"
    rendered_len = len(rendered)
    turn_index = turn.get("turn_index")

    # A single turn larger than the target becomes its own chunk. Whatever
    # is buffered is flushed first, and its tail is carried over so the
    # oversized chunk still overlaps with its predecessor.
    if rendered_len > target:
        if buf_parts:
            pending_overlap = tail(flush())
        body_for_chunk = (pending_overlap + ("\n\n" if pending_overlap else "")) + rendered
        buf_start = turn_index
        buf_end = turn_index
        flush(force_text=body_for_chunk)
        pending_overlap = tail(body_for_chunk)
        continue

    candidate_len = buf_chars + rendered_len + (2 if buf_parts else 0)  # 2 for "\n\n"
    if buf_parts and candidate_len > target:
        # Take the overlap from the body flush() just emitted. Reading
        # chunks[-1] *before* flushing would pick up the chunk before that
        # one (or nothing at all for the first flush).
        pending_overlap = tail(flush())

    if not buf_parts and pending_overlap:
        buf_parts.append(pending_overlap)
        buf_chars += len(pending_overlap)
        pending_overlap = ""

    buf_parts.append(rendered)
    buf_chars += rendered_len + (2 if len(buf_parts) > 1 else 0)
    if buf_start is None:
        buf_start = turn_index
    buf_end = turn_index

# Final flush.
if buf_parts:
    flush()

print(json.dumps(chunks))
PY
}
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env bash
2
+ # Config resolution for Historian.
3
+ #
4
+ # Reads three layers, latest wins:
5
+ # 1. plugins/historian/config.json (defaults shipped with the plugin)
6
+ # 2. ~/.claude/settings.json
7
+ # 3. <repo>/.claude/settings.json
8
+ #
9
+ # Exposes:
10
+ # historian_config_load <repo_root> # populates _HISTORIAN_CONFIG (JSON)
11
+ # historian_config_get <jq-path> # echoes string value (empty if unset)
12
+ # historian_config_enabled # 0 if historian.enabled is true
13
+ #
14
+ # Settings overlay only touches the `historian.*` subtree of settings.json.
15
+
16
_HISTORIAN_CONFIG="{}"

# Load and merge the three config layers into _HISTORIAN_CONFIG.
# Globals:   CLAUDE_PLUGIN_ROOT, HOME (read); _HISTORIAN_CONFIG (written)
# Arguments: $1 - repository root (may be empty)
# Layers, latest wins:
#   1. ${CLAUDE_PLUGIN_ROOT}/config.json         (whole file)
#   2. ${HOME}/.claude/settings.json             (historian subtree only)
#   3. <repo_root>/.claude/settings.json         (historian subtree only)
# A layer that is missing, unreadable or fails to merge is skipped; it
# never discards what earlier layers contributed.
historian_config_load() {
  local repo_root="${1:-}"
  local plugin_root="${CLAUDE_PLUGIN_ROOT:-}"
  local home_dir="${HOME:-}"

  local merged="{}"
  local file base defaults overlay candidate

  # Layer 1: plugin defaults. An empty plugin root is skipped so the
  # lookup never degrades to "/config.json".
  file="${plugin_root}/config.json"
  if [[ -n "$plugin_root" && -f "$file" ]]; then
    defaults=$(jq '.' "$file" 2>/dev/null) || defaults=""
    if [[ -n "$defaults" ]]; then
      candidate=$(jq -n --argjson a "$merged" --argjson b "$defaults" '$a * $b' 2>/dev/null) \
        || candidate=""
      [[ -n "$candidate" ]] && merged="$candidate"
    fi
  fi

  # Layers 2 and 3: settings overlays. Only the `historian` subtree is
  # taken from each settings file.
  for base in "$home_dir" "$repo_root"; do
    # An empty base would resolve to "/.claude/settings.json".
    [[ -n "$base" ]] || continue
    file="${base}/.claude/settings.json"
    [[ -f "$file" ]] || continue
    overlay=$(jq '{ historian: (.historian // {}) }' "$file" 2>/dev/null) || continue
    [[ -z "$overlay" ]] && continue
    # Merge into a scratch variable first: assigning the command
    # substitution straight to `merged` would blank the whole config
    # whenever jq fails (e.g. a settings file holding several documents).
    candidate=$(jq -n --argjson a "$merged" --argjson b "$overlay" '
      def deepmerge($a; $b):
        if ($a|type) == "object" and ($b|type) == "object" then
          reduce (($a|keys) + ($b|keys) | unique)[] as $k
            ({}; .[$k] = deepmerge($a[$k]; $b[$k]))
        elif $b == null then $a
        else $b end;
      deepmerge($a; $b)
    ' 2>/dev/null) || continue
    [[ -n "$candidate" ]] && merged="$candidate"
  done

  _HISTORIAN_CONFIG="$merged"
}
52
+
53
# Look up a value in the config loaded by historian_config_load.
# Arguments: $1 - jq filter selecting the value (e.g. '.historian.enabled')
# Outputs:   the raw value on stdout; nothing when the filter yields null.
# Null is tested explicitly rather than with `// empty`, because `//`
# also discards `false` and an explicit "disabled" setting must survive.
historian_config_get() {
  local path="$1"
  local filter="${path} | if . == null then empty else . end"
  jq -r "$filter" 2>/dev/null <<<"$_HISTORIAN_CONFIG"
}
61
+
62
# Succeeds (status 0) only when historian.enabled is exactly true in the
# loaded config; any other value, or no value, counts as disabled.
historian_config_enabled() {
  [[ "$(historian_config_get '.historian.enabled')" == "true" ]]
}
@@ -0,0 +1,126 @@
1
+ #!/usr/bin/env bash
2
+ # Embedder client for Historian.
3
+ #
4
+ # Per ADR-001, the default backend is local ollama with the
5
+ # `nomic-embed-text` model. The interface is intentionally a single
6
+ # function that takes a string and returns a JSON array of floats, so
7
+ # alternate backends (fastembed sidecar, remote API) can drop in later
8
+ # without changing callers.
9
+ #
10
+ # Fail-soft: returns empty string on any failure (ollama not reachable,
11
+ # JSON decode error, missing curl). Callers treat empty as "skip the
12
+ # embedding and emit historian.embedder.unavailable".
13
+
14
+ # Resolve config (the caller has typically run historian_config_load
15
+ # before invoking us). We re-read the config knobs here so this lib can
16
+ # be sourced and used outside the SessionEnd hook context.
17
+
18
# Print the configured embedder backend; "none" when nothing is configured.
_historian_embedder_backend() {
  local backend
  backend=$(historian_config_get '.historian.embedder.backend' 2>/dev/null)
  printf '%s' "${backend:-none}"
}
24
+
25
# Print the ollama base URL, defaulting to the local daemon address.
_historian_embedder_ollama_host() {
  local base_url
  base_url=$(historian_config_get '.historian.embedder.ollama.host' 2>/dev/null)
  printf '%s' "${base_url:-http://127.0.0.1:11434}"
}
31
+
32
# Print the ollama embedding model name, defaulting to nomic-embed-text.
_historian_embedder_ollama_model() {
  local model_name
  model_name=$(historian_config_get '.historian.embedder.ollama.model' 2>/dev/null)
  printf '%s' "${model_name:-nomic-embed-text}"
}
38
+
39
# Print the per-request timeout in seconds for ollama calls; 8 when the
# setting is absent or the literal string "null".
_historian_embedder_ollama_timeout() {
  local seconds
  seconds=$(historian_config_get '.historian.embedder.ollama.request_timeout_seconds' 2>/dev/null)
  case "$seconds" in
    '' | null) seconds=8 ;;
  esac
  printf '%s' "$seconds"
}
45
+
46
# Probe whether the configured embedder can be used right now.
# Returns: 0 when the backend is "ollama", curl is installed and the
#          daemon answers on /api/tags; 1 for "none", an empty backend,
#          or any backend that is declared but not implemented.
historian_embedder_available() {
  local backend host timeout
  backend=$(_historian_embedder_backend)

  # ollama is the only implemented backend; everything else is unavailable.
  [[ "$backend" == "ollama" ]] || return 1
  command -v curl >/dev/null 2>&1 || return 1

  host=$(_historian_embedder_ollama_host)
  timeout=$(_historian_embedder_ollama_timeout)
  # A GET on /api/tags with the body discarded confirms the daemon is up;
  # -f turns HTTP errors into a non-zero status.
  curl -fsS --max-time "$timeout" -o /dev/null "${host}/api/tags" 2>/dev/null
}
71
+
72
# Embed one string with the configured backend.
# Usage:   historian_embedder_embed <text>
# Outputs: a JSON array of floats (e.g. `[0.123,0.456,...]`) on success;
#          nothing for empty text, backend "none", or a backend that is
#          declared but not implemented (fail-soft).
historian_embedder_embed() {
  local text="${1:-}"
  if [[ -n "$text" && "$(_historian_embedder_backend)" == "ollama" ]]; then
    _historian_embedder_embed_ollama "$text"
  fi
}
94
+
95
# Internal: call ollama's /api/embeddings endpoint for one string.
# Arguments: $1 - text to embed
# Outputs:   the embedding as a compact JSON array, or nothing on any
#            failure (curl missing, request error, malformed response).
# Returns:   always 0 (fail-soft).
_historian_embedder_embed_ollama() {
  local text="$1"
  command -v curl >/dev/null 2>&1 || return 0

  local host model timeout payload response vector
  host=$(_historian_embedder_ollama_host)
  model=$(_historian_embedder_ollama_model)
  timeout=$(_historian_embedder_ollama_timeout)

  # The text goes to jq on stdin (-R raw, -s slurp into one string) and
  # the payload to curl on stdin (@-). Passing either on the command line
  # fails once a chunk exceeds the kernel's per-argument size limit, and
  # would also expose chunk content in process listings.
  payload=$(printf '%s' "$text" | jq -Rsc --arg model "$model" \
    '{ model: $model, prompt: . }') || return 0
  [[ -z "$payload" ]] && return 0

  response=$(printf '%s' "$payload" | curl -fsS --max-time "$timeout" \
    -H 'Content-Type: application/json' \
    --data-binary @- \
    "${host}/api/embeddings" 2>/dev/null) || return 0
  [[ -z "$response" ]] && return 0

  # The ollama embeddings endpoint returns `{"embedding":[...]}`. Pull
  # just the array out.
  vector=$(printf '%s' "$response" | jq -c '.embedding // empty' 2>/dev/null)
  [[ -z "$vector" || "$vector" == "null" ]] && return 0

  # Sanity: must be a non-empty array made up only of numbers.
  printf '%s' "$vector" | jq -e '
    type == "array" and length > 0 and all(.[]; type == "number")
  ' >/dev/null 2>&1 || return 0

  printf '%s' "$vector"
}
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env bash
2
+ # Event emission helpers for Historian.
3
+ #
4
+ # Thin wrapper around onlooker-event.mjs `emit` mode for historian.* events.
5
+ # Fail-soft: returns 0 on success or when the substrate is unavailable.
6
+
7
# Print the path to onlooker-event.mjs, or nothing when it cannot be
# located. An explicit ONLOOKER_ECOSYSTEM_ROOT is trusted as-is;
# otherwise the directory two levels above the plugin root is used, but
# only if the script is actually present there.
_historian_resolve_event_js() {
  local root="${ONLOOKER_ECOSYSTEM_ROOT:-}"
  if [[ -n "$root" ]]; then
    printf '%s/scripts/lib/onlooker-event.mjs' "$root"
    return
  fi

  local lib_dir plugin_dir guess
  lib_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  plugin_dir="$(cd "${lib_dir}/../.." && pwd)"
  guess="$(cd "${plugin_dir}/../.." 2>/dev/null && pwd)"
  if [[ -f "${guess}/scripts/lib/onlooker-event.mjs" ]]; then
    printf '%s/scripts/lib/onlooker-event.mjs' "$guess"
  fi
}

# Resolve once at source time; a value already set by the caller wins.
if [[ -z "${_HISTORIAN_EVENT_JS:-}" ]]; then
  _HISTORIAN_EVENT_JS="$(_historian_resolve_event_js)"
fi
26
+
27
# Emit a historian.* event through onlooker-event.mjs and append the
# resulting JSON line to the events log.
# Usage:   historian_emit <event_type> <session_id> <payload_json>
# Globals: _HISTORIAN_EVENT_JS, ONLOOKER_EVENTS_LOG, ONLOOKER_DIR, HOME (read)
# Returns: always 0. Fail-soft: a missing emitter, missing node, an unset
#          log path, an invalid payload or an unwritable log all degrade
#          to a silent no-op.
historian_emit() {
  local event_type="${1:-}"
  local session_id="${2:-}"
  local payload="${3:-}"
  [[ -z "$payload" ]] && payload='{}'

  [[ -z "$event_type" || -z "$session_id" ]] && return 0
  [[ -z "${_HISTORIAN_EVENT_JS:-}" || ! -f "$_HISTORIAN_EVENT_JS" ]] && return 0
  command -v node >/dev/null 2>&1 || return 0
  [[ -z "${ONLOOKER_EVENTS_LOG:-}" ]] && return 0

  local params event_json
  params=$(jq -cn \
    --arg plugin "historian" \
    --arg session_id "$session_id" \
    --arg event_type "$event_type" \
    --argjson payload "$payload" \
    '{
      plugin: $plugin,
      session_id: $session_id,
      event_type: $event_type,
      payload: $payload
    }') || return 0

  # The environment assignments must prefix `node`, not `printf`: an
  # assignment in front of a pipeline only applies to the first command,
  # so the emitter would never see ONLOOKER_DIR / ONLOOKER_PLUGIN_NAME.
  event_json=$(
    printf '%s' "$params" \
      | ONLOOKER_DIR="${ONLOOKER_DIR:-${HOME:-}/.onlooker}" \
        ONLOOKER_PLUGIN_NAME="historian" \
        node "$_HISTORIAN_EVENT_JS" emit 2>/dev/null
  ) || return 0
  [[ -z "$event_json" ]] && return 0

  # Log-directory or append failures must not surface as a non-zero
  # status (or as noise on stderr) in the calling hook.
  mkdir -p "$(dirname "$ONLOOKER_EVENTS_LOG")" 2>/dev/null || return 0
  { printf '%s\n' "$event_json" >> "$ONLOOKER_EVENTS_LOG"; } 2>/dev/null || true
  return 0
}