@onlooker-community/ecosystem 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/.claude-plugin/marketplace.json +39 -0
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.release-please-manifest.json +4 -2
  4. package/CHANGELOG.md +14 -0
  5. package/package.json +2 -2
  6. package/plugins/curator/.claude-plugin/plugin.json +14 -0
  7. package/plugins/curator/CHANGELOG.md +10 -0
  8. package/plugins/curator/README.md +55 -0
  9. package/plugins/curator/config.json +41 -0
  10. package/plugins/curator/hooks/hooks.json +15 -0
  11. package/plugins/curator/scripts/hooks/curator-session-start.sh +343 -0
  12. package/plugins/curator/scripts/lib/curator-checks.sh +155 -0
  13. package/plugins/curator/scripts/lib/curator-config.sh +67 -0
  14. package/plugins/curator/scripts/lib/curator-emit.sh +61 -0
  15. package/plugins/curator/scripts/lib/curator-memory-reader.sh +225 -0
  16. package/plugins/curator/scripts/lib/curator-project-key.sh +82 -0
  17. package/plugins/curator/scripts/lib/curator-storage.sh +176 -0
  18. package/plugins/curator/scripts/lib/curator-ulid.sh +43 -0
  19. package/plugins/historian/.claude-plugin/plugin.json +14 -0
  20. package/plugins/historian/CHANGELOG.md +10 -0
  21. package/plugins/historian/README.md +70 -0
  22. package/plugins/historian/config.json +30 -0
  23. package/plugins/historian/hooks/hooks.json +26 -0
  24. package/plugins/historian/scripts/hooks/historian-prompt-submit.sh +15 -0
  25. package/plugins/historian/scripts/hooks/historian-session-end.sh +204 -0
  26. package/plugins/historian/scripts/lib/historian-chunker.sh +129 -0
  27. package/plugins/historian/scripts/lib/historian-config.sh +66 -0
  28. package/plugins/historian/scripts/lib/historian-emit.sh +61 -0
  29. package/plugins/historian/scripts/lib/historian-project-key.sh +80 -0
  30. package/plugins/historian/scripts/lib/historian-sanitizer.sh +123 -0
  31. package/plugins/historian/scripts/lib/historian-storage.sh +110 -0
  32. package/plugins/historian/scripts/lib/historian-transcript.sh +83 -0
  33. package/plugins/historian/scripts/lib/historian-ulid.sh +43 -0
  34. package/release-please-config.json +32 -0
  35. package/test/bats/curator-session-start.bats +316 -0
  36. package/test/bats/historian-session-end.bats +296 -0
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env bash
2
+ # Chunker for Historian.
3
+ #
4
+ # Given a JSON array of normalized turns (from historian-transcript.sh),
5
+ # produces a JSON array of chunk records. Each chunk:
6
+ # - Respects turn boundaries (no mid-turn splits)
7
+ # - Targets `target_chars` characters with `overlap_chars` overlap
8
+ # (carrying the last N chars of one chunk's content as the start of
9
+ # the next)
10
+ # - Records start_turn_index, end_turn_index, body_chars
11
+ #
12
+ # Character-based chunking instead of token-based: tokenizers vary by
13
+ # embedder, and the chunker shouldn't have to know which embedder will
14
+ # run downstream. Char counts approximate token counts at ~4 chars / token
15
+ # for English-ish prose; configs are tunable.
16
+
17
# Split normalized turns into turn-aligned, overlapping chunks.
#
# Usage:  historian_chunker_split <turns_json> <target_chars> <overlap_chars>
#   turns_json     JSON array of { turn_index, role, content } records
#                  (default: []).
#   target_chars   soft upper bound for one chunk body (default: 2400).
#   overlap_chars  number of trailing characters of a chunk that are repeated
#                  at the start of the next one (default: 400; negative
#                  values are treated as 0).
# Output: JSON array of chunks on stdout, each with chunk_index,
#         start_turn_index, end_turn_index, body and body_chars.
#
# Turns are never split. A turn longer than target_chars becomes a chunk of
# its own. The overlap prefix is plain text: it is not attributed to a turn,
# so start/end indexes only describe the turns added after it.
#
# NOTE(review): the turns JSON is handed to python3 as a command-line
# argument, so very large transcripts may exceed the OS argument size limit.
historian_chunker_split() {
  local turns="${1:-[]}"
  local target_chars="${2:-2400}"
  local overlap_chars="${3:-400}"

  python3 - "$target_chars" "$overlap_chars" "$turns" <<'PY'
import json, sys

target = int(sys.argv[1])
overlap = max(0, int(sys.argv[2]))
turns = json.loads(sys.argv[3] or "[]")

chunks = []
buf_parts = []
buf_chars = 0
buf_start = None
buf_end = None

# Overlap text carried from the most recently emitted chunk. It seeds the
# next chunk's body but is not attributed to any turn.
pending_overlap = ""


def flush(force_text=None):
    """Emit the buffer (or force_text) as a chunk and reset the buffer.

    force_text overrides the accumulated body and is used when a single
    turn exceeds the target. Returns the emitted body, or "" when nothing
    was emitted (empty buffer or whitespace-only body).
    """
    global buf_parts, buf_chars, buf_start, buf_end
    if force_text is None:
        if not buf_parts:
            return ""
        body = "\n\n".join(buf_parts)
    else:
        body = force_text

    emitted = ""
    if body.strip():
        chunks.append({
            "chunk_index": len(chunks),
            "start_turn_index": buf_start,
            "end_turn_index": buf_end,
            "body": body,
            "body_chars": len(body),
        })
        emitted = body

    buf_parts = []
    buf_chars = 0
    buf_start = None
    buf_end = None
    return emitted


def tail(body):
    """Return the overlap suffix of body ("" when overlap is disabled)."""
    return body[-overlap:] if overlap > 0 and body else ""


for turn in turns:
    role = turn.get("role", "")
    content = turn.get("content", "")
    if not content:
        continue
    # .get() keeps a record without turn_index from aborting the whole run.
    turn_index = turn.get("turn_index")
    rendered = f"{role}: {content}"
    rendered_len = len(rendered)

    if rendered_len > target:
        # Oversized turn: close the running buffer first, taking the
        # overlap from the chunk that was just emitted, then emit the turn
        # as a standalone chunk.
        if buf_parts:
            pending_overlap = tail(flush())
        prefix = pending_overlap + "\n\n" if pending_overlap else ""
        body_for_chunk = prefix + rendered
        buf_start = turn_index
        buf_end = turn_index
        flush(force_text=body_for_chunk)
        pending_overlap = tail(body_for_chunk)
        continue

    candidate_len = buf_chars + rendered_len + (2 if buf_parts else 0)  # 2 for "\n\n"
    if buf_parts and candidate_len > target:
        # The overlap must come from the body emitted by THIS flush, so it
        # is computed from flush()'s return value rather than from
        # whatever chunk happened to be last before flushing.
        pending_overlap = tail(flush())

    if not buf_parts and pending_overlap:
        buf_parts.append(pending_overlap)
        buf_chars += len(pending_overlap)
        pending_overlap = ""

    buf_parts.append(rendered)
    buf_chars += rendered_len + (2 if len(buf_parts) > 1 else 0)
    if buf_start is None:
        buf_start = turn_index
    buf_end = turn_index

# Final flush.
if buf_parts:
    flush()

print(json.dumps(chunks))
PY
}
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env bash
2
+ # Config resolution for Historian.
3
+ #
4
+ # Reads three layers, latest wins:
5
+ # 1. plugins/historian/config.json (defaults shipped with the plugin)
6
+ # 2. ~/.claude/settings.json
7
+ # 3. <repo>/.claude/settings.json
8
+ #
9
+ # Exposes:
10
+ # historian_config_load <repo_root> # populates _HISTORIAN_CONFIG (JSON)
11
+ # historian_config_get <jq-path> # echoes string value (empty if unset)
12
+ # historian_config_enabled # 0 if historian.enabled is true
13
+ #
14
+ # Settings overlay only touches the `historian.*` subtree of settings.json.
15
+
16
# Effective configuration as a JSON document. Populated by
# historian_config_load; stays "{}" until that has run.
_HISTORIAN_CONFIG="{}"

# Build the effective configuration and store it in _HISTORIAN_CONFIG.
#
# Usage: historian_config_load <repo_root>
#
# Layers, later ones win:
#   1. $CLAUDE_PLUGIN_ROOT/config.json   (whole document)
#   2. $HOME/.claude/settings.json       (only its `historian` subtree)
#   3. <repo_root>/.claude/settings.json (only its `historian` subtree)
#
# A layer that is missing, unreadable, empty or not valid JSON is skipped and
# leaves the result of the earlier layers untouched. Layers whose base
# directory is unset are skipped as well, so an empty variable never resolves
# to a path under the filesystem root.
historian_config_load() {
  local repo_root="${1:-}"
  local plugin_root="${CLAUDE_PLUGIN_ROOT:-}"
  local home_dir="${HOME:-}"

  local merged="{}"
  local file base overlay candidate

  # Layer 1: plugin defaults. Only a JSON object is accepted as a base.
  file="${plugin_root}/config.json"
  if [[ -n "$plugin_root" && -f "$file" ]]; then
    candidate=$(jq 'if type == "object" then . else empty end' "$file" 2>/dev/null) \
      || candidate=""
    if [[ -n "$candidate" ]]; then
      merged="$candidate"
    fi
  fi

  # Layers 2 and 3: settings overlays, restricted to the `historian` key.
  for base in "$home_dir" "$repo_root"; do
    [[ -n "$base" ]] || continue
    file="${base}/.claude/settings.json"
    [[ -f "$file" ]] || continue

    overlay=$(jq '{ historian: (.historian // {}) }' "$file" 2>/dev/null) || continue
    [[ -z "$overlay" ]] && continue

    # Merge into a scratch variable first: assigning the command substitution
    # directly to `merged` would wipe it whenever jq fails.
    candidate=$(jq -n --argjson a "$merged" --argjson b "$overlay" '
      def deepmerge($a; $b):
        if ($a|type) == "object" and ($b|type) == "object" then
          reduce (($a|keys) + ($b|keys) | unique)[] as $k
            ({}; .[$k] = deepmerge($a[$k]; $b[$k]))
        elif $b == null then $a
        else $b end;
      deepmerge($a; $b)
    ' 2>/dev/null) || continue
    if [[ -n "$candidate" ]]; then
      merged="$candidate"
    fi
  done

  _HISTORIAN_CONFIG="$merged"
}
52
+
53
# Print one value from the loaded configuration.
#
# Usage: historian_config_get <jq-path>
#
# Prints nothing when the path resolves to null or is absent. The filter
# tests for null explicitly because jq's `//` operator also replaces `false`,
# which would make an explicit "disabled" setting look unset.
historian_config_get() {
  local jq_path="$1"
  jq -r "${jq_path} | if . == null then empty else . end" \
    <<<"$_HISTORIAN_CONFIG" 2>/dev/null
}
61
+
62
# Succeed (status 0) only when `.historian.enabled` in the loaded
# configuration is exactly `true`; any other value or an unset key fails.
historian_config_enabled() {
  [[ "$(historian_config_get '.historian.enabled')" == "true" ]]
}
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env bash
2
+ # Event emission helpers for Historian.
3
+ #
4
+ # Thin wrapper around onlooker-event.mjs `emit` mode for historian.* events.
5
+ # Fail-soft: returns 0 on success or when the substrate is unavailable.
6
+
7
# Print the path of onlooker-event.mjs, or nothing when it cannot be located.
#
# $ONLOOKER_ECOSYSTEM_ROOT wins when set (the file's existence is not checked
# in that case). Otherwise the directory four levels above this script is
# used, but only if it actually contains scripts/lib/onlooker-event.mjs.
_historian_resolve_event_js() {
  local here plugin_dir root guess
  here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  plugin_dir="$(cd "${here}/../.." && pwd)"

  root="${ONLOOKER_ECOSYSTEM_ROOT:-}"
  if [[ -z "$root" ]]; then
    guess="$(cd "${plugin_dir}/../.." 2>/dev/null && pwd)"
    if [[ -f "${guess}/scripts/lib/onlooker-event.mjs" ]]; then
      root="$guess"
    fi
  fi

  [[ -n "$root" ]] || return 0
  printf '%s/scripts/lib/onlooker-event.mjs' "$root"
}

# Resolved once when this file is sourced; a value already present in the
# environment is kept as-is.
_HISTORIAN_EVENT_JS="${_HISTORIAN_EVENT_JS:-$(_historian_resolve_event_js)}"
26
+
27
# Emit a historian.* event through onlooker-event.mjs and append the
# resulting JSON line to $ONLOOKER_EVENTS_LOG.
#
# Usage: historian_emit <event_type> <session_id> <payload_json>
#   payload_json defaults to {} when omitted or empty.
#
# Fail-soft: always returns 0. The event is silently dropped when an argument
# is missing, the emitter script or `node` is unavailable,
# $ONLOOKER_EVENTS_LOG is unset, the payload is not valid JSON, or the log
# cannot be written.
historian_emit() {
  local event_type="${1:-}"
  local session_id="${2:-}"
  local payload="${3:-}"
  if [[ -z "$payload" ]]; then
    payload='{}'
  fi

  [[ -z "$event_type" || -z "$session_id" ]] && return 0
  [[ -z "${_HISTORIAN_EVENT_JS:-}" || ! -f "$_HISTORIAN_EVENT_JS" ]] && return 0
  command -v node >/dev/null 2>&1 || return 0
  [[ -z "${ONLOOKER_EVENTS_LOG:-}" ]] && return 0

  local params event_json
  params=$(jq -cn \
    --arg plugin "historian" \
    --arg session_id "$session_id" \
    --arg event_type "$event_type" \
    --argjson payload "$payload" \
    '{
      plugin: $plugin,
      session_id: $session_id,
      event_type: $event_type,
      payload: $payload
    }' 2>/dev/null) || return 0

  # The environment prefix must sit on the `node` stage: an assignment prefix
  # only applies to the single command it precedes, so placing it before
  # `printf` would never reach the emitter.
  event_json=$(
    printf '%s' "$params" \
      | ONLOOKER_DIR="${ONLOOKER_DIR:-$HOME/.onlooker}" \
        ONLOOKER_PLUGIN_NAME="historian" \
        node "$_HISTORIAN_EVENT_JS" emit 2>/dev/null
  ) || return 0
  [[ -z "$event_json" ]] && return 0

  mkdir -p "$(dirname "$ONLOOKER_EVENTS_LOG")" 2>/dev/null || return 0
  { printf '%s\n' "$event_json" >> "$ONLOOKER_EVENTS_LOG"; } 2>/dev/null || return 0
  return 0
}
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env bash
2
+ # Project key derivation for Historian.
3
+ #
4
+ # Historian stores chunk records under the ecosystem-wide 12-char hex
5
+ # project key so state survives clone path changes and is shared across
6
+ # worktrees / clones of the same repo.
7
+ #
8
+ # Resolution order:
9
+ # 1. SHA256(`git remote get-url origin`) — preferred, machine-portable
10
+ # 2. SHA256(realpath of `git rev-parse --show-toplevel`) — fallback for
11
+ # repos without an origin remote
12
+ #
13
+ # Returns the first 12 hex chars. Empty when not in a git repo at all.
14
+
15
+ _historian_sha256_first12() {
16
+ local input="$1"
17
+ if command -v shasum >/dev/null 2>&1; then
18
+ printf '%s' "$input" | shasum -a 256 2>/dev/null | cut -c1-12
19
+ elif command -v sha256sum >/dev/null 2>&1; then
20
+ printf '%s' "$input" | sha256sum 2>/dev/null | cut -c1-12
21
+ else
22
+ return 1
23
+ fi
24
+ }
25
+
26
# Print the URL of the `origin` remote for the repository containing <cwd>.
# Prints nothing (and still returns 0) when <cwd> is empty, is not a
# directory, or git cannot report an origin URL.
historian_project_remote_url() {
  local dir="${1:-}"
  if [[ -n "$dir" && -d "$dir" ]]; then
    git -C "$dir" remote get-url origin 2>/dev/null || true
  fi
}
31
+
32
# Print the physical (symlink-resolved) root directory of the repository that
# contains <cwd>. Prints nothing and returns 0 when <cwd> is empty, is not a
# directory, or is not inside a git work tree.
#
# The root is taken as the parent of git's common directory, so linked
# worktrees resolve to the main checkout; `git rev-parse --show-toplevel` is
# the fallback when that lookup yields nothing.
# NOTE(review): the parent-of-common-dir shortcut presumably assumes a
# conventional `<root>/.git` layout; submodules and separate git dirs may
# resolve elsewhere. Confirm whether that matters for callers.
historian_project_repo_root() {
  local cwd="${1:-}"
  [[ -z "$cwd" || ! -d "$cwd" ]] && return 0

  if ! git -C "$cwd" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    return 0
  fi

  # Initialise explicitly: a bare `local toplevel` leaves the variable unset,
  # and the emptiness test below would abort under `set -u`.
  local common_dir="" toplevel=""
  common_dir=$(git -C "$cwd" rev-parse --git-common-dir 2>/dev/null) || return 0

  # git may report the common dir relative to <cwd>; make it absolute.
  if [[ -n "$common_dir" && "$common_dir" != /* ]]; then
    common_dir="$(cd "$cwd" && cd "$common_dir" 2>/dev/null && pwd -P)" || common_dir=""
  fi

  if [[ -n "$common_dir" && -d "$common_dir" ]]; then
    toplevel="$(cd "$common_dir/.." 2>/dev/null && pwd -P)" || toplevel=""
  fi

  if [[ -z "$toplevel" ]]; then
    toplevel=$(git -C "$cwd" rev-parse --show-toplevel 2>/dev/null || true)
    if [[ -n "$toplevel" ]]; then
      toplevel="$(cd "$toplevel" 2>/dev/null && pwd -P)" || toplevel=""
    fi
  fi

  printf '%s' "$toplevel"
}
58
+
59
# Derive the project key for a working directory.
#
# Usage: key=$(historian_project_key "$CWD")
#
# <cwd> defaults to the current directory. The key is the 12-character hash
# of "remote:<origin url>" when an origin remote exists, otherwise of
# "root:<repo root>". Prints nothing when neither can be determined. Always
# returns 0.
historian_project_key() {
  local dir="${1:-}"
  if [[ -z "$dir" ]]; then
    dir="$(pwd)"
  fi

  local origin_url repo_dir

  origin_url=$(historian_project_remote_url "$dir")
  if [[ -n "$origin_url" ]]; then
    _historian_sha256_first12 "remote:$origin_url"
    return 0
  fi

  repo_dir=$(historian_project_repo_root "$dir")
  if [[ -n "$repo_dir" ]]; then
    _historian_sha256_first12 "root:$repo_dir"
  fi

  return 0
}
@@ -0,0 +1,123 @@
1
+ #!/usr/bin/env bash
2
+ # Sanitizer for Historian chunks.
3
+ #
4
+ # Three layers, in order:
5
+ # 1. Secret-shaped substrings are redacted to "[REDACTED:secret]".
6
+ # Patterns cover AWS access keys, GitHub PATs, Anthropic API keys,
7
+ # bearer tokens, and KEY=value-style env assignments containing
8
+ # key/secret/token/password/passwd in the key name.
9
+ # 2. `[historian:skip]` markers cause the entire chunk to be dropped.
10
+ # 3. Path-deny: if the chunk references any path under
11
+ # `never_index_paths` (substring match against each entry), the
12
+ # chunk is dropped.
13
+ #
14
+ # Input: JSON array of chunk records from the chunker (each with `body`).
15
+ # Output: JSON array of surviving chunk records, each with `body_redacted`
16
+ # (instead of `body`) and a `redaction_count`, plus a sibling
17
+ # array of `dropped` records keyed by reason.
18
+
19
# Redact secrets from chunk bodies and drop chunks that must not be indexed.
#
# Usage: historian_sanitizer_run <chunks_json> <never_index_paths_json>
#                                <redact_secret_patterns> <drop_skip_marker>
#   chunks_json             JSON array of chunk records with a `body` field
#                           (default: []).
#   never_index_paths_json  JSON array of path substrings; a chunk whose body
#                           contains any of them is dropped (default: []).
#   redact_secret_patterns  the literal "false" disables secret redaction and
#                           copies bodies through unchanged; anything else
#                           enables it (default: true).
#   drop_skip_marker        the literal "false" keeps chunks that contain the
#                           [historian:skip] marker; anything else drops them
#                           (default: true).
#
# Output: { "kept": [...], "dropped": [...] } on stdout. Kept records carry
# `body_redacted` (in place of `body`) and `redaction_count`; dropped records
# carry `chunk_index` and `reason` ("skip_marker" or "never_index_path").
historian_sanitizer_run() {
  local chunks="${1:-[]}"
  local never_index_paths="${2:-[]}"
  local redact_secrets="${3:-true}"
  local drop_skip="${4:-true}"

  python3 - "$chunks" "$never_index_paths" "$redact_secrets" "$drop_skip" <<'PY'
import json, re, sys

chunks = json.loads(sys.argv[1] or "[]")
deny_paths = json.loads(sys.argv[2] or "[]")
redact_secrets = sys.argv[3] != "false"
drop_skip = sys.argv[4] != "false"

REDACTED = "[REDACTED:secret]"
SKIP_MARKER = "[historian:skip]"

# Secret-shaped patterns. Conservative: false positives are acceptable,
# false negatives are the failure mode we care about.
TOKEN_PATTERNS = [
    # AWS access keys (AKIA followed by 16 uppercase alphanumerics).
    re.compile(r"\bAKIA[0-9A-Z]{16}\b"),
    # GitHub tokens: ghp_, gho_, ghu_, ghs_, ghr_ prefixes.
    re.compile(r"\bgh[pousr]_[A-Za-z0-9]{20,}\b"),
    # Anthropic API keys.
    re.compile(r"\bsk-ant-[A-Za-z0-9_-]{20,}\b"),
    # Bearer tokens in headers. Only the scheme name is matched
    # case-insensitively, since HTTP auth scheme names are case-insensitive
    # and mixed-case variants occur in the wild.
    re.compile(r"(?i:Bearer)\s+[A-Za-z0-9._\-+/=]{20,}"),
]

# KEY=value where KEY contains key/secret/token/password/passwd
# (case-insensitive). The prefix before the keyword is optional so that bare
# names such as PASSWORD= or TOKEN= are caught too. Only the value is
# redacted; the key name is preserved.
ASSIGNMENT_PATTERN = re.compile(
    r"\b([A-Z0-9_]*(?:KEY|SECRET|TOKEN|PASSWORD|PASSWD)[A-Z0-9_]*)\s*=\s*\S+",
    re.IGNORECASE,
)


def sanitize(body):
    """Return (redacted_body, number_of_substitutions)."""
    count = 0
    out = body
    for pat in TOKEN_PATTERNS:
        out, n = pat.subn(REDACTED, out)
        count += n
    out, n = ASSIGNMENT_PATTERN.subn(
        lambda m: f"{m.group(1)}={REDACTED}", out
    )
    return out, count + n


# Tolerate a single string as well as a list, and ignore empty or
# non-string entries instead of failing on them.
if isinstance(deny_paths, str):
    deny_paths = [deny_paths]
deny = [p for p in (deny_paths or []) if isinstance(p, str) and p]

kept = []
dropped = []
for chunk in chunks:
    body = chunk.get("body") or ""
    if not isinstance(body, str):
        body = str(body)

    if drop_skip and SKIP_MARKER in body:
        dropped.append({
            "chunk_index": chunk.get("chunk_index"),
            "reason": "skip_marker",
        })
        continue
    if any(p in body for p in deny):
        dropped.append({
            "chunk_index": chunk.get("chunk_index"),
            "reason": "never_index_path",
        })
        continue

    if redact_secrets:
        redacted, count = sanitize(body)
    else:
        redacted, count = body, 0

    new_chunk = dict(chunk)
    new_chunk.pop("body", None)
    new_chunk["body_redacted"] = redacted
    new_chunk["redaction_count"] = count
    kept.append(new_chunk)

print(json.dumps({"kept": kept, "dropped": dropped}))
PY
}
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env bash
2
+ # Storage layout helpers for Historian.
3
+ #
4
+ # Layout (under $ONLOOKER_DIR/historian/<project-key>/):
5
+ # manifest.json project metadata (remote_url, repo_root, last_seen_at)
6
+ # sessions/<session_id>.jsonl append-only chunk records, one per line
7
+ #
8
+ # Chunk record shape:
9
+ # { chunk_id, session_id, chunk_index, start_turn_index, end_turn_index,
10
+ # body_redacted, body_chars, created_at, source, redaction_count }
11
+ #
12
+ # Append-only writes keep the indexing path simple and safe to re-run; if a
13
+ # session is re-indexed (rare; SessionEnd should fire once), callers can
14
+ # truncate the file before appending or accept duplicate chunk records.
15
+
16
# Print the directory that holds all Historian state:
# $ONLOOKER_DIR/historian, with $ONLOOKER_DIR defaulting to $HOME/.onlooker.
historian_storage_root() {
  printf '%s/historian' "${ONLOOKER_DIR:-$HOME/.onlooker}"
}
20
+
21
# Print the storage directory for one project: <storage root>/<project key>.
# The key is used verbatim as a path component.
historian_project_dir() {
  local project_key="$1"
  local root
  root=$(historian_storage_root)
  printf '%s/%s' "$root" "$project_key"
}
25
+
26
# Print the directory holding per-session chunk files for a project:
# <project dir>/sessions.
historian_sessions_dir() {
  local project_key="$1"
  local project_path
  project_path=$(historian_project_dir "$project_key")
  printf '%s/sessions' "$project_path"
}
30
+
31
# Print the path of the JSONL chunk file for a session:
# <sessions dir>/<sanitized session id>.jsonl
#
# The session id is reduced to the characters [A-Za-z0-9._-] before it is
# used as a file name, because it originates from the hook payload and is
# not guaranteed to be filesystem-safe. An id that sanitizes to nothing is
# replaced by "unknown".
historian_session_file() {
  local project_key="$1"
  local raw_id="$2"
  local sessions_path cleaned

  cleaned=$(printf '%s' "$raw_id" | tr -cd '[:alnum:]._-')
  sessions_path=$(historian_sessions_dir "$project_key")
  printf '%s/%s.jsonl' "$sessions_path" "${cleaned:-unknown}"
}
43
+
44
# Create the project directory and its sessions/ subdirectory if needed.
# Returns 1 when the key is empty; otherwise returns mkdir's status.
historian_storage_init() {
  local project_key="$1"
  if [[ -z "$project_key" ]]; then
    return 1
  fi
  mkdir -p "$(historian_project_dir "$project_key")/sessions" 2>/dev/null
}
51
+
52
# Write (replace) the project's manifest.json.
#
# Usage: historian_storage_write_manifest <key> <remote_url> <repo_root>
#   remote_url / repo_root may be empty; they are then stored as JSON null.
#
# The manifest records project_key, remote_url, repo_root and last_seen_at
# (current UTC time, ISO-8601 with a trailing Z).
#
# Returns 0 on success and 1 when the key is empty, the storage directories
# cannot be created, or the manifest cannot be written. The document is
# generated into a temporary file and renamed into place, so a failed write
# never leaves a truncated manifest behind.
historian_storage_write_manifest() {
  local key="${1:-}"
  local remote_url="${2:-}"
  local repo_root="${3:-}"
  [[ -z "$key" ]] && return 1

  historian_storage_init "$key" || return 1
  local manifest_path tmp_path now
  manifest_path="$(historian_project_dir "$key")/manifest.json"
  tmp_path="${manifest_path}.tmp.$$"
  now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  # Braces keep a failing redirection's own error message quiet as well.
  if {
    jq -n \
      --arg key "$key" \
      --arg remote "$remote_url" \
      --arg root "$repo_root" \
      --arg now "$now" \
      '{
        project_key: $key,
        remote_url: (if $remote == "" then null else $remote end),
        repo_root: (if $root == "" then null else $root end),
        last_seen_at: $now
      }' > "$tmp_path" && mv -f "$tmp_path" "$manifest_path"
  } 2>/dev/null; then
    return 0
  fi

  rm -f "$tmp_path" 2>/dev/null
  return 1
}
76
+
77
# Append one chunk record, as a single line, to the session's JSONL file.
#
# Usage: historian_storage_append_chunk <key> <session_id> <chunk_json>
#
# Returns 1 when any argument is empty or the storage directories cannot be
# created; otherwise returns the status of the append itself.
historian_storage_append_chunk() {
  local project_key="$1"
  local session="$2"
  local record="$3"
  if [[ -z "$project_key" ]] || [[ -z "$session" ]] || [[ -z "$record" ]]; then
    return 1
  fi

  historian_storage_init "$project_key" || return 1

  local target
  target=$(historian_session_file "$project_key" "$session")
  printf '%s\n' "$record" >> "$target" 2>/dev/null
}
90
+
91
# Print the number of chunk records (lines) stored for a session.
# Prints 0 when the session file does not exist.
historian_storage_chunk_count() {
  local project_key="$1"
  local session="$2"
  local target
  target=$(historian_session_file "$project_key" "$session")

  if [[ ! -f "$target" ]]; then
    echo 0
    return 0
  fi

  # Some `wc` implementations pad the number with spaces; strip them.
  wc -l < "$target" 2>/dev/null | tr -d ' '
}
100
+
101
# Truncate the chunk file of a session so it can be re-indexed from scratch.
# Does nothing (and returns 0) when the file does not exist.
historian_storage_reset_session() {
  local project_key="$1"
  local session="$2"
  local target
  target=$(historian_session_file "$project_key" "$session")

  if [[ -f "$target" ]]; then
    : > "$target"
  fi
}
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env bash
2
+ # Transcript reading for Historian.
3
+ #
4
+ # Claude Code records each session's transcript as JSONL where each line
5
+ # is an entry like { "role": "user"|"assistant"|"system", "content": "...",
6
+ # ... }. Historian only embeds user + assistant turns — tool calls and tool
7
+ # results are dropped at this stage so the chunked content stays
8
+ # semantically focused on the conversation.
9
+
10
# Load a JSONL transcript and print a JSON array of normalized turn records:
#   [
#     { "turn_index": 0, "role": "user", "content": "..." },
#     { "turn_index": 1, "role": "assistant", "content": "..." },
#     ...
#   ]
#
# Usage: historian_transcript_load <transcript_path>
#
# Only user and assistant entries with non-empty text are kept, in file
# order; turn_index counts the kept entries. Content may be a plain string or
# a list of content blocks, in which case the text blocks are joined with a
# blank line and every other block type (tool_use, tool_result, ...) is
# dropped. Role and content are read from the entry itself and, failing
# that, from a nested `message` object.
# NOTE(review): the nested `message` shape is what Claude Code transcripts
# are understood to use; confirm against a real transcript.
#
# Prints [] when the path is empty, missing or unreadable. Lines that are
# blank, not valid JSON, or not JSON objects are skipped.
historian_transcript_load() {
  local path="${1:-}"
  [[ -z "$path" || ! -f "$path" ]] && { echo '[]'; return 0; }

  python3 - "$path" <<'PY'
import json, sys

ROLES = ("user", "assistant")


def flatten(raw):
    """Reduce a `content` value to plain text ("" when there is none)."""
    if isinstance(raw, str):
        return raw
    if isinstance(raw, list):
        # Content-blocks form: keep text blocks, drop everything else.
        parts = []
        for block in raw:
            if not isinstance(block, dict):
                continue
            if block.get("type") in (None, "text"):
                text = block.get("text")
                if isinstance(text, str) and text:
                    parts.append(text)
        return "\n\n".join(parts)
    if raw is None or isinstance(raw, dict):
        # str() would turn these into "None" or a dict repr and index that.
        return ""
    return str(raw)


path = sys.argv[1]
out = []
try:
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
            except ValueError:
                continue
            if not isinstance(rec, dict):
                # A bare array, string or number is not a transcript entry.
                continue

            message = rec.get("message")
            if not isinstance(message, dict):
                message = {}

            role = rec.get("role") or rec.get("type")
            if role not in ROLES:
                role = message.get("role")
            if role not in ROLES:
                continue

            raw = rec.get("content")
            if raw is None:
                raw = message.get("content")

            content = flatten(raw).strip()
            if not content:
                continue
            out.append({
                "turn_index": len(out),
                "role": role,
                "content": content,
            })
except OSError:
    # Unreadable transcript: fall through and report whatever was collected.
    pass

print(json.dumps(out))
PY
}
77
+
78
# Print the summed length of the `content` field across all turns
# (0 for an empty array).
#
# Usage: historian_transcript_char_count <turns_json>
historian_transcript_char_count() {
  local turns_json="${1:-[]}"
  jq '[.[] | (.content | length)] | add // 0' <<<"$turns_json" 2>/dev/null
}