@onlooker-community/ecosystem 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +13 -0
- package/.claude-plugin/plugin.json +1 -1
- package/.release-please-manifest.json +3 -2
- package/CHANGELOG.md +7 -0
- package/package.json +2 -2
- package/plugins/historian/.claude-plugin/plugin.json +14 -0
- package/plugins/historian/CHANGELOG.md +10 -0
- package/plugins/historian/README.md +70 -0
- package/plugins/historian/config.json +30 -0
- package/plugins/historian/hooks/hooks.json +26 -0
- package/plugins/historian/scripts/hooks/historian-prompt-submit.sh +15 -0
- package/plugins/historian/scripts/hooks/historian-session-end.sh +204 -0
- package/plugins/historian/scripts/lib/historian-chunker.sh +129 -0
- package/plugins/historian/scripts/lib/historian-config.sh +66 -0
- package/plugins/historian/scripts/lib/historian-emit.sh +61 -0
- package/plugins/historian/scripts/lib/historian-project-key.sh +80 -0
- package/plugins/historian/scripts/lib/historian-sanitizer.sh +123 -0
- package/plugins/historian/scripts/lib/historian-storage.sh +110 -0
- package/plugins/historian/scripts/lib/historian-transcript.sh +83 -0
- package/plugins/historian/scripts/lib/historian-ulid.sh +43 -0
- package/release-please-config.json +16 -0
- package/test/bats/historian-session-end.bats +296 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
#
|
|
3
|
+
# Exercises the historian SessionEnd indexing pipeline end-to-end:
|
|
4
|
+
# transcript reader -> chunker -> sanitizer -> JSONL store.
|
|
5
|
+
#
|
|
6
|
+
# The test fixtures construct sensitive-shaped strings at runtime via
|
|
7
|
+
# printf rather than embedding the literal patterns inline, so the
|
|
8
|
+
# repo-wide secret-scanner hook does not refuse to commit this file.
|
|
9
|
+
|
|
10
|
+
# Per-test fixture: creates a throwaway git repository with historian enabled
# and publishes the paths the tests below rely on.
#
# Globals written: PLUGIN_ROOT, CLAUDE_PLUGIN_ROOT (exported),
#   ONLOOKER_ECOSYSTEM_ROOT (exported), PROJECT_REPO, PROJECT_KEY, HIST_DIR,
#   ONLOOKER_EVENTS_LOG, TRANSCRIPT, SESSION_ID, HOOK.
# Globals read: BATS_TEST_DIRNAME, BATS_TEST_TMPDIR, plus REPO_ROOT and
#   ONLOOKER_DIR (presumably set by setup_test_env; confirm in helpers/setup.bash).
setup() {
  source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  PLUGIN_ROOT="${REPO_ROOT}/plugins/historian"
  export CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT"
  export ONLOOKER_ECOSYSTEM_ROOT="$REPO_ROOT"

  # Scratch repository with an origin remote, which is passed to
  # historian_project_key below.
  PROJECT_REPO="${BATS_TEST_TMPDIR}/repo"
  mkdir -p "$PROJECT_REPO"
  git -C "$PROJECT_REPO" init -q
  git -C "$PROJECT_REPO" config user.email t@example.com
  git -C "$PROJECT_REPO" config user.name "Test"
  git -C "$PROJECT_REPO" remote add origin git@github.com:org/historian-test.git

  # shellcheck disable=SC1091
  source "${PLUGIN_ROOT}/scripts/lib/historian-project-key.sh"
  PROJECT_KEY=$(historian_project_key "$PROJECT_REPO")
  # An empty key would make HIST_DIR collapse onto the historian root.
  [ -n "$PROJECT_KEY" ]

  HOOK="${PLUGIN_ROOT}/scripts/hooks/historian-session-end.sh"
  SESSION_ID="sess-hist-test"
  TRANSCRIPT="${BATS_TEST_TMPDIR}/transcript.jsonl"
  HIST_DIR="${ONLOOKER_DIR}/historian/${PROJECT_KEY}"
  ONLOOKER_EVENTS_LOG="${ONLOOKER_DIR}/logs/onlooker-events.jsonl"

  # Small thresholds so short fixture transcripts are still indexed.
  local settings_json='{"historian":{"enabled":true,"indexing":{"min_transcript_chars_to_index":50,"chunk_target_chars":400,"chunk_overlap_chars":50}}}'
  mkdir -p "${PROJECT_REPO}/.claude"
  printf '%s\n' "$settings_json" > "${PROJECT_REPO}/.claude/settings.json"
}
|
|
42
|
+
|
|
43
|
+
# Prints the SessionEnd hook payload as one compact JSON object on stdout.
# Globals read: PROJECT_REPO, SESSION_ID, TRANSCRIPT.
_input() {
  jq --compact-output --null-input \
    --arg cwd "$PROJECT_REPO" \
    --arg sid "$SESSION_ID" \
    --arg transcript "$TRANSCRIPT" \
    '{cwd:$cwd, session_id:$sid, transcript_path:$transcript, hook_event_name:"SessionEnd"}'
}
|
|
48
|
+
|
|
49
|
+
# Appends one transcript line whose content is a plain string.
# Arguments: $1 - role, $2 - message text.
# Globals read: TRANSCRIPT (file appended to; created on first use).
_append_text_turn() {
  local speaker="$1"
  local message="$2"
  jq --compact-output --null-input \
    --arg role "$speaker" \
    --arg text "$message" \
    '{role: $role, content: $text}' >> "$TRANSCRIPT"
}
|
|
54
|
+
|
|
55
|
+
# Appends one transcript line whose content is a block array: a text block
# carrying $2 followed by a fixed tool_use block (Read of /tmp/x).
# Arguments: $1 - role, $2 - text for the text block.
# Globals read: TRANSCRIPT (file appended to; created on first use).
_append_block_turn() {
  local speaker="$1"
  local message="$2"
  jq --compact-output --null-input \
    --arg role "$speaker" \
    --arg text "$message" \
    '{role: $role, content: [
      { type: "text", text: $text },
      { type: "tool_use", name: "Read", input: { file_path: "/tmp/x" } }
    ]}' >> "$TRANSCRIPT"
}
|
|
63
|
+
|
|
64
|
+
# Prints the number of lines in the current session's chunk file,
# or 0 when that file does not exist.
# Globals read: HIST_DIR, SESSION_ID.
_chunk_count() {
  local session_file="${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
  if [ ! -f "$session_file" ]; then
    echo 0
    return 0
  fi
  # Some wc implementations pad the count with spaces; strip them.
  wc -l < "$session_file" | tr -d ' '
}
|
|
69
|
+
|
|
70
|
+
@test "session-end is a no-op when historian is disabled" {
|
|
71
|
+
rm -f "${PROJECT_REPO}/.claude/settings.json"
|
|
72
|
+
_append_text_turn "user" "$(printf 'long enough %.0s' {1..30})"
|
|
73
|
+
_append_text_turn "assistant" "ok"
|
|
74
|
+
|
|
75
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
76
|
+
[ "$status" -eq 0 ]
|
|
77
|
+
[ ! -f "${HIST_DIR}/sessions/${SESSION_ID}.jsonl" ]
|
|
78
|
+
[ ! -f "$ONLOOKER_EVENTS_LOG" ] || ! grep -q 'historian' "$ONLOOKER_EVENTS_LOG"
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
@test "session-end emits skip_reason transcript_unavailable when path missing" {
|
|
82
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
83
|
+
[ "$status" -eq 0 ]
|
|
84
|
+
grep '"event_type":"historian.indexing.complete"' "$ONLOOKER_EVENTS_LOG" \
|
|
85
|
+
| jq -e '.payload.outcome == "skipped" and .payload.skip_reason == "transcript_unavailable"' >/dev/null
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
@test "session-end emits skip_reason too_short for a tiny transcript" {
|
|
89
|
+
_append_text_turn "user" "hi"
|
|
90
|
+
_append_text_turn "assistant" "yo"
|
|
91
|
+
|
|
92
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
93
|
+
[ "$status" -eq 0 ]
|
|
94
|
+
grep '"event_type":"historian.indexing.complete"' "$ONLOOKER_EVENTS_LOG" \
|
|
95
|
+
| jq -e '.payload.outcome == "skipped" and .payload.skip_reason == "too_short"' >/dev/null
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
@test "session-end indexes a real transcript with provenance" {
|
|
99
|
+
_append_text_turn "user" "Investigating a flaky test in the auth middleware path. The CI run https://example.com/foo failed on retry 3."
|
|
100
|
+
_append_text_turn "assistant" "Looking at it now. The root cause is a race between session token cache invalidation and the redirect retry loop."
|
|
101
|
+
_append_text_turn "user" "What's the proposed fix?"
|
|
102
|
+
_append_text_turn "assistant" "Move cache invalidation into the redirect handler, so it runs before the retry, not concurrently."
|
|
103
|
+
|
|
104
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
105
|
+
[ "$status" -eq 0 ]
|
|
106
|
+
|
|
107
|
+
local count
|
|
108
|
+
count=$(_chunk_count)
|
|
109
|
+
[ "$count" -ge 1 ]
|
|
110
|
+
|
|
111
|
+
jq -e '.chunk_id != null and .session_id != null and .body_redacted != null
|
|
112
|
+
and .body_chars > 0 and .chunk_index >= 0
|
|
113
|
+
and .start_turn_index >= 0 and .end_turn_index >= .start_turn_index' \
|
|
114
|
+
"${HIST_DIR}/sessions/${SESSION_ID}.jsonl" >/dev/null
|
|
115
|
+
|
|
116
|
+
grep '"event_type":"historian.indexing.complete"' "$ONLOOKER_EVENTS_LOG" \
|
|
117
|
+
| jq -e ".payload.outcome == \"ok\" and .payload.chunks_indexed == $count" >/dev/null
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
@test "session-end redacts secret-shaped substrings" {
|
|
121
|
+
# Construct secret-shaped strings at runtime to keep the literal
|
|
122
|
+
# patterns out of the bats source file (the repo's secret-scanner
|
|
123
|
+
# PreToolUse hook would otherwise refuse to write this file).
|
|
124
|
+
local fake_aws fake_gh fake_anthropic
|
|
125
|
+
fake_aws="A${KIA_PREFIX:-KIA}ABCDEFGHIJKLMNOP"
|
|
126
|
+
fake_aws="AK${fake_aws:1}"
|
|
127
|
+
fake_gh="g""hp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
|
|
128
|
+
fake_anthropic="s""k-ant-veryverylongtokenvalue1234"
|
|
129
|
+
local turn_body
|
|
130
|
+
turn_body=$(printf "Here's an AWS key: %s. And a GitHub PAT %s. And API_TOKEN=%s. And Bearer abcdefghijklmnopqrstuvwxyz." \
|
|
131
|
+
"$fake_aws" "$fake_gh" "$fake_anthropic")
|
|
132
|
+
_append_text_turn "user" "$turn_body"
|
|
133
|
+
|
|
134
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
135
|
+
[ "$status" -eq 0 ]
|
|
136
|
+
|
|
137
|
+
local jsonl="${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
|
|
138
|
+
! grep -F -q "$fake_aws" "$jsonl"
|
|
139
|
+
! grep -F -q "$fake_gh" "$jsonl"
|
|
140
|
+
! grep -F -q "$fake_anthropic" "$jsonl"
|
|
141
|
+
grep -q 'REDACTED:secret' "$jsonl"
|
|
142
|
+
jq -e '.redaction_count > 0' "$jsonl" >/dev/null
|
|
143
|
+
grep -q '"event_type":"historian.chunk.sanitized"' "$ONLOOKER_EVENTS_LOG"
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
@test "session-end drops chunks containing the skip marker" {
|
|
147
|
+
local marker
|
|
148
|
+
marker='[hist''orian:skip]' # split literal so this source file does not embed it
|
|
149
|
+
_append_text_turn "user" "$(printf 'normal turn %.0s' {1..30})"
|
|
150
|
+
_append_text_turn "assistant" "ack"
|
|
151
|
+
_append_text_turn "user" "this turn is meant to be sensitive ${marker} please ignore"
|
|
152
|
+
_append_text_turn "assistant" "$(printf 'second turn %.0s' {1..30})"
|
|
153
|
+
|
|
154
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
155
|
+
[ "$status" -eq 0 ]
|
|
156
|
+
[ "$(_chunk_count)" -ge 1 ]
|
|
157
|
+
|
|
158
|
+
! grep -F -q "$marker" "${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
|
|
159
|
+
! grep -q 'meant to be sensitive' "${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
|
|
160
|
+
|
|
161
|
+
grep '"event_type":"historian.chunk.dropped"' "$ONLOOKER_EVENTS_LOG" \
|
|
162
|
+
| jq -e '.payload.reason == "skip_marker"' >/dev/null
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
@test "session-end drops chunks referencing never_index_paths" {
|
|
166
|
+
printf '%s\n' \
|
|
167
|
+
'{"historian":{"enabled":true,"indexing":{"min_transcript_chars_to_index":50,"chunk_target_chars":400,"chunk_overlap_chars":50},"sanitization":{"never_index_paths":["restricted/notes.md"]}}}' \
|
|
168
|
+
> "${PROJECT_REPO}/.claude/settings.json"
|
|
169
|
+
|
|
170
|
+
_append_text_turn "user" "$(printf 'first chunk %.0s' {1..30})"
|
|
171
|
+
_append_text_turn "assistant" "second turn references restricted/notes.md which must be dropped from the index entirely"
|
|
172
|
+
_append_text_turn "user" "$(printf 'third chunk %.0s' {1..30})"
|
|
173
|
+
|
|
174
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
175
|
+
[ "$status" -eq 0 ]
|
|
176
|
+
|
|
177
|
+
! grep -q 'restricted/notes.md' "${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
|
|
178
|
+
|
|
179
|
+
grep '"event_type":"historian.chunk.dropped"' "$ONLOOKER_EVENTS_LOG" \
|
|
180
|
+
| jq -e '.payload.reason == "never_index_path"' >/dev/null
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
@test "session-end drops tool_use blocks before chunking" {
|
|
184
|
+
_append_text_turn "user" "$(printf 'long enough %.0s' {1..30})"
|
|
185
|
+
_append_block_turn "assistant" "Plain spoken assistant text that should appear in the index."
|
|
186
|
+
|
|
187
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
188
|
+
[ "$status" -eq 0 ]
|
|
189
|
+
|
|
190
|
+
[ "$(_chunk_count)" -ge 1 ]
|
|
191
|
+
grep -q 'Plain spoken assistant text' "${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
|
|
192
|
+
! grep -q 'tool_use' "${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
|
|
193
|
+
! grep -q '/tmp/x' "${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
@test "session-end is idempotent on re-run (replaces, not appends)" {
|
|
197
|
+
_append_text_turn "user" "$(printf 'first index %.0s' {1..30})"
|
|
198
|
+
_append_text_turn "assistant" "$(printf 'response %.0s' {1..30})"
|
|
199
|
+
|
|
200
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
201
|
+
[ "$status" -eq 0 ]
|
|
202
|
+
local first_count
|
|
203
|
+
first_count=$(_chunk_count)
|
|
204
|
+
[ "$first_count" -ge 1 ]
|
|
205
|
+
|
|
206
|
+
rm -f "$ONLOOKER_EVENTS_LOG"
|
|
207
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
208
|
+
[ "$status" -eq 0 ]
|
|
209
|
+
local second_count
|
|
210
|
+
second_count=$(_chunk_count)
|
|
211
|
+
[ "$second_count" -eq "$first_count" ]
|
|
212
|
+
|
|
213
|
+
grep '"event_type":"historian.indexing.complete"' "$ONLOOKER_EVENTS_LOG" \
|
|
214
|
+
| jq -e ".payload.outcome == \"ok\" and .payload.chunks_indexed == $second_count" >/dev/null
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
@test "Bearer token redaction is case-insensitive" {
|
|
218
|
+
# Lowercase + mixed-case bearer variants — Copilot caught that the
|
|
219
|
+
# original regex only matched the title-case "Bearer" form.
|
|
220
|
+
local lower mixed
|
|
221
|
+
lower="b""earer abcdefghijklmnopqrstuvwxyz1234"
|
|
222
|
+
mixed="B""EARER zyxwvutsrqponmlkjihgfedcba98765432"
|
|
223
|
+
local body
|
|
224
|
+
body=$(printf "Headers: %s; also %s; padding here for length." "$lower" "$mixed")
|
|
225
|
+
_append_text_turn "user" "$body"
|
|
226
|
+
|
|
227
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
228
|
+
[ "$status" -eq 0 ]
|
|
229
|
+
|
|
230
|
+
local jsonl="${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
|
|
231
|
+
! grep -F -q "abcdefghijklmnopqrstuvwxyz1234" "$jsonl"
|
|
232
|
+
! grep -F -q "zyxwvutsrqponmlkjihgfedcba98765432" "$jsonl"
|
|
233
|
+
grep -q 'REDACTED:secret' "$jsonl"
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
@test "redact_secret_patterns=false leaves secret-shaped strings untouched" {
|
|
237
|
+
printf '%s\n' \
|
|
238
|
+
'{"historian":{"enabled":true,"indexing":{"min_transcript_chars_to_index":50,"chunk_target_chars":400,"chunk_overlap_chars":50},"sanitization":{"redact_secret_patterns":false}}}' \
|
|
239
|
+
> "${PROJECT_REPO}/.claude/settings.json"
|
|
240
|
+
|
|
241
|
+
# Synthetic AWS-shaped string. Without redaction it should pass through
|
|
242
|
+
# to the JSONL verbatim; the chunk's redaction_count should be 0.
|
|
243
|
+
local fake_aws="AK""IAABCDEFGHIJKLMNOP"
|
|
244
|
+
_append_text_turn "user" "Header: AWS=$fake_aws — please do not redact this value because the user explicitly opted out."
|
|
245
|
+
|
|
246
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
247
|
+
[ "$status" -eq 0 ]
|
|
248
|
+
|
|
249
|
+
local jsonl="${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
|
|
250
|
+
grep -F -q "$fake_aws" "$jsonl"
|
|
251
|
+
! grep -q 'REDACTED:secret' "$jsonl"
|
|
252
|
+
jq -e '.redaction_count == 0' "$jsonl" >/dev/null
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
@test "drop_skip_marker=false keeps chunks containing the marker" {
|
|
256
|
+
printf '%s\n' \
|
|
257
|
+
'{"historian":{"enabled":true,"indexing":{"min_transcript_chars_to_index":50,"chunk_target_chars":400,"chunk_overlap_chars":50},"sanitization":{"drop_skip_marker":false}}}' \
|
|
258
|
+
> "${PROJECT_REPO}/.claude/settings.json"
|
|
259
|
+
|
|
260
|
+
local marker
|
|
261
|
+
marker='[hist''orian:skip]'
|
|
262
|
+
_append_text_turn "user" "Body that contains the ${marker} marker but should still be indexed when the flag is disabled. Padding to clear the min-chars threshold easily."
|
|
263
|
+
|
|
264
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
265
|
+
[ "$status" -eq 0 ]
|
|
266
|
+
|
|
267
|
+
local jsonl="${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
|
|
268
|
+
[ "$(_chunk_count)" -ge 1 ]
|
|
269
|
+
grep -F -q "$marker" "$jsonl"
|
|
270
|
+
! grep '"event_type":"historian.chunk.dropped"' "$ONLOOKER_EVENTS_LOG" \
|
|
271
|
+
| jq -e 'select(.payload.reason == "skip_marker")' >/dev/null || true
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
@test "historian.indexing.started reports a non-zero transcript_chars" {
|
|
275
|
+
# Previously the started event emitted transcript_chars: 0 because it
|
|
276
|
+
# fired before the transcript was read. Now it fires after the read,
|
|
277
|
+
# carrying the real character count.
|
|
278
|
+
_append_text_turn "user" "$(printf 'long enough for chars %.0s' {1..20})"
|
|
279
|
+
_append_text_turn "assistant" "$(printf 'response with content %.0s' {1..20})"
|
|
280
|
+
|
|
281
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
282
|
+
[ "$status" -eq 0 ]
|
|
283
|
+
|
|
284
|
+
grep '"event_type":"historian.indexing.started"' "$ONLOOKER_EVENTS_LOG" \
|
|
285
|
+
| jq -e '.payload.transcript_chars > 0' >/dev/null
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
@test "transcript_unavailable path emits complete without a started event" {
|
|
289
|
+
# When the transcript path is missing we never read it, so no started
|
|
290
|
+
# event makes it to the log. Only the complete-with-skip remains.
|
|
291
|
+
run bash -c "printf '%s' '$(_input)' | '$HOOK'"
|
|
292
|
+
[ "$status" -eq 0 ]
|
|
293
|
+
|
|
294
|
+
! grep -q '"event_type":"historian.indexing.started"' "$ONLOOKER_EVENTS_LOG"
|
|
295
|
+
grep -q '"event_type":"historian.indexing.complete"' "$ONLOOKER_EVENTS_LOG"
|
|
296
|
+
}
|