@onlooker-community/ecosystem 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/.claude-plugin/marketplace.json +39 -0
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.release-please-manifest.json +4 -2
  4. package/CHANGELOG.md +14 -0
  5. package/package.json +2 -2
  6. package/plugins/curator/.claude-plugin/plugin.json +14 -0
  7. package/plugins/curator/CHANGELOG.md +10 -0
  8. package/plugins/curator/README.md +55 -0
  9. package/plugins/curator/config.json +41 -0
  10. package/plugins/curator/hooks/hooks.json +15 -0
  11. package/plugins/curator/scripts/hooks/curator-session-start.sh +343 -0
  12. package/plugins/curator/scripts/lib/curator-checks.sh +155 -0
  13. package/plugins/curator/scripts/lib/curator-config.sh +67 -0
  14. package/plugins/curator/scripts/lib/curator-emit.sh +61 -0
  15. package/plugins/curator/scripts/lib/curator-memory-reader.sh +225 -0
  16. package/plugins/curator/scripts/lib/curator-project-key.sh +82 -0
  17. package/plugins/curator/scripts/lib/curator-storage.sh +176 -0
  18. package/plugins/curator/scripts/lib/curator-ulid.sh +43 -0
  19. package/plugins/historian/.claude-plugin/plugin.json +14 -0
  20. package/plugins/historian/CHANGELOG.md +10 -0
  21. package/plugins/historian/README.md +70 -0
  22. package/plugins/historian/config.json +30 -0
  23. package/plugins/historian/hooks/hooks.json +26 -0
  24. package/plugins/historian/scripts/hooks/historian-prompt-submit.sh +15 -0
  25. package/plugins/historian/scripts/hooks/historian-session-end.sh +204 -0
  26. package/plugins/historian/scripts/lib/historian-chunker.sh +129 -0
  27. package/plugins/historian/scripts/lib/historian-config.sh +66 -0
  28. package/plugins/historian/scripts/lib/historian-emit.sh +61 -0
  29. package/plugins/historian/scripts/lib/historian-project-key.sh +80 -0
  30. package/plugins/historian/scripts/lib/historian-sanitizer.sh +123 -0
  31. package/plugins/historian/scripts/lib/historian-storage.sh +110 -0
  32. package/plugins/historian/scripts/lib/historian-transcript.sh +83 -0
  33. package/plugins/historian/scripts/lib/historian-ulid.sh +43 -0
  34. package/release-please-config.json +32 -0
  35. package/test/bats/curator-session-start.bats +316 -0
  36. package/test/bats/historian-session-end.bats +296 -0
@@ -0,0 +1,296 @@
1
+ #!/usr/bin/env bats
2
+ #
3
+ # Exercises the historian SessionEnd indexing pipeline end-to-end:
4
+ # transcript reader -> chunker -> sanitizer -> JSONL store.
5
+ #
6
+ # The test fixtures construct sensitive-shaped strings at runtime via
7
+ # printf from split literals rather than embedding the patterns inline, so the
8
+ # repo-wide secret-scanner hook does not refuse to commit this file.
9
+
10
# Per-test fixture: builds a throwaway git project, resolves the historian
# storage paths for it, and enables historian with small chunking thresholds.
# Globals written: PLUGIN_ROOT, HOOK, PROJECT_REPO, PROJECT_KEY, HIST_DIR,
#                  ONLOOKER_EVENTS_LOG, TRANSCRIPT, SESSION_ID
# Globals exported: CLAUDE_PLUGIN_ROOT, ONLOOKER_ECOSYSTEM_ROOT
setup() {
  # REPO_ROOT and ONLOOKER_DIR are read below but never assigned in this
  # file — presumably setup_test_env provides them (confirm in the helper).
  source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  PLUGIN_ROOT="${REPO_ROOT}/plugins/historian"
  HOOK="${PLUGIN_ROOT}/scripts/hooks/historian-session-end.sh"
  export CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT"
  export ONLOOKER_ECOSYSTEM_ROOT="$REPO_ROOT"

  # A scratch git repository with an origin remote, which is what the project
  # key helper is handed below.
  PROJECT_REPO="${BATS_TEST_TMPDIR}/repo"
  mkdir -p "$PROJECT_REPO"
  local -a in_repo=(git -C "$PROJECT_REPO")
  "${in_repo[@]}" init -q
  "${in_repo[@]}" config user.email t@example.com
  "${in_repo[@]}" config user.name "Test"
  "${in_repo[@]}" remote add origin git@github.com:org/historian-test.git

  # shellcheck disable=SC1091
  source "${PLUGIN_ROOT}/scripts/lib/historian-project-key.sh"
  PROJECT_KEY=$(historian_project_key "$PROJECT_REPO")
  # An empty key would make every path below point at the wrong directory.
  [ -n "$PROJECT_KEY" ]

  HIST_DIR="${ONLOOKER_DIR}/historian/${PROJECT_KEY}"
  ONLOOKER_EVENTS_LOG="${ONLOOKER_DIR}/logs/onlooker-events.jsonl"

  # The transcript path is only named here; the file is created lazily by the
  # _append_* helpers, so tests that append nothing see a missing transcript.
  SESSION_ID="sess-hist-test"
  TRANSCRIPT="${BATS_TEST_TMPDIR}/transcript.jsonl"

  # Default project settings: historian on, low thresholds so short fixture
  # transcripts still get chunked.
  local settings_dir="${PROJECT_REPO}/.claude"
  mkdir -p "$settings_dir"
  printf '%s\n' '{"historian":{"enabled":true,"indexing":{"min_transcript_chars_to_index":50,"chunk_target_chars":400,"chunk_overlap_chars":50}}}' \
    > "${settings_dir}/settings.json"
}
42
+
43
# Prints the SessionEnd hook payload as one compact JSON line on stdout.
# Globals read: PROJECT_REPO, SESSION_ID, TRANSCRIPT
_input() {
  jq --null-input --compact-output \
    --arg repo "$PROJECT_REPO" \
    --arg session "$SESSION_ID" \
    --arg path "$TRANSCRIPT" \
    '{
      cwd: $repo,
      session_id: $session,
      transcript_path: $path,
      hook_event_name: "SessionEnd"
    }'
}
48
+
49
# Appends one transcript turn whose content is a plain string.
# Arguments: $1 - role, $2 - turn text
# Globals read: TRANSCRIPT (file is created on first append)
_append_text_turn() {
  local speaker="$1"
  local body="$2"
  jq --null-input --compact-output \
    --arg role "$speaker" \
    --arg content "$body" \
    '{role: $role, content: $content}' >> "$TRANSCRIPT"
}
54
+
55
# Appends one transcript turn whose content is a block array: a text block
# carrying $2 followed by a fixed tool_use block (Read of /tmp/x).
# Arguments: $1 - role, $2 - text for the text block
# Globals read: TRANSCRIPT (file is created on first append)
_append_block_turn() {
  local speaker="$1"
  local body="$2"
  jq --null-input --compact-output \
    --arg role "$speaker" \
    --arg text "$body" \
    '{
      role: $role,
      content: [
        {type: "text", text: $text},
        {type: "tool_use", name: "Read", input: {file_path: "/tmp/x"}}
      ]
    }' >> "$TRANSCRIPT"
}
63
+
64
# Prints the number of records in this session's chunk file, or 0 when the
# file does not exist.
# Globals read: HIST_DIR, SESSION_ID
_chunk_count() {
  local file="${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
  [ -f "$file" ] || { echo 0; return 0; }
  # awk counts records rather than newline characters, so a final record
  # written without a trailing newline is still counted, and the output has
  # no platform-dependent padding to strip.
  awk 'END { print NR }' "$file"
}
69
+
70
+ @test "session-end is a no-op when historian is disabled" {
71
+ rm -f "${PROJECT_REPO}/.claude/settings.json"
72
+ _append_text_turn "user" "$(printf 'long enough %.0s' {1..30})"
73
+ _append_text_turn "assistant" "ok"
74
+
75
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
76
+ [ "$status" -eq 0 ]
77
+ [ ! -f "${HIST_DIR}/sessions/${SESSION_ID}.jsonl" ]
78
+ [ ! -f "$ONLOOKER_EVENTS_LOG" ] || ! grep -q 'historian' "$ONLOOKER_EVENTS_LOG"
79
+ }
80
+
81
+ @test "session-end emits skip_reason transcript_unavailable when path missing" {
82
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
83
+ [ "$status" -eq 0 ]
84
+ grep '"event_type":"historian.indexing.complete"' "$ONLOOKER_EVENTS_LOG" \
85
+ | jq -e '.payload.outcome == "skipped" and .payload.skip_reason == "transcript_unavailable"' >/dev/null
86
+ }
87
+
88
+ @test "session-end emits skip_reason too_short for a tiny transcript" {
89
+ _append_text_turn "user" "hi"
90
+ _append_text_turn "assistant" "yo"
91
+
92
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
93
+ [ "$status" -eq 0 ]
94
+ grep '"event_type":"historian.indexing.complete"' "$ONLOOKER_EVENTS_LOG" \
95
+ | jq -e '.payload.outcome == "skipped" and .payload.skip_reason == "too_short"' >/dev/null
96
+ }
97
+
98
+ @test "session-end indexes a real transcript with provenance" {
99
+ _append_text_turn "user" "Investigating a flaky test in the auth middleware path. The CI run https://example.com/foo failed on retry 3."
100
+ _append_text_turn "assistant" "Looking at it now. The root cause is a race between session token cache invalidation and the redirect retry loop."
101
+ _append_text_turn "user" "What's the proposed fix?"
102
+ _append_text_turn "assistant" "Move cache invalidation into the redirect handler, so it runs before the retry, not concurrently."
103
+
104
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
105
+ [ "$status" -eq 0 ]
106
+
107
+ local count
108
+ count=$(_chunk_count)
109
+ [ "$count" -ge 1 ]
110
+
111
+ jq -e '.chunk_id != null and .session_id != null and .body_redacted != null
112
+ and .body_chars > 0 and .chunk_index >= 0
113
+ and .start_turn_index >= 0 and .end_turn_index >= .start_turn_index' \
114
+ "${HIST_DIR}/sessions/${SESSION_ID}.jsonl" >/dev/null
115
+
116
+ grep '"event_type":"historian.indexing.complete"' "$ONLOOKER_EVENTS_LOG" \
117
+ | jq -e ".payload.outcome == \"ok\" and .payload.chunks_indexed == $count" >/dev/null
118
+ }
119
+
120
+ @test "session-end redacts secret-shaped substrings" {
121
+ # Construct secret-shaped strings at runtime to keep the literal
122
+ # patterns out of the bats source file (the repo's secret-scanner
123
+ # PreToolUse hook would otherwise refuse to write this file).
124
+ local fake_aws fake_gh fake_anthropic
125
+ fake_aws="A${KIA_PREFIX:-KIA}ABCDEFGHIJKLMNOP"
126
+ fake_aws="AK${fake_aws:1}"
127
+ fake_gh="g""hp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
128
+ fake_anthropic="s""k-ant-veryverylongtokenvalue1234"
129
+ local turn_body
130
+ turn_body=$(printf "Here's an AWS key: %s. And a GitHub PAT %s. And API_TOKEN=%s. And Bearer abcdefghijklmnopqrstuvwxyz." \
131
+ "$fake_aws" "$fake_gh" "$fake_anthropic")
132
+ _append_text_turn "user" "$turn_body"
133
+
134
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
135
+ [ "$status" -eq 0 ]
136
+
137
+ local jsonl="${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
138
+ ! grep -F -q "$fake_aws" "$jsonl"
139
+ ! grep -F -q "$fake_gh" "$jsonl"
140
+ ! grep -F -q "$fake_anthropic" "$jsonl"
141
+ grep -q 'REDACTED:secret' "$jsonl"
142
+ jq -e '.redaction_count > 0' "$jsonl" >/dev/null
143
+ grep -q '"event_type":"historian.chunk.sanitized"' "$ONLOOKER_EVENTS_LOG"
144
+ }
145
+
146
+ @test "session-end drops chunks containing the skip marker" {
147
+ local marker
148
+ marker='[hist''orian:skip]' # split literal so this source file does not embed it
149
+ _append_text_turn "user" "$(printf 'normal turn %.0s' {1..30})"
150
+ _append_text_turn "assistant" "ack"
151
+ _append_text_turn "user" "this turn is meant to be sensitive ${marker} please ignore"
152
+ _append_text_turn "assistant" "$(printf 'second turn %.0s' {1..30})"
153
+
154
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
155
+ [ "$status" -eq 0 ]
156
+ [ "$(_chunk_count)" -ge 1 ]
157
+
158
+ ! grep -F -q "$marker" "${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
159
+ ! grep -q 'meant to be sensitive' "${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
160
+
161
+ grep '"event_type":"historian.chunk.dropped"' "$ONLOOKER_EVENTS_LOG" \
162
+ | jq -e '.payload.reason == "skip_marker"' >/dev/null
163
+ }
164
+
165
+ @test "session-end drops chunks referencing never_index_paths" {
166
+ printf '%s\n' \
167
+ '{"historian":{"enabled":true,"indexing":{"min_transcript_chars_to_index":50,"chunk_target_chars":400,"chunk_overlap_chars":50},"sanitization":{"never_index_paths":["restricted/notes.md"]}}}' \
168
+ > "${PROJECT_REPO}/.claude/settings.json"
169
+
170
+ _append_text_turn "user" "$(printf 'first chunk %.0s' {1..30})"
171
+ _append_text_turn "assistant" "second turn references restricted/notes.md which must be dropped from the index entirely"
172
+ _append_text_turn "user" "$(printf 'third chunk %.0s' {1..30})"
173
+
174
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
175
+ [ "$status" -eq 0 ]
176
+
177
+ ! grep -q 'restricted/notes.md' "${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
178
+
179
+ grep '"event_type":"historian.chunk.dropped"' "$ONLOOKER_EVENTS_LOG" \
180
+ | jq -e '.payload.reason == "never_index_path"' >/dev/null
181
+ }
182
+
183
+ @test "session-end drops tool_use blocks before chunking" {
184
+ _append_text_turn "user" "$(printf 'long enough %.0s' {1..30})"
185
+ _append_block_turn "assistant" "Plain spoken assistant text that should appear in the index."
186
+
187
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
188
+ [ "$status" -eq 0 ]
189
+
190
+ [ "$(_chunk_count)" -ge 1 ]
191
+ grep -q 'Plain spoken assistant text' "${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
192
+ ! grep -q 'tool_use' "${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
193
+ ! grep -q '/tmp/x' "${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
194
+ }
195
+
196
+ @test "session-end is idempotent on re-run (replaces, not appends)" {
197
+ _append_text_turn "user" "$(printf 'first index %.0s' {1..30})"
198
+ _append_text_turn "assistant" "$(printf 'response %.0s' {1..30})"
199
+
200
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
201
+ [ "$status" -eq 0 ]
202
+ local first_count
203
+ first_count=$(_chunk_count)
204
+ [ "$first_count" -ge 1 ]
205
+
206
+ rm -f "$ONLOOKER_EVENTS_LOG"
207
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
208
+ [ "$status" -eq 0 ]
209
+ local second_count
210
+ second_count=$(_chunk_count)
211
+ [ "$second_count" -eq "$first_count" ]
212
+
213
+ grep '"event_type":"historian.indexing.complete"' "$ONLOOKER_EVENTS_LOG" \
214
+ | jq -e ".payload.outcome == \"ok\" and .payload.chunks_indexed == $second_count" >/dev/null
215
+ }
216
+
217
+ @test "Bearer token redaction is case-insensitive" {
218
+ # Lowercase + mixed-case bearer variants — Copilot caught that the
219
+ # original regex only matched the title-case "Bearer" form.
220
+ local lower mixed
221
+ lower="b""earer abcdefghijklmnopqrstuvwxyz1234"
222
+ mixed="B""EARER zyxwvutsrqponmlkjihgfedcba98765432"
223
+ local body
224
+ body=$(printf "Headers: %s; also %s; padding here for length." "$lower" "$mixed")
225
+ _append_text_turn "user" "$body"
226
+
227
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
228
+ [ "$status" -eq 0 ]
229
+
230
+ local jsonl="${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
231
+ ! grep -F -q "abcdefghijklmnopqrstuvwxyz1234" "$jsonl"
232
+ ! grep -F -q "zyxwvutsrqponmlkjihgfedcba98765432" "$jsonl"
233
+ grep -q 'REDACTED:secret' "$jsonl"
234
+ }
235
+
236
+ @test "redact_secret_patterns=false leaves secret-shaped strings untouched" {
237
+ printf '%s\n' \
238
+ '{"historian":{"enabled":true,"indexing":{"min_transcript_chars_to_index":50,"chunk_target_chars":400,"chunk_overlap_chars":50},"sanitization":{"redact_secret_patterns":false}}}' \
239
+ > "${PROJECT_REPO}/.claude/settings.json"
240
+
241
+ # Synthetic AWS-shaped string. Without redaction it should pass through
242
+ # to the JSONL verbatim; the chunk's redaction_count should be 0.
243
+ local fake_aws="AK""IAABCDEFGHIJKLMNOP"
244
+ _append_text_turn "user" "Header: AWS=$fake_aws — please do not redact this value because the user explicitly opted out."
245
+
246
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
247
+ [ "$status" -eq 0 ]
248
+
249
+ local jsonl="${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
250
+ grep -F -q "$fake_aws" "$jsonl"
251
+ ! grep -q 'REDACTED:secret' "$jsonl"
252
+ jq -e '.redaction_count == 0' "$jsonl" >/dev/null
253
+ }
254
+
255
+ @test "drop_skip_marker=false keeps chunks containing the marker" {
256
+ printf '%s\n' \
257
+ '{"historian":{"enabled":true,"indexing":{"min_transcript_chars_to_index":50,"chunk_target_chars":400,"chunk_overlap_chars":50},"sanitization":{"drop_skip_marker":false}}}' \
258
+ > "${PROJECT_REPO}/.claude/settings.json"
259
+
260
+ local marker
261
+ marker='[hist''orian:skip]'
262
+ _append_text_turn "user" "Body that contains the ${marker} marker but should still be indexed when the flag is disabled. Padding to clear the min-chars threshold easily."
263
+
264
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
265
+ [ "$status" -eq 0 ]
266
+
267
+ local jsonl="${HIST_DIR}/sessions/${SESSION_ID}.jsonl"
268
+ [ "$(_chunk_count)" -ge 1 ]
269
+ grep -F -q "$marker" "$jsonl"
270
+ ! grep '"event_type":"historian.chunk.dropped"' "$ONLOOKER_EVENTS_LOG" \
271
+ | jq -e 'select(.payload.reason == "skip_marker")' >/dev/null || true
272
+ }
273
+
274
+ @test "historian.indexing.started reports a non-zero transcript_chars" {
275
+ # Previously the started event emitted transcript_chars: 0 because it
276
+ # fired before the transcript was read. Now it fires after the read,
277
+ # carrying the real character count.
278
+ _append_text_turn "user" "$(printf 'long enough for chars %.0s' {1..20})"
279
+ _append_text_turn "assistant" "$(printf 'response with content %.0s' {1..20})"
280
+
281
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
282
+ [ "$status" -eq 0 ]
283
+
284
+ grep '"event_type":"historian.indexing.started"' "$ONLOOKER_EVENTS_LOG" \
285
+ | jq -e '.payload.transcript_chars > 0' >/dev/null
286
+ }
287
+
288
+ @test "transcript_unavailable path emits complete without a started event" {
289
+ # When the transcript path is missing we never read it, so no started
290
+ # event makes it to the log. Only the complete-with-skip remains.
291
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
292
+ [ "$status" -eq 0 ]
293
+
294
+ ! grep -q '"event_type":"historian.indexing.started"' "$ONLOOKER_EVENTS_LOG"
295
+ grep -q '"event_type":"historian.indexing.complete"' "$ONLOOKER_EVENTS_LOG"
296
+ }