@onlooker-community/ecosystem 0.18.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/.claude-plugin/marketplace.json +13 -0
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.release-please-manifest.json +4 -2
  4. package/CHANGELOG.md +14 -0
  5. package/CLAUDE.md +1 -0
  6. package/docs/memory-architecture.md +102 -0
  7. package/package.json +3 -3
  8. package/plugins/curator/docs/adr/001-staleness-tiers.md +100 -0
  9. package/plugins/curator/docs/design.md +311 -0
  10. package/plugins/historian/docs/adr/001-local-embeddings-only.md +96 -0
  11. package/plugins/historian/docs/design.md +317 -0
  12. package/plugins/librarian/.claude-plugin/plugin.json +14 -0
  13. package/plugins/librarian/CHANGELOG.md +10 -0
  14. package/plugins/librarian/README.md +51 -0
  15. package/plugins/librarian/config.json +52 -0
  16. package/plugins/librarian/docs/adr/001-propose-dont-auto-write.md +87 -0
  17. package/plugins/librarian/docs/design.md +301 -0
  18. package/plugins/librarian/hooks/hooks.json +26 -0
  19. package/plugins/librarian/scripts/hooks/librarian-session-end.sh +312 -0
  20. package/plugins/librarian/scripts/hooks/librarian-session-start.sh +103 -0
  21. package/plugins/librarian/scripts/lib/librarian-archivist-reader.sh +67 -0
  22. package/plugins/librarian/scripts/lib/librarian-classifier.sh +139 -0
  23. package/plugins/librarian/scripts/lib/librarian-config.sh +74 -0
  24. package/plugins/librarian/scripts/lib/librarian-durability.sh +77 -0
  25. package/plugins/librarian/scripts/lib/librarian-emit.sh +72 -0
  26. package/plugins/librarian/scripts/lib/librarian-project-key.sh +83 -0
  27. package/plugins/librarian/scripts/lib/librarian-storage.sh +222 -0
  28. package/plugins/librarian/scripts/lib/librarian-ulid.sh +50 -0
  29. package/plugins/warden/.claude-plugin/plugin.json +14 -0
  30. package/plugins/warden/CHANGELOG.md +10 -0
  31. package/plugins/warden/config.json +51 -0
  32. package/plugins/warden/docs/adr/001-detect-after-ingest-gate-before-action.md +62 -0
  33. package/plugins/warden/docs/design.md +123 -0
  34. package/plugins/warden/hooks/hooks.json +73 -0
  35. package/plugins/warden/scripts/hooks/warden-post-tool-use.sh +201 -0
  36. package/plugins/warden/scripts/hooks/warden-pre-tool-use.sh +94 -0
  37. package/plugins/warden/scripts/hooks/warden-session-start.sh +52 -0
  38. package/plugins/warden/scripts/lib/warden-cli.sh +124 -0
  39. package/plugins/warden/scripts/lib/warden-config.sh +79 -0
  40. package/plugins/warden/scripts/lib/warden-evaluator.sh +246 -0
  41. package/plugins/warden/scripts/lib/warden-events.sh +85 -0
  42. package/plugins/warden/scripts/lib/warden-gate-state.sh +105 -0
  43. package/plugins/warden/scripts/lib/warden-patterns.sh +132 -0
  44. package/plugins/warden/scripts/lib/warden-sanitizer.sh +80 -0
  45. package/plugins/warden/scripts/lib/warden-scanner.sh +119 -0
  46. package/plugins/warden/scripts/lib/warden-ulid.sh +50 -0
  47. package/plugins/warden/skills/warden/SKILL.md +49 -0
  48. package/release-please-config.json +32 -0
  49. package/test/bats/librarian-session-end.bats +182 -0
  50. package/test/bats/librarian-session-start.bats +136 -0
  51. package/test/bats/warden-config.bats +54 -0
  52. package/test/bats/warden-events.bats +85 -0
  53. package/test/bats/warden-gate-state.bats +67 -0
  54. package/test/bats/warden-patterns.bats +58 -0
  55. package/test/bats/warden-sanitizer.bats +53 -0
  56. package/test/bats/warden-scanner.bats +56 -0
  57. package/test/bats/warden-ulid.bats +30 -0
@@ -0,0 +1,119 @@
1
+ #!/usr/bin/env bash
2
+ # Hybrid scanner orchestration for Warden.
3
+ #
4
+ # Combines the deterministic pattern floor (warden-patterns.sh) with optional
5
+ # LLM escalation (warden-evaluator.sh):
6
+ #
7
+ # strong pattern hit → detected immediately (no model call)
8
+ # weak pattern hit → escalate to the evaluator when enabled; otherwise
9
+ # fall back to the weak-pattern confidence
10
+ # no hit → clean (no model call)
11
+ #
12
+ # On evaluator error the scanner falls back to the pattern verdict, so a model
13
+ # outage degrades coverage but never silently closes the gate on every read.
14
+ #
15
+ # Depends on (sourced by the caller):
16
+ # warden-config.sh · warden-patterns.sh · warden-sanitizer.sh · warden-evaluator.sh
17
+ #
18
+ # Exposes:
19
+ # warden_scan <source_type> <content>
20
+ # → JSON {"detected":bool, "threat_type":"<t>", "confidence":<f>,
21
+ # "matched_pattern":"<p>", "method":"<m>", "rationale":"<str>"}
22
+
23
# awk-based float >= comparison. Returns 0 (true) if $1 >= $2.
#
# Values are passed via `awk -v` (data), never interpolated into the program
# string: thresholds can originate from repo-level .claude/settings.json, which
# is untrusted under warden's threat model.
#
# Both operands are coerced with `+ 0` before comparing. Without the coercion
# awk treats a -v value that does not look numeric as a *string* and silently
# switches to lexical comparison (e.g. "high" >= "0.65" would be true). With
# it, non-numeric input degrades to 0.
#
# Arguments: $1 - left operand (defaults to 0 when empty/unset)
#            $2 - right operand (defaults to 0 when empty/unset)
# Returns:   0 if $1 >= $2 numerically; non-zero otherwise, or if awk fails.
_warden_ge() {
  awk -v a="${1:-0}" -v b="${2:-0}" 'BEGIN { exit !((a + 0) >= (b + 0)) }' 2>/dev/null
}
32
+
33
# Escape a string for embedding inside a JSON string literal. Only used by the
# printf fallback in _warden_scan_result, when jq is unavailable or fails.
_warden_json_escape() {
  local s="$1"
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  # JSON forbids raw control characters inside strings; drop what is left.
  printf '%s' "$s" | LC_ALL=C tr -d '\001-\037'
}

# Emit one scan verdict as a JSON object on stdout.
#
# Arguments:
#   $1 - detected:   "true" or "false" (anything else is emitted as false)
#   $2 - threat type
#   $3 - confidence: must be a JSON number; anything else is emitted as 0
#   $4 - matched pattern
#   $5 - detection method
#   $6 - rationale
# Outputs:
#   {"detected":bool,"threat_type":..,"confidence":..,"matched_pattern":..,
#    "method":..,"rationale":..}
#
# jq builds the object when it works. If jq fails, a printf fallback produces
# the same object; its string fields are escaped and its number/boolean fields
# are normalised first, so the fallback is still valid JSON even though the
# rationale and matched pattern can carry text derived from scanned content.
_warden_scan_result() {
  local detected="$1" threat="$2" confidence="$3" pattern="$4" method="$5" rationale="$6"
  local json_number='^-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?$'

  [[ "$detected" == "true" ]] || detected="false"
  # --argjson rejects non-JSON input, which used to push a raw, unquoted token
  # into the fallback output. Coerce to 0 up front instead.
  [[ "$confidence" =~ $json_number ]] || confidence="0"

  jq -n \
    --argjson detected "$detected" \
    --arg t "$threat" \
    --argjson c "$confidence" \
    --arg p "$pattern" \
    --arg m "$method" \
    --arg r "$rationale" \
    '{detected:$detected, threat_type:$t, confidence:$c, matched_pattern:$p, method:$m, rationale:$r}' \
    2>/dev/null \
    || printf '{"detected":%s,"threat_type":"%s","confidence":%s,"matched_pattern":"%s","method":"%s","rationale":"%s"}' \
      "$detected" \
      "$(_warden_json_escape "$threat")" \
      "$confidence" \
      "$(_warden_json_escape "$pattern")" \
      "$(_warden_json_escape "$method")" \
      "$(_warden_json_escape "$rationale")"
}
47
+
48
# Scan one piece of ingested content and print a JSON verdict on stdout.
#
# Flow:
#   1. Classify the content with warden_pattern_classify.
#   2. severity "none"   → clean verdict, no model call.
#   3. severity "strong" → verdict at strong_pattern_confidence, no model call.
#   4. any other severity is handled as weak: when .warden.escalation.enabled
#      is "true", the sanitized content goes to warden_evaluate. If the
#      evaluator does not return a usable "injection"/"clean" decision, or
#      escalation is off, the verdict falls back to weak_pattern_confidence.
# `detected` is true only when the verdict's confidence >= close_threshold.
#
# Thresholds come from config that may be repo-controlled, and the evaluator
# confidence comes from a model response, so every number is validated before
# it is compared or emitted. An invalid config value falls back to the shipped
# default; a decision with an invalid confidence is treated as an evaluator
# error.
#
# Arguments: $1 - source type (passed through to the evaluator)
#            $2 - content to scan
# Outputs:   JSON verdict object (see _warden_scan_result)
# Returns:   0
warden_scan() {
  local source_type="$1"
  local content="$2"
  # JSON number syntax: safe both for awk comparison and for jq --argjson.
  local num_re='^-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?$'
  local detected="false"

  local close_threshold strong_conf weak_conf
  close_threshold=$(warden_config_get '.warden.detection.close_threshold')
  [[ "$close_threshold" =~ $num_re ]] || close_threshold="0.65"
  strong_conf=$(warden_config_get '.warden.detection.strong_pattern_confidence')
  [[ "$strong_conf" =~ $num_re ]] || strong_conf="0.9"
  weak_conf=$(warden_config_get '.warden.detection.weak_pattern_confidence')
  [[ "$weak_conf" =~ $num_re ]] || weak_conf="0.5"

  local classify severity threat pattern
  classify=$(warden_pattern_classify "$content")
  severity=$(printf '%s' "$classify" | jq -r '.severity // "none"' 2>/dev/null) || severity="none"
  threat=$(printf '%s' "$classify" | jq -r '.threat_type // "none"' 2>/dev/null) || threat="none"
  pattern=$(printf '%s' "$classify" | jq -r '.matched_pattern // ""' 2>/dev/null) || pattern=""

  # ---- Clean: no signal at all. ------------------------------------
  if [[ "$severity" == "none" ]]; then
    _warden_scan_result false "none" 0 "" "none" "no injection pattern matched"
    return 0
  fi

  # ---- Strong: explicit, high-precision phrasing. ------------------
  if [[ "$severity" == "strong" ]]; then
    _warden_ge "$strong_conf" "$close_threshold" && detected="true"
    _warden_scan_result "$detected" "$threat" "$strong_conf" "$pattern" "pattern_strong" "matched a strong injection signature"
    return 0
  fi

  # ---- Weak: borderline. Escalate when enabled. --------------------
  local escalation_enabled
  escalation_enabled=$(warden_config_get '.warden.escalation.enabled')
  escalation_enabled="${escalation_enabled:-true}"

  if [[ "$escalation_enabled" == "true" ]]; then
    local max_chars excerpt
    max_chars=$(warden_config_get '.warden.scan.max_content_chars')
    max_chars="${max_chars:-20000}"
    excerpt=$(warden_sanitize "$content" "$max_chars")

    local eval_result decision eval_conf eval_threat eval_rationale
    eval_result=$(warden_evaluate "$source_type" "$excerpt" "$threat")
    decision=$(printf '%s' "$eval_result" | jq -r '.decision // "error"' 2>/dev/null) || decision="error"
    eval_conf=$(printf '%s' "$eval_result" | jq -r '.confidence // 0' 2>/dev/null) || eval_conf="0"
    eval_threat=$(printf '%s' "$eval_result" | jq -r '.threat_type // "none"' 2>/dev/null) || eval_threat="none"
    eval_rationale=$(printf '%s' "$eval_result" | jq -r '.rationale // ""' 2>/dev/null) || eval_rationale=""

    # A decision without a numeric confidence is a malformed response. Handle
    # it like any other evaluator error rather than comparing a string
    # against the threshold.
    [[ "$eval_conf" =~ $num_re ]] || decision="error"

    if [[ "$decision" == "injection" ]]; then
      # Keep the pattern's threat type when the evaluator did not name one.
      [[ "$eval_threat" == "none" || -z "$eval_threat" ]] && eval_threat="$threat"
      _warden_ge "$eval_conf" "$close_threshold" && detected="true"
      _warden_scan_result "$detected" "$eval_threat" "$eval_conf" "$pattern" "escalation" "$eval_rationale"
      return 0
    fi

    if [[ "$decision" == "clean" ]]; then
      _warden_scan_result false "none" "$eval_conf" "$pattern" "escalation" "evaluator judged the borderline content clean"
      return 0
    fi

    # Any other decision → fall back to the weak-pattern verdict below.
  fi

  # ---- Weak fallback: no escalation, or evaluator errored. ---------
  _warden_ge "$weak_conf" "$close_threshold" && detected="true"
  _warden_scan_result "$detected" "$threat" "$weak_conf" "$pattern" "pattern_weak" "weak injection signal; escalation unavailable"
  return 0
}
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env bash
2
+ # Minimal ULID generator for Warden threat_id values.
3
+ #
4
+ # Spec: https://github.com/ulid/spec
5
+ # - 48-bit timestamp (ms since epoch) → 10 chars Crockford Base32
6
+ # - 80-bit randomness → 16 chars Crockford Base32
7
+ # - lexicographically sortable, time-ordered
8
+
9
+ _WARDEN_ULID_ALPHABET="0123456789ABCDEFGHJKMNPQRSTVWXYZ"
10
+
11
# Encode an integer as a fixed-width string over _WARDEN_ULID_ALPHABET.
#
# Arguments: $1 - non-negative integer to encode
#            $2 - number of output characters
# Outputs:   exactly $2 characters, most significant digit first; values too
#            large for the width keep only their low-order digits.
_warden_ulid_encode() {
  local value="$1"
  local width="$2"
  local encoded=""
  local remaining digit

  remaining=$width
  while ((remaining > 0)); do
    # Peel off the least significant base-32 digit and prepend its symbol.
    digit=$((value % 32))
    encoded="${_WARDEN_ULID_ALPHABET:digit:1}${encoded}"
    value=$((value / 32))
    remaining=$((remaining - 1))
  done

  printf '%s' "$encoded"
}
22
+
23
# Print a 26-character ULID: 10 timestamp chars followed by 16 random chars.
#
# Timestamp: milliseconds since the epoch. On Darwin it is read via python3,
# elsewhere via `date +%s%3N`. A `date` without %N support still exits 0 but
# prints non-digits (e.g. "17000000003N"), so the result is validated and
# replaced by whole seconds * 1000 when it is not a plain integer.
#
# Randomness: 80 bits from `openssl rand`, handled as two 40-bit halves. If
# openssl is missing or returns something unexpected, each half is assembled
# from three $RANDOM draws (15 + 15 + 10 bits); that path is not
# cryptographically strong.
#
# Outputs: the ULID on stdout, no trailing newline.
warden_ulid() {
  local now_ms=""
  if [[ "$(uname)" == "Darwin" ]]; then
    now_ms=$(python3 -c 'import time; print(int(time.time() * 1000))' 2>/dev/null) || now_ms=""
  else
    now_ms=$(date +%s%3N 2>/dev/null) || now_ms=""
  fi
  # Exit status alone is not enough: reject anything that is not all digits.
  if [[ ! "$now_ms" =~ ^[0-9]+$ ]]; then
    now_ms=$(($(date +%s) * 1000))
  fi

  local rand_hex rand_hi rand_lo
  # `|| rand_hex=""` keeps a missing openssl from aborting callers that run
  # under `set -e` before the fallback below can take over.
  rand_hex=$(openssl rand -hex 10 2>/dev/null) || rand_hex=""
  if [[ "$rand_hex" =~ ^[0-9a-fA-F]{20}$ ]]; then
    rand_hi=$((16#${rand_hex:0:10}))
    rand_lo=$((16#${rand_hex:10:10}))
  else
    # $RANDOM yields 15 bits per read; 15 + 15 + 10 fills all 40 bits.
    rand_hi=$(((RANDOM << 25) | (RANDOM << 10) | (RANDOM >> 5)))
    rand_lo=$(((RANDOM << 25) | (RANDOM << 10) | (RANDOM >> 5)))
  fi

  local ts_part hi_part lo_part
  ts_part=$(_warden_ulid_encode "$now_ms" 10)
  hi_part=$(_warden_ulid_encode "$rand_hi" 8)
  lo_part=$(_warden_ulid_encode "$rand_lo" 8)

  printf '%s%s%s' "$ts_part" "$hi_part" "$lo_part"
}
@@ -0,0 +1,49 @@
1
+ ---
2
+ name: warden
3
+ description: Inspect and control the Warden content gate. Shows whether the session's content gate is open or closed and the threat that closed it (`/warden` or `/warden status`), and explicitly clears a closed gate to re-enable Write/Edit/Bash (`/warden clear`). Clearing is the only sanctioned way to reopen the gate — it records a user override in the warden.* event stream. Use when Warden has blocked a write/edit/bash operation, or when the user asks to check or clear the content gate.
4
+ ---
5
+
6
+ # Warden: Content Gate Control
7
+
8
+ You are operating the **Warden** content gate — the user-facing control surface for the gate that Warden's hooks open and close automatically.
9
+
10
+ Warden enforces Meta's **Agents Rule of Two**: an agent should hold at most two of {access to private data, ability to take external actions, processing of untrusted content}. When Warden's detection hook finds an injection pattern in content ingested via WebFetch or Read, it closes a session-scoped gate that revokes the *external actions* property — blocking Write, Edit, MultiEdit, and Bash until the user explicitly clears it. This skill is that explicit clear (and a status readout).
11
+
12
+ ## Parse the request
13
+
14
+ Read the user's argument after `/warden`:
15
+
16
+ - no argument, or `status` → **status** action
17
+ - `clear`, `reopen`, `override`, `unblock` → **clear** action
18
+
19
+ If the user passed a session id explicitly (rare), capture it as the optional second argument.
20
+
21
+ ## Run the control surface
22
+
23
+ Source the plugin helpers and invoke `warden_cli`. Run this in a single bash call:
24
+
25
+ ```bash
26
+ set -uo pipefail
27
+ source "$CLAUDE_PLUGIN_ROOT/scripts/lib/warden-config.sh"
28
+ source "$CLAUDE_PLUGIN_ROOT/scripts/lib/warden-events.sh"
29
+ source "$CLAUDE_PLUGIN_ROOT/scripts/lib/warden-gate-state.sh"
30
+ source "$CLAUDE_PLUGIN_ROOT/scripts/lib/warden-cli.sh"
31
+
32
+ # action is "status" or "clear"; SESSION_ID_ARG is optional and usually empty.
33
+ warden_cli "<action>" "${SESSION_ID_ARG:-}"
34
+ ```
35
+
36
+ `warden_cli` resolves the session automatically: it prefers `$CLAUDE_SESSION_ID`, falls back to the single closed gate if exactly one exists, and reports ambiguity if several sessions have closed gates (re-run with an explicit session id in that case).
37
+
38
+ ## Behavior
39
+
40
+ - **status** — prints whether the gate is OPEN or CLOSED. When closed, prints the recorded threat: `threat_type`, `source_type`, source URL/path, confidence, detection method, matched pattern, and the flagged snippet (if storage is enabled).
41
+ - **clear** — verifies the gate is closed, removes the lock, and emits `warden.threat.cleared` with `cleared_by: user_override`. This re-enables Write, Edit, MultiEdit, and Bash for the session.
42
+
43
+ ## After clearing
44
+
45
+ When you clear the gate on the user's behalf:
46
+
47
+ 1. Confirm the gate is reopened and name the source that triggered it.
48
+ 2. Remind the user briefly that the flagged content is still in the conversation context — clearing the gate does not remove it. If they have not reviewed the source, suggest they do before continuing with external actions.
49
+ 3. Do not clear a gate the user has not asked you to clear. Closing is automatic; clearing is always a deliberate user decision.
@@ -142,6 +142,38 @@
142
142
  "jsonpath": "$.version"
143
143
  }
144
144
  ]
145
+ },
146
+ "plugins/warden": {
147
+ "changelog-path": "CHANGELOG.md",
148
+ "release-type": "simple",
149
+ "bump-minor-pre-major": true,
150
+ "bump-patch-for-minor-pre-major": false,
151
+ "component": "warden",
152
+ "draft": false,
153
+ "prerelease": false,
154
+ "extra-files": [
155
+ {
156
+ "type": "json",
157
+ "path": ".claude-plugin/plugin.json",
158
+ "jsonpath": "$.version"
159
+ }
160
+ ]
161
+ },
162
+ "plugins/librarian": {
163
+ "changelog-path": "CHANGELOG.md",
164
+ "release-type": "simple",
165
+ "bump-minor-pre-major": true,
166
+ "bump-patch-for-minor-pre-major": false,
167
+ "component": "librarian",
168
+ "draft": false,
169
+ "prerelease": false,
170
+ "extra-files": [
171
+ {
172
+ "type": "json",
173
+ "path": ".claude-plugin/plugin.json",
174
+ "jsonpath": "$.version"
175
+ }
176
+ ]
145
177
  }
146
178
  },
147
179
  "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json"
@@ -0,0 +1,182 @@
1
+ #!/usr/bin/env bats
2
+ #
3
+ # Exercises the librarian SessionEnd scan pipeline end-to-end with a stub
4
+ # `claude` CLI. Verifies:
5
+ # - Disabled config: no proposals, no events.
6
+ # - Empty archivist dir: scan.started + scan.complete{outcome: empty}
7
+ # emitted, watermark advances.
8
+ # - Synthetic artifacts that pass durability filter and classifier:
9
+ # proposals land on disk with the expected provenance and scan events
10
+ # report the correct counts.
11
+ # - Durability-filtered artifacts (no marker phrase) emit
12
+ # candidate.dropped events.
13
+
14
# Per-test fixture for the librarian SessionEnd hook: a throwaway git repo
# with librarian enabled, the derived archivist/librarian directories, and a
# stub `claude` CLI placed first on PATH.
setup() {
  source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  # Plugin under test and the hook script the tests pipe input into.
  PLUGIN_ROOT="${REPO_ROOT}/plugins/librarian"
  HOOK="${PLUGIN_ROOT}/scripts/hooks/librarian-session-end.sh"
  export CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT"
  export ONLOOKER_ECOSYSTEM_ROOT="$REPO_ROOT"

  # A fake project repo with an origin remote, so project-key resolution
  # succeeds.
  PROJECT_REPO="${BATS_TEST_TMPDIR}/repo"
  mkdir -p "$PROJECT_REPO"
  git -C "$PROJECT_REPO" init -q
  git -C "$PROJECT_REPO" config user.email t@example.com
  git -C "$PROJECT_REPO" config user.name "Test"
  git -C "$PROJECT_REPO" remote add origin git@github.com:org/librarian-scan-test.git

  # shellcheck disable=SC1091
  source "${PLUGIN_ROOT}/scripts/lib/librarian-project-key.sh"
  PROJECT_KEY=$(librarian_project_key "$PROJECT_REPO")
  test -n "$PROJECT_KEY"

  # Locations the assertions inspect.
  ARCHIVIST_DIR="${ONLOOKER_DIR}/archivist/${PROJECT_KEY}"
  LIBRARIAN_DIR="${ONLOOKER_DIR}/librarian/${PROJECT_KEY}"
  ONLOOKER_EVENTS_LOG="${ONLOOKER_DIR}/logs/onlooker-events.jsonl"

  # Project-scoped settings.json that turns librarian on.
  mkdir -p "${PROJECT_REPO}/.claude"
  printf '%s\n' '{"librarian":{"enabled":true}}' > "${PROJECT_REPO}/.claude/settings.json"

  # Stub `claude` CLI: picks a canned classifier response from marker words
  # found in the prompt it reads on stdin.
  STUB_BIN="${BATS_TEST_TMPDIR}/bin"
  mkdir -p "$STUB_BIN"
  cat > "${STUB_BIN}/claude" <<'STUB'
#!/usr/bin/env bash
# Read the prompt from stdin and decide which classifier response to emit.
prompt=$(cat)
if [[ "$prompt" == *"prefer-functional-stub"* ]]; then
  printf '%s' '{"type":"feedback","title":"Prefer functional patterns","body":"User prefers functional patterns over class-based.\n\n**Why:** Stated explicitly during code review.\n**How to apply:** Default to plain functions and composition.","confidence":0.84}'
elif [[ "$prompt" == *"compliance-stub"* ]]; then
  printf '%s' '{"type":"project","title":"Auth rewrite is compliance driven","body":"Auth middleware rewrite is driven by legal/compliance requirements around session token storage.\n\n**Why:** Compliance ask, not tech debt cleanup.\n**How to apply:** Favor compliance posture over ergonomics when scoping.","confidence":0.91}'
elif [[ "$prompt" == *"low-conf-stub"* ]]; then
  printf '%s' '{"type":"user","title":"User edits","body":"User edits files.","confidence":0.4}'
else
  printf '%s' '{"type":null,"title":"","body":"","confidence":0.2}'
fi
STUB
  chmod +x "${STUB_BIN}/claude"
  export PATH="${STUB_BIN}:${PATH}"
}
66
+
67
# Helper: write one archivist artifact JSON file for the test project.
#
# Arguments: $1 - artifact directory name (e.g. "decisions"); one trailing
#                 "s" is stripped to form the artifact's "kind" field
#            $2 - artifact id (also the file's basename)
#            $3 - summary text
#            $4 - detail text
#            $5 - created_at/updated_at timestamp (default 2026-06-01T12:00:00Z)
# Writes:    ${ARCHIVIST_DIR}/<$1>/<$2>.json
_seed_artifact() {
  local collection="$1"
  local artifact_id="$2"
  local summary_text="$3"
  local detail_text="$4"
  local stamp="${5:-2026-06-01T12:00:00Z}"
  local target_dir="${ARCHIVIST_DIR}/${collection}"
  local singular="${collection%s}"

  mkdir -p "$target_dir"
  jq -n \
    --arg id "$artifact_id" \
    --arg kind "$singular" \
    --arg project_key "$PROJECT_KEY" \
    --arg summary "$summary_text" \
    --arg detail "$detail_text" \
    --arg created_at "$stamp" \
    --arg session_id "sess-1" \
    '{ id: $id, kind: $kind, project_key: $project_key, source: "local",
    created_at: $created_at, updated_at: $created_at,
    summary: $summary, detail: $detail, files: [], session_id: $session_id }' \
    > "${target_dir}/${artifact_id}.json"
}
82
+
83
# Print the compact, single-line JSON payload fed to the SessionEnd hook on
# stdin; cwd points at the fake project repo.
_hook_input() {
  local session="sess-end-test"
  jq --compact-output --null-input \
    --arg cwd "$PROJECT_REPO" \
    --arg sid "$session" \
    '{cwd: $cwd, session_id: $sid, hook_event_name: "SessionEnd"}'
}
87
+
88
+ @test "session-end is a no-op when librarian is disabled" {
89
+ rm -f "${PROJECT_REPO}/.claude/settings.json"
90
+ run bash -c "printf '%s' '$(_hook_input)' | '$HOOK'"
91
+ [ "$status" -eq 0 ]
92
+ # No proposals written.
93
+ [ ! -d "${LIBRARIAN_DIR}/proposals" ] || [ -z "$(ls -A "${LIBRARIAN_DIR}/proposals" 2>/dev/null)" ]
94
+ # No events emitted.
95
+ [ ! -f "$ONLOOKER_EVENTS_LOG" ] || ! grep -q 'librarian' "$ONLOOKER_EVENTS_LOG"
96
+ }
97
+
98
+ @test "session-end emits empty scan when archivist has nothing" {
99
+ run bash -c "printf '%s' '$(_hook_input)' | '$HOOK'"
100
+ [ "$status" -eq 0 ]
101
+
102
+ # scan.started fired with artifact_count_in_window = 0.
103
+ grep -q '"event_type":"librarian.scan.started"' "$ONLOOKER_EVENTS_LOG"
104
+ grep '"event_type":"librarian.scan.started"' "$ONLOOKER_EVENTS_LOG" \
105
+ | jq -e '.payload.artifact_count_in_window == 0' >/dev/null
106
+
107
+ # scan.complete fired with outcome=empty and zero counts.
108
+ grep -q '"event_type":"librarian.scan.complete"' "$ONLOOKER_EVENTS_LOG"
109
+ grep '"event_type":"librarian.scan.complete"' "$ONLOOKER_EVENTS_LOG" \
110
+ | jq -e '.payload.outcome == "empty" and .payload.candidates_proposed == 0 and .payload.candidates_dropped == 0' >/dev/null
111
+
112
+ # Watermark advanced for next scan.
113
+ [ -f "${LIBRARIAN_DIR}/last_scan.json" ]
114
+ jq -e '.scanned_at | test("^[0-9]{4}-[0-9]{2}-[0-9]{2}T")' "${LIBRARIAN_DIR}/last_scan.json" >/dev/null
115
+ }
116
+
117
+ @test "session-end proposes promotion for marker-phrase + classifier success" {
118
+ # Seed two promotable artifacts and one filter-rejected one.
119
+ _seed_artifact "decisions" "01PROPOSEEFEEDBACK00000000" \
120
+ "User prefers functional patterns prefer-functional-stub" \
121
+ "User explicitly said: always prefer plain functions over classes when adding new code in the api layer."
122
+
123
+ _seed_artifact "decisions" "01PROPOSEEPROJECT000000000" \
124
+ "Compliance-driven auth rewrite compliance-stub" \
125
+ "The reason for the auth middleware rewrite is legal compliance, not tech debt; remember this when sizing scope."
126
+
127
+ _seed_artifact "open_questions" "01FILTERREJECTED000000000" \
128
+ "ad hoc question" \
129
+ "this short text contains no marker phrase and should be filtered out before the classifier runs"
130
+
131
+ run bash -c "printf '%s' '$(_hook_input)' | '$HOOK'"
132
+ [ "$status" -eq 0 ]
133
+
134
+ # Two proposals on disk.
135
+ proposals=("${LIBRARIAN_DIR}/proposals"/*.json)
136
+ [ "${#proposals[@]}" -eq 2 ]
137
+
138
+ # Both carry provenance back to their source artifact.
139
+ for p in "${proposals[@]}"; do
140
+ jq -e '.status == "pending" and .conflict_state == "none"' "$p" >/dev/null
141
+ jq -e '.proposed.type | IN("user", "feedback", "project", "reference")' "$p" >/dev/null
142
+ jq -e '.proposed.classifier_confidence >= 0.6' "$p" >/dev/null
143
+ jq -e '(.source_artifact_ids | length) > 0' "$p" >/dev/null
144
+ done
145
+
146
+ # scan.started reported the right window size (2 marker matches + 1 filtered = 3).
147
+ grep '"event_type":"librarian.scan.started"' "$ONLOOKER_EVENTS_LOG" \
148
+ | jq -e '.payload.artifact_count_in_window == 3' >/dev/null
149
+
150
+ # candidate.proposed fired twice with correct types.
151
+ proposed_types=$(grep '"event_type":"librarian.candidate.proposed"' "$ONLOOKER_EVENTS_LOG" \
152
+ | jq -r '.payload.memory_type' | sort | paste -sd, -)
153
+ [ "$proposed_types" = "feedback,project" ]
154
+
155
+ # candidate.dropped fired for the marker-missing artifact.
156
+ grep '"event_type":"librarian.candidate.dropped"' "$ONLOOKER_EVENTS_LOG" \
157
+ | jq -e 'select(.payload.reason == "filter_marker_missing")' >/dev/null
158
+
159
+ # scan.complete with ok outcome and accurate counts.
160
+ scan_complete=$(grep '"event_type":"librarian.scan.complete"' "$ONLOOKER_EVENTS_LOG")
161
+ echo "$scan_complete" | jq -e '.payload.outcome == "ok" and .payload.candidates_proposed == 2 and .payload.candidates_dropped >= 1' >/dev/null
162
+ }
163
+
164
+ @test "session-end drops candidates below confidence floor" {
165
+ _seed_artifact "decisions" "01LOWCONFCANDIDATE0000000" \
166
+ "low-conf-stub trigger" \
167
+ "always prefer some thing because reasons that show a marker phrase but the stub returns low confidence"
168
+
169
+ run bash -c "printf '%s' '$(_hook_input)' | '$HOOK'"
170
+ [ "$status" -eq 0 ]
171
+
172
+ # No proposal written.
173
+ [ ! -d "${LIBRARIAN_DIR}/proposals" ] || [ -z "$(ls -A "${LIBRARIAN_DIR}/proposals" 2>/dev/null)" ]
174
+
175
+ # candidate.dropped fired with low_confidence reason.
176
+ grep '"event_type":"librarian.candidate.dropped"' "$ONLOOKER_EVENTS_LOG" \
177
+ | jq -e 'select(.payload.reason == "low_confidence")' >/dev/null
178
+
179
+ # scan.complete reports empty outcome (zero proposals).
180
+ grep '"event_type":"librarian.scan.complete"' "$ONLOOKER_EVENTS_LOG" \
181
+ | jq -e '.payload.outcome == "empty" and .payload.candidates_proposed == 0 and .payload.candidates_dropped >= 1' >/dev/null
182
+ }
@@ -0,0 +1,136 @@
1
+ #!/usr/bin/env bats
2
+ #
3
+ # Tests the librarian SessionStart surfacer. Verifies:
4
+ # - Disabled config: empty additionalContext, exit 0.
5
+ # - No git context: empty additionalContext, exit 0.
6
+ # - Empty proposal queue + skip_inject_when_zero=true: empty context.
7
+ # - Pending proposals: one-line pointer with the count and pluralization.
8
+ # - Overflow: counts above max_pending_for_inject render as "<cap>+".
9
+
10
# Per-test fixture for the librarian SessionStart surfacer: a throwaway git
# repo with librarian enabled and the project's librarian directory resolved.
setup() {
  source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  # Plugin under test and the hook script the tests pipe input into.
  PLUGIN_ROOT="${REPO_ROOT}/plugins/librarian"
  HOOK="${PLUGIN_ROOT}/scripts/hooks/librarian-session-start.sh"
  export CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT"
  export ONLOOKER_ECOSYSTEM_ROOT="$REPO_ROOT"

  # Fake project repo with an origin remote.
  PROJECT_REPO="${BATS_TEST_TMPDIR}/repo"
  mkdir -p "$PROJECT_REPO"
  git -C "$PROJECT_REPO" init -q
  git -C "$PROJECT_REPO" config user.email t@example.com
  git -C "$PROJECT_REPO" config user.name "Test"
  git -C "$PROJECT_REPO" remote add origin git@github.com:org/librarian-surfacer-test.git

  # shellcheck disable=SC1091
  source "${PLUGIN_ROOT}/scripts/lib/librarian-project-key.sh"
  PROJECT_KEY=$(librarian_project_key "$PROJECT_REPO")
  test -n "$PROJECT_KEY"
  LIBRARIAN_DIR="${ONLOOKER_DIR}/librarian/${PROJECT_KEY}"

  # Project-scoped settings.json that turns librarian on.
  mkdir -p "${PROJECT_REPO}/.claude"
  printf '%s\n' '{"librarian":{"enabled":true}}' > "${PROJECT_REPO}/.claude/settings.json"
}
36
+
37
# Print the compact, single-line JSON payload fed to the SessionStart hook on
# stdin; cwd points at the fake project repo.
_input() {
  jq --compact-output --null-input \
    --arg cwd "$PROJECT_REPO" \
    '{cwd: $cwd, source: "startup", session_id: "sess-start-test"}'
}
41
+
42
# Helper: drop a proposal file with the given status into the queue.
#
# Arguments: $1 - proposal id (also the file's basename)
#            $2 - status to record (default "pending")
# Writes:    ${LIBRARIAN_DIR}/proposals/<$1>.json
_seed_proposal() {
  local proposal_id="$1"
  local state="${2:-pending}"
  local queue_dir="${LIBRARIAN_DIR}/proposals"

  mkdir -p "$queue_dir"
  jq -n \
    --arg id "$proposal_id" \
    --arg status "$state" \
    '{
    id: $id,
    created_at: "2026-06-01T00:00:00Z",
    source_artifact_ids: [],
    source_session_ids: [],
    proposed: { type: "feedback", filename: ($id + ".md"),
    title: "t", body: "b", classifier_confidence: 0.8 },
    conflict_state: "none",
    conflict_with: [],
    status: $status
    }' > "${queue_dir}/${proposal_id}.json"
}
59
+
60
+ @test "surfacer emits empty context when librarian is disabled" {
61
+ rm -f "${PROJECT_REPO}/.claude/settings.json"
62
+ _seed_proposal "01PROPOSALA000000000000000"
63
+
64
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
65
+ [ "$status" -eq 0 ]
66
+ echo "$output" | jq -e '.hookSpecificOutput.additionalContext == ""' >/dev/null
67
+ echo "$output" | jq -e '.hookSpecificOutput.hookEventName == "SessionStart"' >/dev/null
68
+ }
69
+
70
+ @test "surfacer emits empty context when there is no git context" {
71
+ local non_git="${BATS_TEST_TMPDIR}/no-git"
72
+ mkdir -p "$non_git"
73
+ local input
74
+ input=$(jq -cn --arg cwd "$non_git" '{cwd: $cwd, source: "startup", session_id: "s"}')
75
+
76
+ run bash -c "printf '%s' '$input' | '$HOOK'"
77
+ [ "$status" -eq 0 ]
78
+ echo "$output" | jq -e '.hookSpecificOutput.additionalContext == ""' >/dev/null
79
+ }
80
+
81
+ @test "surfacer emits empty context when no proposals are pending" {
82
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
83
+ [ "$status" -eq 0 ]
84
+ echo "$output" | jq -e '.hookSpecificOutput.additionalContext == ""' >/dev/null
85
+ }
86
+
87
+ @test "surfacer surfaces one-line pointer when proposals exist (plural)" {
88
+ _seed_proposal "01PROPOSAL11111111111111A"
89
+ _seed_proposal "01PROPOSAL11111111111111B"
90
+ _seed_proposal "01PROPOSAL11111111111111C"
91
+
92
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
93
+ [ "$status" -eq 0 ]
94
+ local ctx
95
+ ctx=$(echo "$output" | jq -r '.hookSpecificOutput.additionalContext')
96
+ [[ "$ctx" == *"Librarian has 3 pending memory promotion proposals"* ]]
97
+ [[ "$ctx" == *"/librarian review"* ]]
98
+ }
99
+
100
+ @test "surfacer pluralizes singular vs plural correctly" {
101
+ _seed_proposal "01PROPOSALSINGULAR0000000"
102
+
103
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
104
+ [ "$status" -eq 0 ]
105
+ local ctx
106
+ ctx=$(echo "$output" | jq -r '.hookSpecificOutput.additionalContext')
107
+ [[ "$ctx" == *"Librarian has 1 pending memory promotion proposal"* ]]
108
+ [[ "$ctx" != *"proposals."* ]]
109
+ }
110
+
111
+ @test "surfacer ignores accepted/rejected proposals when counting pending" {
112
+ _seed_proposal "01PROPOSALACCEPTED000000" "accepted"
113
+ _seed_proposal "01PROPOSALREJECTED000000" "rejected"
114
+ _seed_proposal "01PROPOSALPENDING0000000" "pending"
115
+
116
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
117
+ [ "$status" -eq 0 ]
118
+ local ctx
119
+ ctx=$(echo "$output" | jq -r '.hookSpecificOutput.additionalContext')
120
+ [[ "$ctx" == *"1 pending memory promotion proposal"* ]]
121
+ }
122
+
123
+ @test "surfacer caps display at max_pending_for_inject + '+'" {
124
+ # Override max to 3 via a project settings overlay.
125
+ printf '%s\n' '{"librarian":{"enabled":true,"surfacer":{"max_pending_for_inject":3}}}' \
126
+ > "${PROJECT_REPO}/.claude/settings.json"
127
+ for i in A B C D E; do
128
+ _seed_proposal "01PROPOSALCAP$i$i$i$i$i$i$i$i$i$i$i$i"
129
+ done
130
+
131
+ run bash -c "printf '%s' '$(_input)' | '$HOOK'"
132
+ [ "$status" -eq 0 ]
133
+ local ctx
134
+ ctx=$(echo "$output" | jq -r '.hookSpecificOutput.additionalContext')
135
+ [[ "$ctx" == *"Librarian has 3+ pending memory promotion proposals"* ]]
136
+ }
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env bats
2
+
3
# Per-test fixture for the warden config tests: set up the shared test
# environment, point CLAUDE_PLUGIN_ROOT at the warden plugin, and load the
# config library under test.
setup() {
  source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  PLUGIN_ROOT="${REPO_ROOT}/plugins/warden"
  CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT"
  export CLAUDE_PLUGIN_ROOT

  # shellcheck disable=SC1091
  source "${PLUGIN_ROOT}/scripts/lib/warden-config.sh"
}
12
+
13
+ @test "warden is disabled by default" {
14
+ warden_config_load ""
15
+ run warden_config_enabled
16
+ [ "$status" -ne 0 ]
17
+ }
18
+
19
+ @test "user-level settings.json can enable warden" {
20
+ mkdir -p "${HOME}/.claude"
21
+ printf '%s\n' '{"warden":{"enabled":true}}' > "${HOME}/.claude/settings.json"
22
+ warden_config_load ""
23
+ run warden_config_enabled
24
+ [ "$status" -eq 0 ]
25
+ }
26
+
27
+ @test "repo-level settings.json overrides user-level" {
28
+ mkdir -p "${HOME}/.claude"
29
+ printf '%s\n' '{"warden":{"enabled":true}}' > "${HOME}/.claude/settings.json"
30
+ local repo="${BATS_TEST_TMPDIR}/repo"
31
+ mkdir -p "${repo}/.claude"
32
+ printf '%s\n' '{"warden":{"enabled":false}}' > "${repo}/.claude/settings.json"
33
+ warden_config_load "$repo"
34
+ run warden_config_enabled
35
+ [ "$status" -ne 0 ]
36
+ }
37
+
38
+ @test "defaults are preserved when an overlay sets only some keys" {
39
+ mkdir -p "${HOME}/.claude"
40
+ printf '%s\n' '{"warden":{"enabled":true,"escalation":{"enabled":false}}}' > "${HOME}/.claude/settings.json"
41
+ warden_config_load ""
42
+ # escalation.enabled overridden to false…
43
+ [ "$(warden_config_get '.warden.escalation.enabled')" = "false" ]
44
+ # …but shipped defaults survive the deep merge.
45
+ [ "$(warden_config_get '.warden.detection.close_threshold')" = "0.65" ]
46
+ [ "$(warden_config_get '.warden.scan.max_content_chars')" = "20000" ]
47
+ }
48
+
49
+ @test "config_get_json returns arrays" {
50
+ warden_config_load ""
51
+ run warden_config_get_json '.warden.scan.sources'
52
+ [ "$status" -eq 0 ]
53
+ printf '%s' "$output" | jq -e 'index("web_fetch") != null and index("file_read") != null' >/dev/null
54
+ }