@onlooker-community/ecosystem 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/.claude-plugin/marketplace.json +26 -0
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.release-please-manifest.json +4 -2
  4. package/CHANGELOG.md +14 -0
  5. package/docs/memory-architecture.md +102 -0
  6. package/package.json +3 -3
  7. package/plugins/curator/.claude-plugin/plugin.json +14 -0
  8. package/plugins/curator/CHANGELOG.md +10 -0
  9. package/plugins/curator/README.md +55 -0
  10. package/plugins/curator/config.json +41 -0
  11. package/plugins/curator/docs/adr/001-staleness-tiers.md +100 -0
  12. package/plugins/curator/docs/design.md +311 -0
  13. package/plugins/curator/hooks/hooks.json +15 -0
  14. package/plugins/curator/scripts/hooks/curator-session-start.sh +343 -0
  15. package/plugins/curator/scripts/lib/curator-checks.sh +155 -0
  16. package/plugins/curator/scripts/lib/curator-config.sh +67 -0
  17. package/plugins/curator/scripts/lib/curator-emit.sh +61 -0
  18. package/plugins/curator/scripts/lib/curator-memory-reader.sh +225 -0
  19. package/plugins/curator/scripts/lib/curator-project-key.sh +82 -0
  20. package/plugins/curator/scripts/lib/curator-storage.sh +176 -0
  21. package/plugins/curator/scripts/lib/curator-ulid.sh +43 -0
  22. package/plugins/historian/docs/adr/001-local-embeddings-only.md +96 -0
  23. package/plugins/historian/docs/design.md +317 -0
  24. package/plugins/librarian/.claude-plugin/plugin.json +14 -0
  25. package/plugins/librarian/CHANGELOG.md +10 -0
  26. package/plugins/librarian/README.md +51 -0
  27. package/plugins/librarian/config.json +52 -0
  28. package/plugins/librarian/docs/adr/001-propose-dont-auto-write.md +87 -0
  29. package/plugins/librarian/docs/design.md +301 -0
  30. package/plugins/librarian/hooks/hooks.json +26 -0
  31. package/plugins/librarian/scripts/hooks/librarian-session-end.sh +312 -0
  32. package/plugins/librarian/scripts/hooks/librarian-session-start.sh +103 -0
  33. package/plugins/librarian/scripts/lib/librarian-archivist-reader.sh +67 -0
  34. package/plugins/librarian/scripts/lib/librarian-classifier.sh +139 -0
  35. package/plugins/librarian/scripts/lib/librarian-config.sh +74 -0
  36. package/plugins/librarian/scripts/lib/librarian-durability.sh +77 -0
  37. package/plugins/librarian/scripts/lib/librarian-emit.sh +72 -0
  38. package/plugins/librarian/scripts/lib/librarian-project-key.sh +83 -0
  39. package/plugins/librarian/scripts/lib/librarian-storage.sh +222 -0
  40. package/plugins/librarian/scripts/lib/librarian-ulid.sh +50 -0
  41. package/release-please-config.json +32 -0
  42. package/test/bats/curator-session-start.bats +316 -0
  43. package/test/bats/librarian-session-end.bats +182 -0
  44. package/test/bats/librarian-session-start.bats +136 -0
@@ -0,0 +1,312 @@
1
+ #!/usr/bin/env bash
2
+ # Librarian SessionEnd scan.
3
+ #
4
+ # Reads archivist artifacts created since the last librarian scan, runs them
5
+ # through the durability filter, classifies survivors via Haiku, and writes
6
+ # proposals to the queue for review at next SessionStart.
7
+ #
8
+ # Hook contract:
9
+ # - Always exits 0. Never blocks session shutdown.
10
+ # - No-ops when librarian.enabled is not true.
11
+ # - No-ops when no project key (no git context) or no archivist artifacts.
12
+ # - Classifier failures degrade gracefully: the affected candidate is
13
+ # dropped, the rest of the scan proceeds.
14
+
15
# No `-e`: this hook must keep going past individual failures and exit 0.
set -uo pipefail

# Resolve this script's directory, then the plugin root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Source the ecosystem substrate so $ONLOOKER_DIR / $ONLOOKER_EVENTS_LOG
# resolve correctly under the test harness's isolated temp home. An explicit
# ONLOOKER_ECOSYSTEM_ROOT wins; otherwise probe two levels above the plugin.
_ECOSYSTEM_ROOT="${ONLOOKER_ECOSYSTEM_ROOT:-}"
if [[ -z "$_ECOSYSTEM_ROOT" ]]; then
  _candidate="$(cd "${PLUGIN_ROOT}/../.." 2>/dev/null && pwd)"
  [[ -f "${_candidate}/scripts/lib/validate-path.sh" ]] && _ECOSYSTEM_ROOT="$_candidate"
fi

if [[ -n "$_ECOSYSTEM_ROOT" && -f "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh" ]]; then
  # shellcheck disable=SC1091
  CLAUDE_PLUGIN_ROOT="$_ECOSYSTEM_ROOT" source "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh"
fi

# Pull in the librarian libraries, in the same order as before:
# config, project-key, ulid, storage, emit, archivist-reader, durability,
# classifier.
for _librarian_lib in \
  config project-key ulid storage emit archivist-reader durability classifier; do
  # shellcheck disable=SC1090
  source "${PLUGIN_ROOT}/scripts/lib/librarian-${_librarian_lib}.sh"
done
unset _librarian_lib
51
+
52
# The hook payload arrives as JSON on stdin. A missing or malformed payload
# is tolerated: cwd falls back to the process cwd, session id to "unknown".
INPUT=$(cat 2>/dev/null || true)
CWD=$(jq -r '.cwd // ""' <<<"$INPUT" 2>/dev/null) || CWD=""
SESSION_ID=$(jq -r '.session_id // ""' <<<"$INPUT" 2>/dev/null) || SESSION_ID=""
CWD="${CWD:-$(pwd)}"
SESSION_ID="${SESSION_ID:-unknown}"

# Load config for the repo containing CWD; stop quietly when the librarian
# is not enabled.
librarian_config_load "$(librarian_project_repo_root "$CWD")"
if ! librarian_config_enabled; then
  exit 0
fi

# No project key means there is nothing to scope storage to.
PROJECT_KEY=$(librarian_project_key "$CWD")
if [[ -z "$PROJECT_KEY" ]]; then
  exit 0
fi

# Storage init + manifest refresh. A failed manifest write is non-fatal.
if ! librarian_storage_init "$PROJECT_KEY"; then
  exit 0
fi
REMOTE_URL=$(librarian_project_remote_url "$CWD")
REPO_ROOT=$(librarian_project_repo_root "$CWD")
librarian_storage_write_manifest "$PROJECT_KEY" "$REMOTE_URL" "$REPO_ROOT" || true
69
+
70
# ----------------------------------------------------------------------------
# Determine the watermark. Empty means "first scan" — fall back to N days ago.
# ----------------------------------------------------------------------------

WATERMARK=$(librarian_storage_read_last_scan "$PROJECT_KEY")

if [[ -z "$WATERMARK" ]]; then
  BOOTSTRAP_DAYS=$(librarian_config_get '.librarian.scan.bootstrap_lookback_days')
  # The value comes from configuration loaded for the repository, so treat it
  # as untrusted: accept only a plain non-negative number (this also covers
  # the empty / "null" cases) and fall back to 14 days otherwise.
  [[ "$BOOTSTRAP_DAYS" =~ ^[0-9]+([.][0-9]+)?$ ]] || BOOTSTRAP_DAYS=14
  # Pass the number as an argument instead of splicing it into the Python
  # source, so a crafted config value can never be executed as code.
  # If python3 is missing or fails, WATERMARK stays empty.
  WATERMARK=$(python3 -c '
import datetime
import sys
delta = datetime.timedelta(days=float(sys.argv[1]))
now = datetime.datetime.now(datetime.timezone.utc)
print((now - delta).strftime("%Y-%m-%dT%H:%M:%SZ"))
' "$BOOTSTRAP_DAYS" 2>/dev/null) || WATERMARK=""
fi
86
+
87
# ----------------------------------------------------------------------------
# Emit scan.started and load candidate window.
# ----------------------------------------------------------------------------

SCAN_START_TS_S=$(date +%s)
ARTIFACTS=$(librarian_archivist_load_since "$PROJECT_KEY" "$WATERMARK")
ARTIFACT_COUNT=$(printf '%s' "$ARTIFACTS" | jq 'length' 2>/dev/null) || ARTIFACT_COUNT=0
# jq exits 0 without printing anything when its input is empty, so the `||`
# fallback alone can leave this blank. A blank count would break the
# --argjson calls below and slip past the zero check, so normalise it.
[[ "$ARTIFACT_COUNT" =~ ^[0-9]+$ ]] || ARTIFACT_COUNT=0

# last_scan_at is omitted from the payload when no watermark is known.
librarian_emit "librarian.scan.started" "$SESSION_ID" "$(jq -cn \
  --arg trigger "session_end" \
  --arg last_scan_at "$WATERMARK" \
  --argjson artifact_count_in_window "$ARTIFACT_COUNT" \
  '{ trigger: $trigger, last_scan_at: (if $last_scan_at == "" then null else $last_scan_at end),
     artifact_count_in_window: $artifact_count_in_window } | with_entries(select(.value != null))')"

# Bail with scan.complete{outcome: empty, candidates: 0} when archivist has
# nothing new for us. We still advance the watermark so subsequent scans
# don't re-walk the same window.
if [[ "$ARTIFACT_COUNT" == "0" ]]; then
  librarian_storage_write_last_scan "$PROJECT_KEY" || true
  # Second-resolution clock, so the duration is a multiple of 1000 ms.
  DURATION_MS=$(( ($(date +%s) - SCAN_START_TS_S) * 1000 ))
  librarian_emit "librarian.scan.complete" "$SESSION_ID" "$(jq -cn \
    --arg outcome "empty" \
    --argjson duration_ms "$DURATION_MS" \
    --argjson candidates_proposed 0 \
    --argjson candidates_dropped 0 \
    --argjson artifact_count_in_window 0 \
    '{ outcome: $outcome, duration_ms: $duration_ms,
       candidates_proposed: $candidates_proposed,
       candidates_dropped: $candidates_dropped,
       artifact_count_in_window: $artifact_count_in_window }')"
  exit 0
fi
120
+
121
# ----------------------------------------------------------------------------
# Durability filter — cheap, deterministic, no network.
# ----------------------------------------------------------------------------

# Filter inputs, with defaults when the config has no value.
MARKERS_JSON=$(librarian_config_get '.librarian.durability_filter.marker_phrases | tojson')
if [[ -z "$MARKERS_JSON" || "$MARKERS_JSON" == "null" ]]; then
  MARKERS_JSON='[]'
fi
MIN_DETAIL=$(librarian_config_get '.librarian.scan.min_detail_chars')
if [[ -z "$MIN_DETAIL" || "$MIN_DETAIL" == "null" ]]; then
  MIN_DETAIL=40
fi

# The filter's result is read as an object with .kept and .dropped members.
FILTERED=$(librarian_durability_filter "$ARTIFACTS" "$MARKERS_JSON" "$MIN_DETAIL")
KEPT=$(jq '.kept' <<<"$FILTERED")
DROPPED=$(jq '.dropped' <<<"$FILTERED")

# One librarian.candidate.dropped event per artifact filtered out before the
# classifier, capped so the event log stays scannable even if archivist
# piled up months of artifacts. DROPPED_TOTAL keeps the uncapped count.
MAX_DROPPED_EVENTS=20
DROPPED_TOTAL=$(jq 'length' <<<"$DROPPED" 2>/dev/null) || DROPPED_TOTAL=0
if (( DROPPED_TOTAL < MAX_DROPPED_EVENTS )); then
  DROPPED_EMIT_COUNT=$(( DROPPED_TOTAL ))
else
  DROPPED_EMIT_COUNT=$(( MAX_DROPPED_EVENTS ))
fi

drop_index=0
while (( drop_index < DROPPED_EMIT_COUNT )); do
  DROP=$(jq -c ".[${drop_index}]" <<<"$DROPPED")
  # Null members are stripped so absent fields never appear in the event.
  librarian_emit "librarian.candidate.dropped" "$SESSION_ID" "$(jq -cn \
    --argjson drop "$DROP" \
    '{ reason: $drop.reason, source_artifact_id: $drop.artifact_id }
     | with_entries(select(.value != null))')"
  drop_index=$(( drop_index + 1 ))
done
147
+
148
# ----------------------------------------------------------------------------
# Classifier loop — one Haiku call per surviving candidate.
# ----------------------------------------------------------------------------

CLASSIFIER_MODEL=$(librarian_config_get '.librarian.classifier.model')
CLASSIFIER_TEMP=$(librarian_config_get '.librarian.classifier.temperature')
CLASSIFIER_MAX=$(librarian_config_get '.librarian.classifier.max_output_tokens')
# Treat a literal "null" like "unset" for these three as well, so the
# classifier falls back to its own defaults instead of receiving "null".
[[ "$CLASSIFIER_MODEL" == "null" ]] && CLASSIFIER_MODEL=""
[[ "$CLASSIFIER_TEMP" == "null" ]] && CLASSIFIER_TEMP=""
[[ "$CLASSIFIER_MAX" == "null" ]] && CLASSIFIER_MAX=""
MIN_CONFIDENCE=$(librarian_config_get '.librarian.classifier.min_classifier_confidence')
[[ -z "$MIN_CONFIDENCE" || "$MIN_CONFIDENCE" == "null" ]] && MIN_CONFIDENCE="0.6"
TOMBSTONE_TTL=$(librarian_config_get '.librarian.tombstones.ttl_days')
[[ -z "$TOMBSTONE_TTL" || "$TOMBSTONE_TTL" == "null" ]] && TOMBSTONE_TTL=180
AUTO_PROMOTE_THRESHOLD=$(librarian_config_get '.librarian.auto_promote_threshold')
[[ -z "$AUTO_PROMOTE_THRESHOLD" || "$AUTO_PROMOTE_THRESHOLD" == "null" ]] && AUTO_PROMOTE_THRESHOLD="0.85"

KEPT_COUNT=$(printf '%s' "$KEPT" | jq 'length' 2>/dev/null) || KEPT_COUNT=0
# jq prints nothing (exit 0) on empty input; keep the loop bound numeric.
[[ "$KEPT_COUNT" =~ ^[0-9]+$ ]] || KEPT_COUNT=0
PROPOSED_COUNT=0
POST_CLASSIFIER_DROPPED=0
NOW_TS=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

# Record one post-classifier drop: bump POST_CLASSIFIER_DROPPED and emit a
# librarian.candidate.dropped event.
# Arguments: $1 - reason string, $2 - source artifact id (may be empty, in
#            which case source_artifact_id is omitted from the payload)
_librarian_drop_candidate() {
  local reason="$1"
  local src="${2:-}"
  POST_CLASSIFIER_DROPPED=$((POST_CLASSIFIER_DROPPED + 1))
  librarian_emit "librarian.candidate.dropped" "$SESSION_ID" "$(jq -cn \
    --arg reason "$reason" \
    --arg src "$src" \
    '{ reason: $reason, source_artifact_id: (if $src == "" then null else $src end) }
     | with_entries(select(.value != null))')"
}

for ((i = 0; i < KEPT_COUNT; i++)); do
  ARTIFACT=$(printf '%s' "$KEPT" | jq -c ".[$i]")
  [[ -z "$ARTIFACT" || "$ARTIFACT" == "null" ]] && continue

  # Extracted once; used by every drop event and by the proposal itself.
  ARTIFACT_ID=$(printf '%s' "$ARTIFACT" | jq -r '.id // ""')
  ARTIFACT_SESSION=$(printf '%s' "$ARTIFACT" | jq -r '.session_id // ""')

  RESPONSE=$(librarian_classifier_call \
    "$ARTIFACT" "$CLASSIFIER_MODEL" "$CLASSIFIER_TEMP" "$CLASSIFIER_MAX")

  # An empty response means the classifier call failed or was rejected.
  if [[ -z "$RESPONSE" ]]; then
    _librarian_drop_candidate "classified_null" "$ARTIFACT_ID"
    continue
  fi

  # Null and low-confidence classifications are dropped without creating a
  # proposal — by design, the proposal queue prefers misses over noise.
  MEMORY_TYPE=$(printf '%s' "$RESPONSE" | jq -r '.type // ""')
  CONFIDENCE=$(printf '%s' "$RESPONSE" | jq -r '.confidence // 0')
  BODY=$(printf '%s' "$RESPONSE" | jq -r '.body // ""')
  TITLE=$(printf '%s' "$RESPONSE" | jq -r '.title // ""')

  if [[ -z "$MEMORY_TYPE" || "$MEMORY_TYPE" == "null" ]]; then
    _librarian_drop_candidate "classified_null" "$ARTIFACT_ID"
    continue
  fi

  # awk does the comparison because bash arithmetic has no floats.
  BELOW_MIN=$(awk -v a="$CONFIDENCE" -v b="$MIN_CONFIDENCE" 'BEGIN { print (a < b) ? 1 : 0 }')
  if [[ "$BELOW_MIN" == "1" ]]; then
    _librarian_drop_candidate "low_confidence" "$ARTIFACT_ID"
    continue
  fi

  # Skip if a tombstone exists for this exact body — the user already
  # rejected this content, don't re-surface it.
  BODY_HASH=$(librarian_body_hash "$BODY")
  if [[ -n "$BODY_HASH" ]] && librarian_storage_has_tombstone \
    "$PROJECT_KEY" "$BODY_HASH" "$TOMBSTONE_TTL"; then
    _librarian_drop_candidate "duplicate" "$ARTIFACT_ID"
    continue
  fi

  # Build and persist the proposal. Conflict detection against the user's
  # memory store is deferred to a follow-up commit; everything ships as
  # conflict_state: "none" for now.
  PROPOSAL_ID=$(librarian_ulid)
  FILENAME=$(librarian_classifier_filename "$MEMORY_TYPE" "$TITLE")

  PROPOSAL_JSON=$(jq -n \
    --arg id "$PROPOSAL_ID" \
    --arg created_at "$NOW_TS" \
    --arg memory_type "$MEMORY_TYPE" \
    --arg filename "$FILENAME" \
    --arg title "$TITLE" \
    --arg body "$BODY" \
    --argjson classifier_confidence "$CONFIDENCE" \
    --arg conflict_state "none" \
    --arg artifact_id "$ARTIFACT_ID" \
    --arg artifact_session "$ARTIFACT_SESSION" \
    '{
      id: $id,
      created_at: $created_at,
      source_artifact_ids: (if $artifact_id == "" then [] else [$artifact_id] end),
      source_session_ids: (if $artifact_session == "" then [] else [$artifact_session] end),
      proposed: {
        type: $memory_type,
        filename: $filename,
        title: $title,
        body: $body,
        classifier_confidence: $classifier_confidence
      },
      conflict_state: $conflict_state,
      conflict_with: [],
      status: "pending"
    }')

  # A failed write skips the candidate without counting it as proposed.
  librarian_storage_write_proposal "$PROJECT_KEY" "$PROPOSAL_ID" "$PROPOSAL_JSON" >/dev/null \
    || continue

  PROPOSED_COUNT=$((PROPOSED_COUNT + 1))

  librarian_emit "librarian.candidate.proposed" "$SESSION_ID" "$(jq -cn \
    --arg proposal_id "$PROPOSAL_ID" \
    --arg memory_type "$MEMORY_TYPE" \
    --argjson classifier_confidence "$CONFIDENCE" \
    --arg conflict_state "none" \
    --arg src "$ARTIFACT_ID" \
    '{
      proposal_id: $proposal_id,
      memory_type: $memory_type,
      classifier_confidence: $classifier_confidence,
      conflict_state: $conflict_state,
      source_artifact_ids: (if $src == "" then [] else [$src] end)
    }')"
done
282
+
283
# ----------------------------------------------------------------------------
# Watermark advance + scan.complete.
# ----------------------------------------------------------------------------

# A failed watermark write is tolerated; the hook still reports and exits 0.
librarian_storage_write_last_scan "$PROJECT_KEY" || true

# Dropped = pre-classifier filter drops + drops recorded inside the loop.
TOTAL_DROPPED=$(( DROPPED_TOTAL + POST_CLASSIFIER_DROPPED ))
if [[ "$PROPOSED_COUNT" == "0" ]]; then
  OUTCOME="empty"
else
  OUTCOME="ok"
fi
# Second-resolution clock, so the duration is a multiple of 1000 ms.
DURATION_MS=$(( ($(date +%s) - SCAN_START_TS_S) * 1000 ))

SCAN_COMPLETE_PAYLOAD=$(jq -cn \
  --arg outcome "$OUTCOME" \
  --argjson candidates_proposed "$PROPOSED_COUNT" \
  --argjson candidates_dropped "$TOTAL_DROPPED" \
  --argjson duration_ms "$DURATION_MS" \
  --argjson artifact_count_in_window "$ARTIFACT_COUNT" \
  '{ outcome: $outcome,
     candidates_proposed: $candidates_proposed,
     candidates_dropped: $candidates_dropped,
     duration_ms: $duration_ms,
     artifact_count_in_window: $artifact_count_in_window }')
librarian_emit "librarian.scan.complete" "$SESSION_ID" "$SCAN_COMPLETE_PAYLOAD"

# Suppress AUTO_PROMOTE_THRESHOLD shellcheck warning — read for future use
# (auto-promote path lands in the next commit).
: "${AUTO_PROMOTE_THRESHOLD}"

exit 0
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env bash
2
+ # Librarian SessionStart surfacer.
3
+ #
4
+ # Counts pending proposals in the project's queue and injects a one-line
5
+ # `additionalContext` pointer if any exist. The full proposal bodies live
6
+ # in ~/.onlooker/librarian/<project-key>/proposals/ and are reviewed via
7
+ # the /librarian review skill rather than inlined here — SessionStart
8
+ # context is precious, and a queue of 20 distilled-but-unreviewed memories
9
+ # isn't where it should go.
10
+ #
11
+ # Hook contract:
12
+ # - Always exits 0. Never blocks session start.
13
+ # - Emits valid hookSpecificOutput JSON, even when nothing to say.
14
+ # - No-ops when librarian.enabled is not true.
15
+ # - No-ops when there is no project key (no git context).
16
+
17
# No `-e`: this hook must keep going past individual failures and exit 0.
set -uo pipefail

# Resolve this script's directory, then the plugin root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Ecosystem root: an explicit ONLOOKER_ECOSYSTEM_ROOT wins; otherwise probe
# two levels above the plugin for the shared validate-path.sh library.
_ECOSYSTEM_ROOT="${ONLOOKER_ECOSYSTEM_ROOT:-}"
if [[ -z "$_ECOSYSTEM_ROOT" ]]; then
  _candidate="$(cd "${PLUGIN_ROOT}/../.." 2>/dev/null && pwd)"
  [[ -f "${_candidate}/scripts/lib/validate-path.sh" ]] && _ECOSYSTEM_ROOT="$_candidate"
fi
if [[ -n "$_ECOSYSTEM_ROOT" && -f "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh" ]]; then
  # shellcheck disable=SC1091
  CLAUDE_PLUGIN_ROOT="$_ECOSYSTEM_ROOT" source "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh"
fi

# Librarian libraries needed by the surfacer: config, project-key, storage.
for _librarian_lib in config project-key storage; do
  # shellcheck disable=SC1090
  source "${PLUGIN_ROOT}/scripts/lib/librarian-${_librarian_lib}.sh"
done
unset _librarian_lib
40
+
41
# Print the SessionStart hookSpecificOutput object as one compact JSON line.
# Arguments: $1 - additionalContext text (optional; defaults to "")
# An empty string is fine — the harness sees "nothing to say".
_emit() {
  local message="${1:-}"
  # jq performs the string escaping, so any text is safe to pass in.
  jq -cn --arg ctx "$message" \
    '{ hookSpecificOutput: { hookEventName: "SessionStart", additionalContext: $ctx } }'
}
54
+
55
# The hook payload arrives as JSON on stdin; fall back to the process cwd
# when it is missing or unparsable.
INPUT=$(cat 2>/dev/null || true)
CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // ""' 2>/dev/null) || CWD=""
[[ -z "$CWD" ]] && CWD="$(pwd)"

REPO_ROOT=$(librarian_project_repo_root "$CWD")
librarian_config_load "$REPO_ROOT"

# Every early exit still emits a (blank) hookSpecificOutput object.
if ! librarian_config_enabled; then
  _emit ""
  exit 0
fi

PROJECT_KEY=$(librarian_project_key "$CWD")
if [[ -z "$PROJECT_KEY" ]]; then
  _emit ""
  exit 0
fi

SKIP_WHEN_ZERO=$(librarian_config_get '.librarian.surfacer.skip_inject_when_zero')
[[ -z "$SKIP_WHEN_ZERO" || "$SKIP_WHEN_ZERO" == "null" ]] && SKIP_WHEN_ZERO="true"

# Both numbers feed arithmetic comparisons below. Bash evaluates the operands
# of numeric tests as arithmetic expressions, so a non-numeric value would
# either abort the script under `set -u` (before any JSON is emitted) or, if
# crafted, be evaluated as an expression. Accept plain digits only, and force
# base 10 so a leading zero is not read as octal.
MAX_PENDING=$(librarian_config_get '.librarian.surfacer.max_pending_for_inject')
[[ "$MAX_PENDING" =~ ^[0-9]+$ ]] || MAX_PENDING=20
MAX_PENDING=$((10#$MAX_PENDING))

PENDING=$(librarian_storage_count_pending "$PROJECT_KEY")
[[ "$PENDING" =~ ^[0-9]+$ ]] || PENDING=0
PENDING=$((10#$PENDING))

if (( PENDING == 0 )) && [[ "$SKIP_WHEN_ZERO" == "true" ]]; then
  _emit ""
  exit 0
fi

# Cap the surfaced number so a runaway queue doesn't make the pointer
# itself look alarming. Users still see the truthful count in
# /librarian review.
if (( PENDING > MAX_PENDING )); then
  DISPLAY_COUNT="${MAX_PENDING}+"
else
  DISPLAY_COUNT="$PENDING"
fi

NOUN="proposals"
(( PENDING == 1 )) && NOUN="proposal"

CONTEXT=$(printf 'Librarian has %s pending memory promotion %s. Review with `/librarian review`.' \
  "$DISPLAY_COUNT" "$NOUN")

_emit "$CONTEXT"
exit 0
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env bash
2
+ # Reads archivist artifacts for the librarian scan pipeline.
3
+ #
4
+ # Archivist stores per-session artifacts under:
5
+ # $ONLOOKER_DIR/archivist/<project-key>/{decisions,dead_ends,open_questions}/<ulid>.json
6
+ #
7
+ # Each artifact has the shape (see archivist's storage.sh):
8
+ # { id, kind, project_key, source, created_at, updated_at, summary,
9
+ # detail, files, session_id, trigger }
10
+ #
11
+ # Librarian reads the same project-key directory and filters by created_at,
12
+ # returning candidates newer than the watermark.
13
+
14
# Print the archivist directory for a project key, without a trailing newline.
# Arguments: $1 - project key
# Outputs:   ${ONLOOKER_DIR:-$HOME/.onlooker}/archivist/<project-key> when
#            that directory exists; nothing when the key is empty or the
#            directory is absent.
# Returns:   0 in both "nothing to print" cases.
librarian_archivist_project_dir() {
  local project_key="$1"
  if [[ -n "$project_key" ]]; then
    local candidate="${ONLOOKER_DIR:-$HOME/.onlooker}/archivist/${project_key}"
    if [[ -d "$candidate" ]]; then
      printf '%s' "$candidate"
    fi
  fi
}
24
+
25
# Load archivist artifacts created since the given watermark.
#
# Usage: librarian_archivist_load_since <project_key> <watermark_iso>
#
# Watermark format: ISO-8601 UTC (e.g., "2026-06-01T12:34:56Z"). When the
# watermark is empty, all artifacts are returned (used on first scan).
# Otherwise only artifacts whose created_at (or, failing that, updated_at)
# is strictly later than the watermark are returned; artifacts with neither
# timestamp are skipped.
#
# Output: JSON array, one element per artifact, in chronological order.
#         '[]' when the project has no archivist directory.
# Unreadable, unparsable, or non-object artifact files are skipped
# individually, so one bad file cannot discard the rest of the batch.
librarian_archivist_load_since() {
  local project_key="$1"
  local watermark="${2:-}"

  local project_dir
  project_dir=$(librarian_archivist_project_dir "$project_key")
  [[ -z "$project_dir" ]] && { echo '[]'; return 0; }

  # Collect every valid artifact as one compact JSON document per line.
  local kind file item stream=""
  for kind in decisions dead_ends open_questions; do
    [[ -d "${project_dir}/${kind}" ]] || continue
    for file in "${project_dir}/${kind}"/*.json; do
      # Also covers the no-match case, where the glob stays literal.
      [[ -f "$file" ]] || continue
      # Each file is parsed on its own so a corrupt one only costs itself.
      item=$(jq -c 'select(type == "object")' "$file" 2>/dev/null) || continue
      [[ -z "$item" ]] && continue
      stream+="${item}"$'\n'
    done
  done

  # Filter and sort in a single jq pass. jq compares strings by codepoint,
  # which orders ISO-8601 UTC timestamps chronologically regardless of the
  # shell's locale.
  printf '%s' "$stream" | jq -s --arg wm "$watermark" '
    map(select($wm == "" or ((.created_at // .updated_at // "") > $wm)))
    | sort_by(.created_at // .updated_at // "")
  ' || echo '[]'
}
@@ -0,0 +1,139 @@
1
+ #!/usr/bin/env bash
2
+ # Type classifier for librarian candidates.
3
+ #
4
+ # Calls `claude -p` with a structured prompt that maps a single archivist
5
+ # artifact to one of the four memory types (user, feedback, project,
6
+ # reference) or null when the artifact is interesting but session-only.
7
+ #
8
+ # Returns the model's JSON response on stdout, or an empty string on any
9
+ # error (timeout, missing CLI, invalid or malformed JSON). Confidence
10
+ # thresholding is left to the caller. Callers treat empty as "drop this candidate".
11
+ #
12
+ # Config inputs (read via librarian_config_get from the caller):
13
+ # librarian.classifier.model Anthropic model id
14
+ # librarian.classifier.temperature Sampling temperature
15
+ # librarian.classifier.max_output_tokens Output cap
16
+ # librarian.classifier.min_classifier_confidence Drop below this
17
+
18
+ # Hard wall-clock ceiling for a single classifier call. We never want a
19
+ # hung LLM to delay SessionEnd more than this.
20
+ _LIBRARIAN_CLASSIFIER_TIMEOUT_SECONDS=20
21
+
22
# Render the classifier prompt for one artifact on stdout: fixed
# instructions followed by an <artifact> section filled from the JSON.
# Usage: librarian_classifier_build_prompt <artifact_json>
librarian_classifier_build_prompt() {
  local artifact="$1"
  local kind summary detail files_list session_id created_at

  # Missing fields render as empty strings; .files is joined with ", ".
  kind=$(jq -r '.kind // ""' <<<"$artifact")
  summary=$(jq -r '.summary // ""' <<<"$artifact")
  detail=$(jq -r '.detail // ""' <<<"$artifact")
  files_list=$(jq -r '(.files // []) | join(", ")' <<<"$artifact")
  session_id=$(jq -r '.session_id // ""' <<<"$artifact")
  created_at=$(jq -r '.created_at // ""' <<<"$artifact")

  # Static instructions: quoted delimiter, so nothing in here is expanded.
  cat <<'INSTRUCTIONS'
You are classifying a session artifact for promotion into a long-term memory store.

The store has four types:
- user: durable facts about the user's role, expertise, or working style
- feedback: corrections or validated preferences ("don't do X", "yes, keep doing Y")
- project: ongoing work facts, decisions, constraints not derivable from the code
- reference: pointers to external systems (issue trackers, dashboards, channels)

RULES:
- Output ONLY a single JSON object on one line, no markdown fences, no prose.
- Schema: { "type": "<user|feedback|project|reference|null>",
"title": "<<=60 chars>",
"body": "<the memory content; structure per type>",
"confidence": <float 0-1> }
- Use "type": null when the artifact is interesting but session-only (a
specific bug fix, a one-off question that got answered, an exploration
that didn't change anything).
- For feedback and project types, include **Why:** and **How to apply:**
lines inside the body.

<artifact>
INSTRUCTIONS

  # Artifact fields are inserted verbatim, one "name: value" line each.
  printf '%s\n' \
    "kind: ${kind}" \
    "summary: ${summary}" \
    "detail: ${detail}" \
    "files: ${files_list}" \
    "session_id: ${session_id}" \
    "created_at: ${created_at}" \
    "</artifact>"
}
66
+
67
# Call the classifier for one artifact. Prints the model's JSON output, or
# nothing when the call fails or the reply does not match the schema.
#
# Usage: librarian_classifier_call <artifact_json> <model> <temperature>
#                                  <max_output_tokens>
#
# Arguments:
#   artifact_json      artifact to classify (an empty value is a no-op)
#   model              optional model id, forwarded as --model when non-empty
#   temperature        accepted for interface compatibility; not forwarded
#   max_output_tokens  accepted for interface compatibility; not forwarded
# Returns: always 0. Callers distinguish success by non-empty output.
#
# The prompt is fed to `claude -p` on stdin. No temporary file is created
# and no trap is installed, so an EXIT trap owned by the calling script
# is left untouched.
librarian_classifier_call() {
  local artifact="$1"
  local model="${2:-}"
  # shellcheck disable=SC2034  # kept so the positional interface is stable
  local temperature="${3:-0.2}"
  # shellcheck disable=SC2034  # kept so the positional interface is stable
  local max_tokens="${4:-256}"

  command -v claude >/dev/null 2>&1 || return 0
  [[ -z "$artifact" ]] && return 0

  local prompt
  prompt=$(librarian_classifier_build_prompt "$artifact") || return 0
  [[ -z "$prompt" ]] && return 0

  local cmd=(claude -p --max-turns 1)
  [[ -n "$model" ]] && cmd+=(--model "$model")

  # Bound the call with timeout (or gtimeout) when one is installed;
  # otherwise run unbounded.
  local limit="${_LIBRARIAN_CLASSIFIER_TIMEOUT_SECONDS:-20}"
  if command -v timeout >/dev/null 2>&1; then
    cmd=(timeout "$limit" "${cmd[@]}")
  elif command -v gtimeout >/dev/null 2>&1; then
    cmd=(gtimeout "$limit" "${cmd[@]}")
  fi

  local response
  response=$("${cmd[@]}" <<<"$prompt" 2>/dev/null) || response=""
  [[ -z "$response" ]] && return 0

  # Strip accidental markdown fences before parsing.
  local clean
  clean=$(printf '%s' "$response" | sed -e 's/^```json//' -e 's/^```//' -e 's/```$//')

  # Validate the response shape before passing it back: a known type (or
  # null), string title/body, and a numeric confidence within [0, 1].
  if ! printf '%s' "$clean" | jq -e '
    (.type == null or (.type | IN("user", "feedback", "project", "reference")))
    and (.title | type) == "string"
    and (.body | type) == "string"
    and (.confidence | type) == "number"
    and .confidence >= 0
    and .confidence <= 1
  ' >/dev/null 2>&1; then
    return 0
  fi

  printf '%s' "$clean"
}
124
+
125
# Synthesize a deterministic filename from a classifier result.
# Used when writing accepted promotions into the typed memory store.
# Format: <type>_<slugified-title>.md
#
# The slug is the lower-cased title with every run of characters outside
# [a-z0-9] collapsed to a single underscore, trimmed of leading/trailing
# underscores and capped at 60 characters. A title that slugifies to nothing
# becomes "memory".
#
# Usage: librarian_classifier_filename <type> <title>
librarian_classifier_filename() {
  local type="$1"
  local title="$2"
  local slug
  # LC_ALL=C pins the character classes to ASCII so the result does not vary
  # with the caller's locale, and guarantees the slug is plain ASCII before
  # `cut` truncates it.
  slug=$(printf '%s' "$title" \
    | LC_ALL=C tr '[:upper:]' '[:lower:]' \
    | LC_ALL=C sed -E 's/[^a-z0-9]+/_/g; s/^_+|_+$//g' \
    | cut -c1-60)
  # Truncation can land right after a separator; drop that dangling
  # underscore (runs were already collapsed, so there is at most one).
  slug="${slug%_}"
  [[ -z "$slug" ]] && slug="memory"
  printf '%s_%s.md' "$type" "$slug"
}