@onlooker-community/ecosystem 0.19.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +26 -0
- package/.claude-plugin/plugin.json +1 -1
- package/.release-please-manifest.json +4 -2
- package/CHANGELOG.md +14 -0
- package/docs/memory-architecture.md +102 -0
- package/package.json +3 -3
- package/plugins/curator/.claude-plugin/plugin.json +14 -0
- package/plugins/curator/CHANGELOG.md +10 -0
- package/plugins/curator/README.md +55 -0
- package/plugins/curator/config.json +41 -0
- package/plugins/curator/docs/adr/001-staleness-tiers.md +100 -0
- package/plugins/curator/docs/design.md +311 -0
- package/plugins/curator/hooks/hooks.json +15 -0
- package/plugins/curator/scripts/hooks/curator-session-start.sh +343 -0
- package/plugins/curator/scripts/lib/curator-checks.sh +155 -0
- package/plugins/curator/scripts/lib/curator-config.sh +67 -0
- package/plugins/curator/scripts/lib/curator-emit.sh +61 -0
- package/plugins/curator/scripts/lib/curator-memory-reader.sh +225 -0
- package/plugins/curator/scripts/lib/curator-project-key.sh +82 -0
- package/plugins/curator/scripts/lib/curator-storage.sh +176 -0
- package/plugins/curator/scripts/lib/curator-ulid.sh +43 -0
- package/plugins/historian/docs/adr/001-local-embeddings-only.md +96 -0
- package/plugins/historian/docs/design.md +317 -0
- package/plugins/librarian/.claude-plugin/plugin.json +14 -0
- package/plugins/librarian/CHANGELOG.md +10 -0
- package/plugins/librarian/README.md +51 -0
- package/plugins/librarian/config.json +52 -0
- package/plugins/librarian/docs/adr/001-propose-dont-auto-write.md +87 -0
- package/plugins/librarian/docs/design.md +301 -0
- package/plugins/librarian/hooks/hooks.json +26 -0
- package/plugins/librarian/scripts/hooks/librarian-session-end.sh +312 -0
- package/plugins/librarian/scripts/hooks/librarian-session-start.sh +103 -0
- package/plugins/librarian/scripts/lib/librarian-archivist-reader.sh +67 -0
- package/plugins/librarian/scripts/lib/librarian-classifier.sh +139 -0
- package/plugins/librarian/scripts/lib/librarian-config.sh +74 -0
- package/plugins/librarian/scripts/lib/librarian-durability.sh +77 -0
- package/plugins/librarian/scripts/lib/librarian-emit.sh +72 -0
- package/plugins/librarian/scripts/lib/librarian-project-key.sh +83 -0
- package/plugins/librarian/scripts/lib/librarian-storage.sh +222 -0
- package/plugins/librarian/scripts/lib/librarian-ulid.sh +50 -0
- package/release-please-config.json +32 -0
- package/test/bats/curator-session-start.bats +316 -0
- package/test/bats/librarian-session-end.bats +182 -0
- package/test/bats/librarian-session-start.bats +136 -0
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Librarian SessionEnd scan.
|
|
3
|
+
#
|
|
4
|
+
# Reads archivist artifacts created since the last librarian scan, runs them
|
|
5
|
+
# through the durability filter, classifies survivors via Haiku, and writes
|
|
6
|
+
# proposals to the queue for review at next SessionStart.
|
|
7
|
+
#
|
|
8
|
+
# Hook contract:
|
|
9
|
+
# - Always exits 0. Never blocks session shutdown.
|
|
10
|
+
# - No-ops when librarian.enabled is not true.
|
|
11
|
+
# - No-ops when no project key (no git context) or no archivist artifacts.
|
|
12
|
+
# - Classifier failures degrade gracefully: the affected candidate is
|
|
13
|
+
# dropped, the rest of the scan proceeds.
|
|
14
|
+
|
|
15
|
+
set -uo pipefail

# Absolute directory of this hook script, and the plugin root two levels up.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Find the ecosystem root so that validate-path.sh can be sourced (it is what
# makes $ONLOOKER_DIR / $ONLOOKER_EVENTS_LOG resolve under the test harness's
# isolated temp home). An explicit ONLOOKER_ECOSYSTEM_ROOT wins; otherwise the
# directory two levels above the plugin is probed for the helper.
_ECOSYSTEM_ROOT="${ONLOOKER_ECOSYSTEM_ROOT:-}"
if [[ -z "$_ECOSYSTEM_ROOT" ]]; then
  _candidate="$(cd "${PLUGIN_ROOT}/../.." 2>/dev/null && pwd)"
  [[ -f "${_candidate}/scripts/lib/validate-path.sh" ]] && _ECOSYSTEM_ROOT="$_candidate"
fi

if [[ -n "$_ECOSYSTEM_ROOT" ]] && [[ -f "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh" ]]; then
  # shellcheck disable=SC1091
  CLAUDE_PLUGIN_ROOT="$_ECOSYSTEM_ROOT" source "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh"
fi

# Pull in the librarian libraries, in the same order as before:
# config, project-key, ulid, storage, emit, archivist-reader, durability,
# classifier.
for _lib in config project-key ulid storage emit archivist-reader durability classifier; do
  # shellcheck disable=SC1090
  source "${PLUGIN_ROOT}/scripts/lib/librarian-${_lib}.sh"
done
unset _lib
|
|
51
|
+
|
|
52
|
+
# The hook payload is read as JSON from stdin. A missing or malformed payload
# is tolerated: CWD falls back to the current directory and SESSION_ID to
# the literal "unknown".
INPUT=$(cat 2>/dev/null || true)
CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // ""' 2>/dev/null) || CWD=""
SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // ""' 2>/dev/null) || SESSION_ID=""
[[ -z "$CWD" ]] && CWD="$(pwd)"
[[ -z "$SESSION_ID" ]] && SESSION_ID="unknown"

# Resolve the repo root once and reuse it for both the config load and the
# manifest write (the SessionStart hook resolves it the same way).
REPO_ROOT=$(librarian_project_repo_root "$CWD")
librarian_config_load "$REPO_ROOT"
librarian_config_enabled || exit 0

# No project key means there is nothing to scan for; exit quietly.
PROJECT_KEY=$(librarian_project_key "$CWD")
[[ -z "$PROJECT_KEY" ]] && exit 0

# Storage init + manifest refresh. A failed init ends the hook (still exit 0);
# a failed manifest write is ignored.
librarian_storage_init "$PROJECT_KEY" || exit 0
REMOTE_URL=$(librarian_project_remote_url "$CWD")
librarian_storage_write_manifest "$PROJECT_KEY" "$REMOTE_URL" "$REPO_ROOT" || true
|
|
69
|
+
|
|
70
|
+
# ----------------------------------------------------------------------------
|
|
71
|
+
# Determine the watermark. Empty means "first scan" — fall back to N days ago.
|
|
72
|
+
# ----------------------------------------------------------------------------
|
|
73
|
+
|
|
74
|
+
WATERMARK=$(librarian_storage_read_last_scan "$PROJECT_KEY")

if [[ -z "$WATERMARK" ]]; then
  BOOTSTRAP_DAYS=$(librarian_config_get '.librarian.scan.bootstrap_lookback_days')
  # Treat the configured value as untrusted input: accept only a plain
  # non-negative number. Empty, "null", or anything else falls back to 14.
  if [[ ! "$BOOTSTRAP_DAYS" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
    BOOTSTRAP_DAYS=14
  fi
  # The value is passed as an argument instead of being spliced into the
  # Python source, so it can never be executed as code. If python3 is missing
  # or fails, WATERMARK stays empty.
  WATERMARK=$(python3 -c '
import datetime
import sys

delta = datetime.timedelta(days=float(sys.argv[1]))
now = datetime.datetime.now(datetime.timezone.utc)
print((now - delta).strftime("%Y-%m-%dT%H:%M:%SZ"))
' "$BOOTSTRAP_DAYS" 2>/dev/null) || WATERMARK=""
fi
|
|
86
|
+
|
|
87
|
+
# ----------------------------------------------------------------------------
|
|
88
|
+
# Emit scan.started and load candidate window.
|
|
89
|
+
# ----------------------------------------------------------------------------
|
|
90
|
+
|
|
91
|
+
SCAN_START_TS_S=$(date +%s)
ARTIFACTS=$(librarian_archivist_load_since "$PROJECT_KEY" "$WATERMARK")
ARTIFACT_COUNT=$(printf '%s' "$ARTIFACTS" | jq 'length' 2>/dev/null) || ARTIFACT_COUNT=0
# jq exits 0 without printing anything when its input is empty, which would
# leave ARTIFACT_COUNT empty and break the --argjson calls below. Anything
# that is not a plain integer is treated as "no artifacts".
[[ "$ARTIFACT_COUNT" =~ ^[0-9]+$ ]] || ARTIFACT_COUNT=0

# last_scan_at is omitted from the payload when there is no watermark.
librarian_emit "librarian.scan.started" "$SESSION_ID" "$(jq -cn \
  --arg trigger "session_end" \
  --arg last_scan_at "$WATERMARK" \
  --argjson artifact_count_in_window "$ARTIFACT_COUNT" \
  '{ trigger: $trigger, last_scan_at: (if $last_scan_at == "" then null else $last_scan_at end),
     artifact_count_in_window: $artifact_count_in_window } | with_entries(select(.value != null))')"

# Bail with scan.complete{outcome: empty, candidates_proposed: 0} when
# archivist has nothing new for us. We still advance the watermark so
# subsequent scans don't re-walk the same window.
if [[ "$ARTIFACT_COUNT" == "0" ]]; then
  librarian_storage_write_last_scan "$PROJECT_KEY" || true
  # Second-resolution clock, so duration_ms is always a multiple of 1000.
  DURATION_MS=$(( ($(date +%s) - SCAN_START_TS_S) * 1000 ))
  librarian_emit "librarian.scan.complete" "$SESSION_ID" "$(jq -cn \
    --arg outcome "empty" \
    --argjson duration_ms "$DURATION_MS" \
    --argjson candidates_proposed 0 \
    --argjson candidates_dropped 0 \
    --argjson artifact_count_in_window 0 \
    '{ outcome: $outcome, duration_ms: $duration_ms,
       candidates_proposed: $candidates_proposed,
       candidates_dropped: $candidates_dropped,
       artifact_count_in_window: $artifact_count_in_window }')"
  exit 0
fi
|
|
119
|
+
fi
|
|
120
|
+
|
|
121
|
+
# ----------------------------------------------------------------------------
|
|
122
|
+
# Durability filter — cheap, deterministic, no network.
|
|
123
|
+
# ----------------------------------------------------------------------------
|
|
124
|
+
|
|
125
|
+
# Filter inputs come from config; missing values fall back to an empty marker
# list and a 40-character minimum detail length.
MARKERS_JSON=$(librarian_config_get '.librarian.durability_filter.marker_phrases | tojson')
if [[ -z "$MARKERS_JSON" || "$MARKERS_JSON" == "null" ]]; then
  MARKERS_JSON='[]'
fi
MIN_DETAIL=$(librarian_config_get '.librarian.scan.min_detail_chars')
case "$MIN_DETAIL" in
  '' | null) MIN_DETAIL=40 ;;
esac

# The filter result is read as an object with .kept and .dropped members.
FILTERED=$(librarian_durability_filter "$ARTIFACTS" "$MARKERS_JSON" "$MIN_DETAIL")
KEPT=$(jq '.kept' <<<"$FILTERED")
DROPPED=$(jq '.dropped' <<<"$FILTERED")

# One librarian.candidate.dropped event is emitted per pre-classifier drop,
# up to MAX_DROPPED_EVENTS per scan, so the event log stays scannable even
# when archivist has piled up months of artifacts.
MAX_DROPPED_EVENTS=20
DROPPED_TOTAL=$(jq 'length' <<<"$DROPPED" 2>/dev/null) || DROPPED_TOTAL=0
DROPPED_EMIT_COUNT=$MAX_DROPPED_EVENTS
if (( DROPPED_TOTAL < MAX_DROPPED_EVENTS )); then
  DROPPED_EMIT_COUNT=$(( DROPPED_TOTAL ))
fi

i=0
while (( i < DROPPED_EMIT_COUNT )); do
  DROP=$(jq -c --argjson idx "$i" '.[$idx]' <<<"$DROPPED")
  librarian_emit "librarian.candidate.dropped" "$SESSION_ID" "$(jq -cn \
    --argjson drop "$DROP" \
    '{ reason: $drop.reason, source_artifact_id: $drop.artifact_id }
     | with_entries(select(.value != null))')"
  i=$(( i + 1 ))
done
|
|
147
|
+
|
|
148
|
+
# ----------------------------------------------------------------------------
|
|
149
|
+
# Classifier loop — one Haiku call per surviving candidate.
|
|
150
|
+
# ----------------------------------------------------------------------------
|
|
151
|
+
|
|
152
|
+
CLASSIFIER_MODEL=$(librarian_config_get '.librarian.classifier.model')
CLASSIFIER_TEMP=$(librarian_config_get '.librarian.classifier.temperature')
CLASSIFIER_MAX=$(librarian_config_get '.librarian.classifier.max_output_tokens')
# Every other lookup in this script guards against a literal "null" coming
# back from librarian_config_get; do the same here so an unset model is never
# handed to the classifier as the string "null". Empty values let the
# classifier apply its own defaults.
[[ "$CLASSIFIER_MODEL" == "null" ]] && CLASSIFIER_MODEL=""
[[ "$CLASSIFIER_TEMP" == "null" ]] && CLASSIFIER_TEMP=""
[[ "$CLASSIFIER_MAX" == "null" ]] && CLASSIFIER_MAX=""
MIN_CONFIDENCE=$(librarian_config_get '.librarian.classifier.min_classifier_confidence')
[[ -z "$MIN_CONFIDENCE" || "$MIN_CONFIDENCE" == "null" ]] && MIN_CONFIDENCE="0.6"
TOMBSTONE_TTL=$(librarian_config_get '.librarian.tombstones.ttl_days')
[[ -z "$TOMBSTONE_TTL" || "$TOMBSTONE_TTL" == "null" ]] && TOMBSTONE_TTL=180
AUTO_PROMOTE_THRESHOLD=$(librarian_config_get '.librarian.auto_promote_threshold')
[[ -z "$AUTO_PROMOTE_THRESHOLD" || "$AUTO_PROMOTE_THRESHOLD" == "null" ]] && AUTO_PROMOTE_THRESHOLD="0.85"

KEPT_COUNT=$(printf '%s' "$KEPT" | jq 'length' 2>/dev/null) || KEPT_COUNT=0
PROPOSED_COUNT=0
POST_CLASSIFIER_DROPPED=0
NOW_TS=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

# Record one post-classifier drop: bump POST_CLASSIFIER_DROPPED and emit a
# librarian.candidate.dropped event. source_artifact_id is left out of the
# payload when the artifact id is empty.
# Usage: _librarian_record_drop <reason> <artifact_id>
_librarian_record_drop() {
  local reason="$1"
  local src="${2:-}"
  POST_CLASSIFIER_DROPPED=$((POST_CLASSIFIER_DROPPED + 1))
  librarian_emit "librarian.candidate.dropped" "$SESSION_ID" "$(jq -cn \
    --arg reason "$reason" \
    --arg src "$src" \
    '{ reason: $reason, source_artifact_id: (if $src == "" then null else $src end) }
     | with_entries(select(.value != null))')"
}

for ((i = 0; i < KEPT_COUNT; i++)); do
  ARTIFACT=$(printf '%s' "$KEPT" | jq -c ".[$i]")
  [[ -z "$ARTIFACT" || "$ARTIFACT" == "null" ]] && continue

  # Looked up once per candidate; used by every drop event and by the proposal.
  ARTIFACT_ID=$(printf '%s' "$ARTIFACT" | jq -r '.id // ""')
  ARTIFACT_SESSION=$(printf '%s' "$ARTIFACT" | jq -r '.session_id // ""')

  RESPONSE=$(librarian_classifier_call \
    "$ARTIFACT" "$CLASSIFIER_MODEL" "$CLASSIFIER_TEMP" "$CLASSIFIER_MAX")

  # An empty response means the classifier call failed or returned something
  # unusable; drop this candidate and carry on with the rest.
  if [[ -z "$RESPONSE" ]]; then
    _librarian_record_drop "classified_null" "$ARTIFACT_ID"
    continue
  fi

  # Null-typed and low-confidence classifications never reach the proposal
  # queue (a dropped event is still emitted for each) — by design, the
  # queue prefers misses over noise.
  MEMORY_TYPE=$(printf '%s' "$RESPONSE" | jq -r '.type // ""')
  CONFIDENCE=$(printf '%s' "$RESPONSE" | jq -r '.confidence // 0')
  BODY=$(printf '%s' "$RESPONSE" | jq -r '.body // ""')
  TITLE=$(printf '%s' "$RESPONSE" | jq -r '.title // ""')

  if [[ -z "$MEMORY_TYPE" || "$MEMORY_TYPE" == "null" ]]; then
    _librarian_record_drop "classified_null" "$ARTIFACT_ID"
    continue
  fi

  # awk does the floating-point comparison, which bash arithmetic cannot.
  BELOW_MIN=$(awk -v a="$CONFIDENCE" -v b="$MIN_CONFIDENCE" 'BEGIN { print (a < b) ? 1 : 0 }')
  if [[ "$BELOW_MIN" == "1" ]]; then
    _librarian_record_drop "low_confidence" "$ARTIFACT_ID"
    continue
  fi

  # Skip if a tombstone exists for this exact body — the user already
  # rejected this content, don't re-surface it.
  BODY_HASH=$(librarian_body_hash "$BODY")
  if [[ -n "$BODY_HASH" ]] && librarian_storage_has_tombstone \
    "$PROJECT_KEY" "$BODY_HASH" "$TOMBSTONE_TTL"; then
    _librarian_record_drop "duplicate" "$ARTIFACT_ID"
    continue
  fi

  # Build and persist the proposal. Conflict detection against the user's
  # memory store is deferred to a follow-up commit; everything ships as
  # conflict_state: "none" for now.
  PROPOSAL_ID=$(librarian_ulid)
  FILENAME=$(librarian_classifier_filename "$MEMORY_TYPE" "$TITLE")

  PROPOSAL_JSON=$(jq -n \
    --arg id "$PROPOSAL_ID" \
    --arg created_at "$NOW_TS" \
    --arg memory_type "$MEMORY_TYPE" \
    --arg filename "$FILENAME" \
    --arg title "$TITLE" \
    --arg body "$BODY" \
    --argjson classifier_confidence "$CONFIDENCE" \
    --arg conflict_state "none" \
    --arg artifact_id "$ARTIFACT_ID" \
    --arg artifact_session "$ARTIFACT_SESSION" \
    '{
      id: $id,
      created_at: $created_at,
      source_artifact_ids: (if $artifact_id == "" then [] else [$artifact_id] end),
      source_session_ids: (if $artifact_session == "" then [] else [$artifact_session] end),
      proposed: {
        type: $memory_type,
        filename: $filename,
        title: $title,
        body: $body,
        classifier_confidence: $classifier_confidence
      },
      conflict_state: $conflict_state,
      conflict_with: [],
      status: "pending"
    }')

  # A failed write skips the candidate without counting it as proposed or
  # dropped.
  librarian_storage_write_proposal "$PROJECT_KEY" "$PROPOSAL_ID" "$PROPOSAL_JSON" >/dev/null \
    || continue

  PROPOSED_COUNT=$((PROPOSED_COUNT + 1))

  librarian_emit "librarian.candidate.proposed" "$SESSION_ID" "$(jq -cn \
    --arg proposal_id "$PROPOSAL_ID" \
    --arg memory_type "$MEMORY_TYPE" \
    --argjson classifier_confidence "$CONFIDENCE" \
    --arg conflict_state "none" \
    --arg src "$ARTIFACT_ID" \
    '{
      proposal_id: $proposal_id,
      memory_type: $memory_type,
      classifier_confidence: $classifier_confidence,
      conflict_state: $conflict_state,
      source_artifact_ids: (if $src == "" then [] else [$src] end)
    }')"
done
|
|
282
|
+
|
|
283
|
+
# ----------------------------------------------------------------------------
|
|
284
|
+
# Watermark advance + scan.complete.
|
|
285
|
+
# ----------------------------------------------------------------------------
|
|
286
|
+
|
|
287
|
+
# Record this scan as the new watermark; a failed write is ignored.
librarian_storage_write_last_scan "$PROJECT_KEY" || true

# Drops from the durability filter and from the classifier stage are reported
# as one number. The outcome is "empty" when nothing was proposed, "ok"
# otherwise.
TOTAL_DROPPED=$(( DROPPED_TOTAL + POST_CLASSIFIER_DROPPED ))
if [[ "$PROPOSED_COUNT" == "0" ]]; then
  OUTCOME="empty"
else
  OUTCOME="ok"
fi
DURATION_MS=$(( ($(date +%s) - SCAN_START_TS_S) * 1000 ))

librarian_emit "librarian.scan.complete" "$SESSION_ID" "$(jq -cn \
  --argjson artifact_count_in_window "$ARTIFACT_COUNT" \
  --argjson duration_ms "$DURATION_MS" \
  --argjson candidates_dropped "$TOTAL_DROPPED" \
  --argjson candidates_proposed "$PROPOSED_COUNT" \
  --arg outcome "$OUTCOME" \
  '{ outcome: $outcome, candidates_proposed: $candidates_proposed, candidates_dropped: $candidates_dropped, duration_ms: $duration_ms, artifact_count_in_window: $artifact_count_in_window }')"

# AUTO_PROMOTE_THRESHOLD is read but not acted on yet (the auto-promote path
# lands in the next commit); this no-op reference keeps shellcheck from
# flagging it as unused.
: "${AUTO_PROMOTE_THRESHOLD}"

exit 0
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Librarian SessionStart surfacer.
|
|
3
|
+
#
|
|
4
|
+
# Counts pending proposals in the project's queue and injects a one-line
|
|
5
|
+
# `additionalContext` pointer if any exist. The full proposal bodies live
|
|
6
|
+
# in ~/.onlooker/librarian/<project-key>/proposals/ and are reviewed via
|
|
7
|
+
# the /librarian review skill rather than inlined here — SessionStart
|
|
8
|
+
# context is precious, and a queue of 20 distilled-but-unreviewed memories
|
|
9
|
+
# isn't where it should go.
|
|
10
|
+
#
|
|
11
|
+
# Hook contract:
|
|
12
|
+
# - Always exits 0. Never blocks session start.
|
|
13
|
+
# - Emits valid hookSpecificOutput JSON, even when nothing to say.
|
|
14
|
+
# - No-ops when librarian.enabled is not true.
|
|
15
|
+
# - No-ops when there is no project key (no git context).
|
|
16
|
+
|
|
17
|
+
set -uo pipefail

# Absolute directory of this hook script, and the plugin root two levels up.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Find the ecosystem root so validate-path.sh can be sourced: an explicit
# ONLOOKER_ECOSYSTEM_ROOT wins, otherwise the directory two levels above the
# plugin is probed for the helper.
_ECOSYSTEM_ROOT="${ONLOOKER_ECOSYSTEM_ROOT:-}"
if [[ -z "$_ECOSYSTEM_ROOT" ]]; then
  _candidate="$(cd "${PLUGIN_ROOT}/../.." 2>/dev/null && pwd)"
  [[ -f "${_candidate}/scripts/lib/validate-path.sh" ]] && _ECOSYSTEM_ROOT="$_candidate"
fi
if [[ -n "$_ECOSYSTEM_ROOT" ]] && [[ -f "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh" ]]; then
  # shellcheck disable=SC1091
  CLAUDE_PLUGIN_ROOT="$_ECOSYSTEM_ROOT" source "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh"
fi

# Pull in the librarian libraries this hook uses: config, project-key, storage.
for _lib in config project-key storage; do
  # shellcheck disable=SC1090
  source "${PLUGIN_ROOT}/scripts/lib/librarian-${_lib}.sh"
done
unset _lib
|
|
40
|
+
|
|
41
|
+
# Emit hookSpecificOutput with the given additionalContext string. An
|
|
42
|
+
# empty string is fine — the harness sees "nothing to say".
|
|
43
|
+
_emit() {
  # Print the SessionStart hookSpecificOutput object as one compact JSON line.
  # $1 (optional) becomes additionalContext; it defaults to the empty string.
  # jq does the string escaping.
  local message="${1:-}"
  jq -cn --arg ctx "$message" \
    '{hookSpecificOutput: {hookEventName: "SessionStart", additionalContext: $ctx}}'
}
|
|
54
|
+
|
|
55
|
+
# The hook payload is read as JSON from stdin; only .cwd is used. A missing
# or malformed payload falls back to the current directory.
INPUT=$(cat 2>/dev/null || true)
CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // ""' 2>/dev/null) || CWD=""
[[ -z "$CWD" ]] && CWD="$(pwd)"

REPO_ROOT=$(librarian_project_repo_root "$CWD")
librarian_config_load "$REPO_ROOT"

# Every early exit still emits a valid (empty) hookSpecificOutput object.
if ! librarian_config_enabled; then
  _emit ""
  exit 0
fi

PROJECT_KEY=$(librarian_project_key "$CWD")
if [[ -z "$PROJECT_KEY" ]]; then
  _emit ""
  exit 0
fi

SKIP_WHEN_ZERO=$(librarian_config_get '.librarian.surfacer.skip_inject_when_zero')
[[ -z "$SKIP_WHEN_ZERO" || "$SKIP_WHEN_ZERO" == "null" ]] && SKIP_WHEN_ZERO="true"

# MAX_PENDING and PENDING feed numeric comparisons below. Bash evaluates the
# operands of -eq / -gt as arithmetic expressions, so an unvalidated string
# could abort the hook under `set -u` or run an embedded command
# substitution. Accept only plain digit strings; anything else gets the
# default. 10# forces base 10 so leading zeros are not read as octal.
MAX_PENDING=$(librarian_config_get '.librarian.surfacer.max_pending_for_inject')
[[ "$MAX_PENDING" =~ ^[0-9]+$ ]] || MAX_PENDING=20
MAX_PENDING=$((10#$MAX_PENDING))

PENDING=$(librarian_storage_count_pending "$PROJECT_KEY")
[[ "$PENDING" =~ ^[0-9]+$ ]] || PENDING=0
PENDING=$((10#$PENDING))

if [[ "$PENDING" -eq 0 && "$SKIP_WHEN_ZERO" == "true" ]]; then
  _emit ""
  exit 0
fi

# Cap the surfaced number so a runaway queue doesn't make the pointer
# itself look alarming. Users still see the truthful count in
# /librarian review.
if [[ "$PENDING" -gt "$MAX_PENDING" ]]; then
  DISPLAY_COUNT="${MAX_PENDING}+"
else
  DISPLAY_COUNT="$PENDING"
fi

NOUN="proposals"
[[ "$PENDING" -eq 1 ]] && NOUN="proposal"

CONTEXT=$(printf 'Librarian has %s pending memory promotion %s. Review with `/librarian review`.' \
  "$DISPLAY_COUNT" "$NOUN")

_emit "$CONTEXT"
exit 0
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Reads archivist artifacts for the librarian scan pipeline.
|
|
3
|
+
#
|
|
4
|
+
# Archivist stores per-session artifacts under:
|
|
5
|
+
# $ONLOOKER_DIR/archivist/<project-key>/{decisions,dead_ends,open_questions}/<ulid>.json
|
|
6
|
+
#
|
|
7
|
+
# Each artifact has the shape (see archivist's storage.sh):
|
|
8
|
+
# { id, kind, project_key, source, created_at, updated_at, summary,
|
|
9
|
+
# detail, files, session_id, trigger }
|
|
10
|
+
#
|
|
11
|
+
# Librarian reads the same project-key directory and filters by created_at,
|
|
12
|
+
# returning candidates newer than the watermark.
|
|
13
|
+
|
|
14
|
+
# Resolve the archivist project dir for a given project key.
|
|
15
|
+
# Returns empty if archivist artifacts are not present.
|
|
16
|
+
librarian_archivist_project_dir() {
  # Print <base>/archivist/<project_key> when that directory exists, where
  # <base> is $ONLOOKER_DIR or, failing that, $HOME/.onlooker.
  # Prints nothing for an empty key or a missing directory.
  # Always returns 0.
  local project_key="$1"
  if [[ -n "$project_key" ]]; then
    local candidate="${ONLOOKER_DIR:-$HOME/.onlooker}/archivist/${project_key}"
    if [[ -d "$candidate" ]]; then
      printf '%s' "$candidate"
    fi
  fi
  return 0
}
|
|
24
|
+
|
|
25
|
+
# Load archivist artifacts created since the given watermark.
|
|
26
|
+
#
|
|
27
|
+
# Usage: librarian_archivist_load_since <project_key> <watermark_iso>
|
|
28
|
+
#
|
|
29
|
+
# Watermark format: ISO-8601 (e.g., "2026-06-01T12:34:56Z"). When the
|
|
30
|
+
# watermark is empty, all artifacts are returned (used on first scan).
|
|
31
|
+
#
|
|
32
|
+
# Output: JSON array, one element per artifact, in chronological order.
|
|
33
|
+
librarian_archivist_load_since() {
  # Load archivist artifacts for <project_key> that are newer than
  # <watermark_iso> and print them as one JSON array, sorted by
  # created_at (falling back to updated_at).
  #
  # Arguments:
  #   $1 - project key
  #   $2 - optional watermark; when empty, every artifact is returned
  # Outputs:
  #   JSON array on stdout; '[]' when the project has no archivist directory
  #   or nothing matches.
  #
  # Each file is parsed and filtered by a single jq call, and the survivors
  # are streamed into one final `jq -s`. Artifact bodies never travel through
  # argv and the array is built once rather than re-parsed per file.
  # Timestamps are compared inside jq, so the ordering does not depend on the
  # shell's locale.
  local project_key="$1"
  local watermark="${2:-}"

  local project_dir
  project_dir=$(librarian_archivist_project_dir "$project_key")
  [[ -z "$project_dir" ]] && { echo '[]'; return 0; }

  local kind file item
  {
    for kind in decisions dead_ends open_questions; do
      [[ -d "${project_dir}/${kind}" ]] || continue
      for file in "${project_dir}/${kind}"/*.json; do
        # Also covers the unmatched-glob case, where $file is the literal
        # pattern.
        [[ -f "$file" ]] || continue
        # Skip files jq cannot parse completely, and anything that is not a
        # JSON object, so one bad file cannot poison the whole result.
        # With a watermark set, an artifact is kept only if it has a
        # timestamp strictly greater than the watermark.
        item=$(jq -c --arg wm "$watermark" '
          select(type == "object")
          | ((.created_at // .updated_at // "") | tostring) as $ts
          | select($wm == "" or ($ts != "" and $ts > $wm))
        ' "$file" 2>/dev/null) || continue
        if [[ -n "$item" ]]; then
          printf '%s\n' "$item"
        fi
      done
    done
  } | jq -s 'sort_by(.created_at // .updated_at // "")'
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Type classifier for librarian candidates.
|
|
3
|
+
#
|
|
4
|
+
# Calls `claude -p` with a structured prompt that maps a single archivist
|
|
5
|
+
# artifact to one of the four memory types (user, feedback, project,
|
|
6
|
+
# reference) or null when the artifact is interesting but session-only.
|
|
7
|
+
#
|
|
8
|
+
# Returns the model's JSON response on stdout, or empty string on any
|
|
9
|
+
# error (timeout, missing CLI, invalid or mis-shaped JSON). Callers
|
|
10
|
+
# treat empty as "drop this candidate".
|
|
11
|
+
#
|
|
12
|
+
# Config inputs (read via librarian_config_get from the caller):
|
|
13
|
+
# librarian.classifier.model Anthropic model id
|
|
14
|
+
# librarian.classifier.temperature Sampling temperature
|
|
15
|
+
# librarian.classifier.max_output_tokens Output cap
|
|
16
|
+
# librarian.classifier.min_classifier_confidence Drop below this
|
|
17
|
+
|
|
18
|
+
# Hard wall-clock ceiling for a single classifier call. We never want a
|
|
19
|
+
# hung LLM to delay SessionEnd more than this.
|
|
20
|
+
_LIBRARIAN_CLASSIFIER_TIMEOUT_SECONDS=20
|
|
21
|
+
|
|
22
|
+
# Build the classifier prompt for a single artifact.
|
|
23
|
+
# Usage: librarian_classifier_build_prompt <artifact_json>
|
|
24
|
+
librarian_classifier_build_prompt() {
  # Render the classifier prompt for one archivist artifact on stdout.
  # $1 is the artifact as a JSON string. Missing fields render as empty
  # values, and the files array is joined with ", ".
  local artifact="$1"
  local kind summary detail files_list session_id created_at

  # One jq call per field, so multi-line values stay intact.
  kind=$(jq -r '.kind // ""' <<<"$artifact")
  summary=$(jq -r '.summary // ""' <<<"$artifact")
  detail=$(jq -r '.detail // ""' <<<"$artifact")
  files_list=$(jq -r '(.files // []) | join(", ")' <<<"$artifact")
  session_id=$(jq -r '.session_id // ""' <<<"$artifact")
  created_at=$(jq -r '.created_at // ""' <<<"$artifact")

  # Unquoted delimiter: the ${...} fields below are expanded into the prompt.
  cat <<EOF
You are classifying a session artifact for promotion into a long-term memory store.

The store has four types:
- user: durable facts about the user's role, expertise, or working style
- feedback: corrections or validated preferences ("don't do X", "yes, keep doing Y")
- project: ongoing work facts, decisions, constraints not derivable from the code
- reference: pointers to external systems (issue trackers, dashboards, channels)

RULES:
- Output ONLY a single JSON object on one line, no markdown fences, no prose.
- Schema: { "type": "<user|feedback|project|reference|null>",
"title": "<<=60 chars>",
"body": "<the memory content; structure per type>",
"confidence": <float 0-1> }
- Use "type": null when the artifact is interesting but session-only (a
specific bug fix, a one-off question that got answered, an exploration
that didn't change anything).
- For feedback and project types, include **Why:** and **How to apply:**
lines inside the body.

<artifact>
kind: ${kind}
summary: ${summary}
detail: ${detail}
files: ${files_list}
session_id: ${session_id}
created_at: ${created_at}
</artifact>
EOF
}
|
|
66
|
+
|
|
67
|
+
# Call the classifier for one artifact. Prints the model's JSON output or
|
|
68
|
+
# empty string on error.
|
|
69
|
+
#
|
|
70
|
+
# Usage: librarian_classifier_call <artifact_json> <model> <temperature>
|
|
71
|
+
# <max_output_tokens>
|
|
72
|
+
librarian_classifier_call() {
  # Classify one artifact with `claude -p` and print the validated JSON
  # response, or nothing at all on any failure. Always returns 0.
  #
  # Arguments:
  #   $1 - artifact JSON
  #   $2 - optional model id, forwarded as --model when non-empty
  #   $3 - temperature       (accepted for interface compatibility; the
  #   $4 - max_output_tokens  invocation below does not forward either)
  #
  # The prompt is fed to the CLI on stdin straight from a variable. There is
  # no temp file with a predictable fallback name, and no EXIT trap to install
  # and reset, so a caller's own EXIT trap is left untouched.
  local artifact="$1"
  local model="${2:-}"

  command -v claude >/dev/null 2>&1 || return 0
  [[ -z "$artifact" ]] && return 0

  local prompt
  prompt=$(librarian_classifier_build_prompt "$artifact") || return 0

  local -a cmd=(claude -p --max-turns 1)
  [[ -n "$model" ]] && cmd+=(--model "$model")

  # Put a wall-clock limit on the call when timeout (or gtimeout) is
  # available; otherwise run the CLI unbounded.
  local limit="${_LIBRARIAN_CLASSIFIER_TIMEOUT_SECONDS:-20}"
  if command -v timeout >/dev/null 2>&1; then
    cmd=(timeout "$limit" "${cmd[@]}")
  elif command -v gtimeout >/dev/null 2>&1; then
    cmd=(gtimeout "$limit" "${cmd[@]}")
  fi

  # A non-zero exit (including a timeout) is treated the same as no output.
  local response=""
  response=$("${cmd[@]}" <<<"$prompt" 2>/dev/null) || response=""
  [[ -z "$response" ]] && return 0

  # Strip accidental markdown fences before parsing.
  local clean
  clean=$(printf '%s' "$response" | sed -e 's/^```json//' -e 's/^```//' -e 's/```$//')

  # Validate the response shape before passing it back: type must be null or
  # one of the four memory types, title and body must be strings, and
  # confidence must be a number. Confidence thresholds are left to the caller.
  if ! printf '%s' "$clean" | jq -e '
    (.type == null or (.type | IN("user", "feedback", "project", "reference")))
    and (.title | type) == "string"
    and (.body | type) == "string"
    and (.confidence | type) == "number"
  ' >/dev/null 2>&1; then
    return 0
  fi

  printf '%s' "$clean"
}
|
|
124
|
+
|
|
125
|
+
# Synthesize a deterministic filename from a classifier result.
|
|
126
|
+
# Used when writing accepted promotions into the typed memory store.
|
|
127
|
+
# Format: <type>_<slugified-title>.md
|
|
128
|
+
#
|
|
129
|
+
# Usage: librarian_classifier_filename <type> <title>
|
|
130
|
+
librarian_classifier_filename() {
  # Build "<type>_<slug>.md" from a classifier result.
  #
  # Arguments:
  #   $1 - memory type, used verbatim as the filename prefix
  #   $2 - title to slugify
  # Outputs:
  #   the filename on stdout
  #
  # The slug is the lower-cased title with every run of characters outside
  # [a-z0-9] (newlines included) collapsed to one underscore, cut to 60
  # characters, with no leading or trailing underscore. An empty slug becomes
  # "memory". LC_ALL=C keeps the character classes byte-wise, so the result is
  # the same in every locale.
  local type="$1"
  local title="$2"
  local slug
  slug=$(printf '%s' "$title" \
    | LC_ALL=C tr '[:upper:]' '[:lower:]' \
    | LC_ALL=C tr -cs 'a-z0-9' '_')
  # Runs are already squeezed, so there is at most one underscore at each end.
  slug="${slug#_}"
  slug="${slug:0:60}"
  # Trim after truncating: the cut can land right on a separator.
  slug="${slug%_}"
  [[ -z "$slug" ]] && slug="memory"
  printf '%s_%s.md' "$type" "$slug"
}
|