@onlooker-community/ecosystem 0.24.0 → 0.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +39 -13
- package/.claude-plugin/plugin.json +2 -2
- package/.release-please-manifest.json +5 -4
- package/CHANGELOG.md +14 -0
- package/CLAUDE.md +1 -0
- package/package.json +3 -3
- package/plugins/assayer/.claude-plugin/plugin.json +14 -0
- package/plugins/assayer/CHANGELOG.md +10 -0
- package/plugins/assayer/README.md +114 -0
- package/plugins/assayer/config.json +14 -0
- package/plugins/assayer/docs/adr/001-verify-claims-against-transcript-evidence.md +57 -0
- package/plugins/assayer/docs/design.md +72 -0
- package/plugins/assayer/hooks/hooks.json +15 -0
- package/plugins/assayer/scripts/hooks/assayer-stop.sh +249 -0
- package/plugins/assayer/scripts/lib/assayer-config.sh +88 -0
- package/plugins/assayer/scripts/lib/assayer-events.sh +85 -0
- package/plugins/assayer/scripts/lib/assayer-extract.sh +87 -0
- package/plugins/assayer/scripts/lib/assayer-project-key.sh +69 -0
- package/plugins/assayer/scripts/lib/assayer-transcript.sh +99 -0
- package/plugins/assayer/scripts/lib/assayer-ulid.sh +46 -0
- package/plugins/assayer/scripts/lib/assayer-verify.sh +95 -0
- package/plugins/cartographer/.claude-plugin/plugin.json +1 -1
- package/plugins/cartographer/CHANGELOG.md +7 -0
- package/plugins/cartographer/scripts/lib/cartographer-lock.sh +17 -7
- package/plugins/cartographer/scripts/lib/portable-lock.sh +57 -0
- package/plugins/governor/.claude-plugin/plugin.json +1 -1
- package/plugins/governor/CHANGELOG.md +7 -0
- package/plugins/governor/scripts/hooks/governor-post-tool-use.sh +6 -2
- package/plugins/governor/scripts/hooks/governor-pre-tool-use.sh +6 -2
- package/plugins/governor/scripts/hooks/governor-session-start.sh +6 -2
- package/plugins/governor/scripts/hooks/governor-stop.sh +6 -2
- package/plugins/governor/scripts/lib/portable-lock.sh +59 -0
- package/release-please-config.json +16 -0
- package/scripts/lib/portable-lock.sh +1 -1
- package/test/bats/assayer-config.bats +60 -0
- package/test/bats/assayer-events.bats +99 -0
- package/test/bats/assayer-extract.bats +76 -0
- package/test/bats/assayer-project-key.bats +58 -0
- package/test/bats/assayer-stop-hook.bats +81 -0
- package/test/bats/assayer-transcript.bats +72 -0
- package/test/bats/assayer-ulid.bats +31 -0
- package/test/bats/assayer-verify.bats +89 -0
- package/test/bats/cartographer-lock.bats +19 -0
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Assayer Stop hook.
#
# Triggered by Stop. Off by default — gated on assayer.enabled in config.
# When enabled, it reads the just-finished session's transcript, extracts the
# agent's testable success claims from its final message, and cross-checks each
# against the actual Bash command results in the same transcript. Each claim is
# classified corroborated / contradicted / unverified; contradicted and
# unverified claims are emitted as individual events, and all three tallies
# appear in the audit summary.
#
# Hook contract:
# - Exits 0 on every explicit exit path. Advisory only — never blocks Stop.
# - Skips silently if disabled, no git context, no project key, `claude` or
#   `jq` missing, no transcript, no final assistant message, or an empty
#   response from `claude -p`.
#   NOTE(review): an empty claim list does NOT skip — the audit started/complete
#   events are still emitted with claim_count 0. Confirm whether that is the
#   intended behaviour.
# - Recursion guard: exits immediately if ASSAYER_NESTED=1 to prevent a
#   claude -p subprocess from re-triggering this hook on its own Stop.
# - Errors from `claude -p` are swallowed; worst case is no audit written.

# No -e on purpose: individual failures are handled inline with `|| ...`.
set -uo pipefail

# Recursion guard — must be first.
[[ "${ASSAYER_NESTED:-}" == "1" ]] && exit 0
# Exported so any child process started below sees the guard.
export ASSAYER_NESTED=1

# This file lives two directories below the plugin root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Resolve the ecosystem root (sibling to this plugin's parent).
# An explicit ONLOOKER_ECOSYSTEM_ROOT wins; otherwise probe two levels above
# the plugin root and accept it only if the shared validate-path.sh is there.
_ECOSYSTEM_ROOT="${ONLOOKER_ECOSYSTEM_ROOT:-}"
if [[ -z "$_ECOSYSTEM_ROOT" ]]; then
  _candidate="$(cd "${PLUGIN_ROOT}/../.." 2>/dev/null && pwd)"
  if [[ -f "${_candidate}/scripts/lib/validate-path.sh" ]]; then
    _ECOSYSTEM_ROOT="$_candidate"
  fi
fi

# The shared ecosystem libraries are optional: they are sourced only when the
# ecosystem root was found, with CLAUDE_PLUGIN_ROOT pointed at that root for
# the duration of each `source`.
if [[ -n "$_ECOSYSTEM_ROOT" && -f "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh" ]]; then
  # shellcheck disable=SC1091
  CLAUDE_PLUGIN_ROOT="$_ECOSYSTEM_ROOT" source "${_ECOSYSTEM_ROOT}/scripts/lib/validate-path.sh"
  # shellcheck disable=SC1091
  CLAUDE_PLUGIN_ROOT="$_ECOSYSTEM_ROOT" source "${_ECOSYSTEM_ROOT}/scripts/lib/onlooker-schema.sh"
fi

# Plugin-local libraries (always required).
# shellcheck source=../lib/assayer-config.sh
source "${PLUGIN_ROOT}/scripts/lib/assayer-config.sh"
# shellcheck source=../lib/assayer-project-key.sh
source "${PLUGIN_ROOT}/scripts/lib/assayer-project-key.sh"
# shellcheck source=../lib/assayer-ulid.sh
source "${PLUGIN_ROOT}/scripts/lib/assayer-ulid.sh"
# shellcheck source=../lib/assayer-transcript.sh
source "${PLUGIN_ROOT}/scripts/lib/assayer-transcript.sh"
# shellcheck source=../lib/assayer-extract.sh
source "${PLUGIN_ROOT}/scripts/lib/assayer-extract.sh"
# shellcheck source=../lib/assayer-verify.sh
source "${PLUGIN_ROOT}/scripts/lib/assayer-verify.sh"
# shellcheck source=../lib/assayer-events.sh
CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" source "${PLUGIN_ROOT}/scripts/lib/assayer-events.sh"

# The hook payload arrives as JSON on stdin. Each field degrades to "" when
# jq is missing or the payload is not valid JSON.
INPUT=$(cat)
CWD=$(printf '%s' "$INPUT" | jq -r '.cwd // ""' 2>/dev/null) || CWD=""
SESSION_ID=$(printf '%s' "$INPUT" | jq -r '.session_id // ""' 2>/dev/null) || SESSION_ID=""
TRANSCRIPT_PATH=$(printf '%s' "$INPUT" | jq -r '.transcript_path // ""' 2>/dev/null) || TRANSCRIPT_PATH=""
# Fall back to the environment when the payload carries no transcript path.
[[ -z "$TRANSCRIPT_PATH" ]] && TRANSCRIPT_PATH="${CLAUDE_TRANSCRIPT_PATH:-}"

# Read by _assayer_session_id (assayer-events.sh) when emitting events.
export _HOOK_SESSION_ID="${SESSION_ID:-unknown}"

# Single exit point for every "nothing to do" path: always status 0.
_done() { exit 0; }
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
# Config + prerequisites
# ---------------------------------------------------------------------------

# Without a git work tree behind the session's cwd there is nothing to audit.
REPO_ROOT=$(assayer_project_repo_root "$CWD")
if [[ -z "$REPO_ROOT" ]]; then
  _done
fi

# Load repo settings layered over the plugin defaults; stay silent unless the
# feature has been switched on.
CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" assayer_config_load "$REPO_ROOT"
if ! assayer_config_enabled; then
  _done
fi

# The project key names the per-project advisory directory written later.
PROJECT_KEY=$(assayer_project_key "$CWD")
if [[ -z "$PROJECT_KEY" ]]; then
  _done
fi

# Both external tools are hard requirements for the rest of the hook.
if ! command -v claude >/dev/null 2>&1; then
  _done
fi
if ! command -v jq >/dev/null 2>&1; then
  _done
fi

if [[ ! -f "$TRANSCRIPT_PATH" ]]; then
  _done
fi

# ---------------------------------------------------------------------------
# Read transcript: final message + command evidence
# ---------------------------------------------------------------------------

# The claims live in the agent's last text message (truncated per config).
FINAL_MESSAGE_CHARS=$(CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" assayer_config_final_message_chars)
FINAL_MESSAGE=$(assayer_final_assistant_message "$TRANSCRIPT_PATH" "$FINAL_MESSAGE_CHARS")
if [[ -z "$FINAL_MESSAGE" ]]; then
  _done
fi

# The evidence is every Bash command in the session plus its result status.
COMMANDS=$(assayer_collect_commands "$TRANSCRIPT_PATH")
COMMAND_COUNT=$(jq 'length' 2>/dev/null <<<"$COMMANDS") || COMMAND_COUNT=0
|
|
95
|
+
|
|
96
|
+
# ---------------------------------------------------------------------------
# Extract claims via claude -p
# ---------------------------------------------------------------------------

MAX_CLAIMS=$(CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" assayer_config_max_claims)
MIN_CONFIDENCE=$(CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" assayer_config_min_confidence)
EVAL_MODEL=$(CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" assayer_config_model)
TIMEOUT_SECS=$(CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" assayer_config_timeout)

# The prompt embeds the agent's final message, so it must live in a private,
# unpredictable file. If mktemp fails, skip the (advisory) audit rather than
# fall back to a guessable /tmp path that another local user could pre-create
# or symlink.
PROMPT_FILE=$(mktemp -t assayer-prompt.XXXXXX 2>/dev/null) || _done
trap 'rm -f -- "$PROMPT_FILE"' EXIT
assayer_build_extraction_prompt "$FINAL_MESSAGE" "$MAX_CLAIMS" >"$PROMPT_FILE"

CLAUDE_ARGS=(-p --max-turns 1)
[[ -n "$EVAL_MODEL" ]] && CLAUDE_ARGS+=(--model "$EVAL_MODEL")

# Build the command once and prepend a timeout wrapper when one exists
# (`timeout` from coreutils, or `gtimeout` where coreutils is g-prefixed).
# The array always contains at least `claude`, so expanding it is safe under
# `set -u` even on old bash versions that reject empty-array expansion.
CLAUDE_CMD=(claude "${CLAUDE_ARGS[@]}")
if command -v timeout >/dev/null 2>&1; then
  CLAUDE_CMD=(timeout "$TIMEOUT_SECS" "${CLAUDE_CMD[@]}")
elif command -v gtimeout >/dev/null 2>&1; then
  CLAUDE_CMD=(gtimeout "$TIMEOUT_SECS" "${CLAUDE_CMD[@]}")
fi

# Any failure (non-zero exit, timeout) is swallowed: no response, no audit.
RESPONSE=$("${CLAUDE_CMD[@]}" <"$PROMPT_FILE" 2>/dev/null) || RESPONSE=""
[[ -z "$RESPONSE" ]] && _done

CLAIMS=$(assayer_parse_claims "$RESPONSE")
CLAIM_COUNT=$(printf '%s' "$CLAIMS" | jq 'length' 2>/dev/null) || CLAIM_COUNT=0
# CLAIM_COUNT is later passed to `jq --argjson`; anything that is not a plain
# non-negative integer (e.g. empty output) would make those calls fail.
[[ "$CLAIM_COUNT" =~ ^[0-9]+$ ]] || CLAIM_COUNT=0
|
|
124
|
+
|
|
125
|
+
# ---------------------------------------------------------------------------
# Audit
# ---------------------------------------------------------------------------

AUDIT_ID=$(assayer_ulid)
# Millisecond wall clock; 0 when python3 is unavailable.
AUDIT_START=$(python3 -c 'import time; print(int(time.time()*1000))' 2>/dev/null || echo 0)

started_payload=$(jq -n \
  --arg audit_id "$AUDIT_ID" \
  --argjson claim_count "$CLAIM_COUNT" \
  --arg trigger "stop" \
  --argjson command_count "${COMMAND_COUNT:-0}" \
  '{audit_id: $audit_id, claim_count: $claim_count, trigger: $trigger, command_count: $command_count}')
assayer_emit_event "assayer.audit.started" "$started_payload" || true

# Per-project directory that receives the advisory audit file.
ONLOOKER_BASE="${ONLOOKER_DIR:-$HOME/.onlooker}"
ASSAYER_DIR="${ONLOOKER_BASE}/assayer/${PROJECT_KEY}"
mkdir -p "$ASSAYER_DIR" 2>/dev/null || true

count_corroborated=0
count_contradicted=0
count_unverified=0
checked_claims="[]"

# One compact JSON claim per line; process substitution keeps the counters in
# the current shell.
while IFS= read -r claim; do
  [[ -z "$claim" ]] && continue

  # Confidence floor — skip low-confidence extractions. Values are passed to
  # awk via -v bindings (never interpolated into the program) and both sides
  # are forced into numeric context with `+ 0`. Without that, awk compares a
  # non-numeric operand as a STRING, so a malformed threshold would silently
  # change which claims survive; with it, a non-numeric value degrades to 0.
  conf=$(printf '%s' "$claim" | jq -r '.confidence // 0.6' 2>/dev/null) || conf="0.6"
  if ! awk -v a="$conf" -v b="$MIN_CONFIDENCE" \
    'BEGIN { exit !((a + 0) >= (b + 0)) }' 2>/dev/null; then
    continue
  fi

  claim_text=$(printf '%s' "$claim" | jq -r '.text // ""' 2>/dev/null) || claim_text=""
  claim_type=$(printf '%s' "$claim" | jq -r '.type // "generic"' 2>/dev/null) || claim_type="generic"
  [[ -z "$claim_text" ]] && continue

  # Deterministic cross-check of the claim against the recorded commands.
  verdict_obj=$(assayer_classify_claim "$claim" "$COMMANDS")
  verdict=$(printf '%s' "$verdict_obj" | jq -r '.verdict // "unverified"' 2>/dev/null) || verdict="unverified"

  case "$verdict" in
    contradicted)
      count_contradicted=$((count_contradicted + 1))
      evidence_command=$(printf '%s' "$verdict_obj" | jq -r '.evidence_command // ""' 2>/dev/null) || evidence_command=""
      excerpt=$(printf '%s' "$verdict_obj" | jq -r '.excerpt // ""' 2>/dev/null) || excerpt=""
      contradicted_payload=$(jq -n \
        --arg audit_id "$AUDIT_ID" \
        --arg claim "$claim_text" \
        --arg claim_type "$claim_type" \
        --arg evidence_command "$evidence_command" \
        --arg result_excerpt "$excerpt" \
        --argjson confidence "$conf" \
        '{audit_id: $audit_id, claim: $claim, claim_type: $claim_type,
          evidence_command: $evidence_command, result_excerpt: $result_excerpt,
          confidence: $confidence}')
      assayer_emit_event "assayer.claim.contradicted" "$contradicted_payload" || true
      ;;
    corroborated)
      # Counted only; no per-claim event is emitted for corroborated claims.
      count_corroborated=$((count_corroborated + 1))
      ;;
    *)
      # Any other verdict string is treated as unverified.
      count_unverified=$((count_unverified + 1))
      reason=$(printf '%s' "$verdict_obj" | jq -r '.reason // "no_evidence"' 2>/dev/null) || reason="no_evidence"
      unverified_payload=$(jq -n \
        --arg audit_id "$AUDIT_ID" \
        --arg claim "$claim_text" \
        --arg claim_type "$claim_type" \
        --arg reason "$reason" \
        '{audit_id: $audit_id, claim: $claim, claim_type: $claim_type, reason: $reason}')
      assayer_emit_event "assayer.claim.unverified" "$unverified_payload" || true
      ;;
  esac

  # Accumulate the per-claim result for the advisory file.
  checked_claims=$(printf '%s' "$checked_claims" | jq -c \
    --arg text "$claim_text" \
    --arg verdict "$verdict" \
    '. + [{text: $text, verdict: $verdict}]' 2>/dev/null) || true
done < <(printf '%s' "$CLAIMS" | jq -c '.[]' 2>/dev/null)
|
|
209
|
+
|
|
210
|
+
# ---------------------------------------------------------------------------
# Audit summary
# ---------------------------------------------------------------------------

# Elapsed wall-clock time since AUDIT_START, clamped so it is never negative.
AUDIT_END=$(python3 -c 'import time; print(int(time.time()*1000))' 2>/dev/null || echo 0)
DURATION_MS=$((AUDIT_END - AUDIT_START))
if [[ "$DURATION_MS" -lt 0 ]]; then
  DURATION_MS=0
fi

VERDICT=$(assayer_audit_verdict "$count_contradicted" "$count_corroborated" "$count_unverified")

# jq bindings shared by the summary event and the advisory file.
tally_args=(
  --arg audit_id "$AUDIT_ID"
  --argjson claim_count "$CLAIM_COUNT"
  --argjson corroborated "$count_corroborated"
  --argjson contradicted "$count_contradicted"
  --argjson unverified "$count_unverified"
  --arg verdict "$VERDICT"
)

complete_payload=$(jq -n "${tally_args[@]}" \
  --argjson duration_ms "$DURATION_MS" \
  '{audit_id: $audit_id, claim_count: $claim_count,
    corroborated: $corroborated, contradicted: $contradicted,
    unverified: $unverified, verdict: $verdict, duration_ms: $duration_ms}')
assayer_emit_event "assayer.audit.complete" "$complete_payload" || true

# Advisory file for review in the next session. The session id is reduced to
# [a-zA-Z0-9-] (everything else becomes "_") before it is used in a file name.
SAFE_SESSION_ID=$(printf '%s' "${SESSION_ID:-unknown}" | tr -c 'a-zA-Z0-9-' '_')
advisory_file="${ASSAYER_DIR}/audit-${SAFE_SESSION_ID}.json"
jq -n "${tally_args[@]}" \
  --arg session_id "${SESSION_ID:-unknown}" \
  --argjson claims "$checked_claims" \
  '{audit_id: $audit_id, session_id: $session_id, claim_count: $claim_count,
    corroborated: $corroborated, contradicted: $contradicted,
    unverified: $unverified, verdict: $verdict, claims: $claims}' \
  >"$advisory_file" 2>/dev/null || true

_done
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Config loading for Assayer.
|
|
3
|
+
# Reads the repo's .claude/settings.json assayer.* keys, falling back to the
|
|
4
|
+
# plugin's own config.json defaults.
|
|
5
|
+
|
|
6
|
+
# Cached JSON for the two config layers; both are (re)populated by
# assayer_config_load and read by assayer_config_get.
_ASSAYER_CONFIG_JSON=""
_ASSAYER_PLUGIN_CONFIG_JSON=""

# Load both config layers into the module-level caches.
#   $1 — repo root (optional); its .claude/settings.json supplies overrides
# Globals:
#   CLAUDE_PLUGIN_ROOT (read)           — directory holding the plugin config.json
#   _ASSAYER_PLUGIN_CONFIG_JSON (write) — raw text of config.json, or ""
#   _ASSAYER_CONFIG_JSON (write)        — compact `.assayer` block of the repo
#                                         settings, or ""
assayer_config_load() {
  local repo_root="${1:-}"
  local defaults_path="${CLAUDE_PLUGIN_ROOT:-}/config.json"
  local overrides_path="${repo_root}/.claude/settings.json"
  local raw_settings assayer_block

  # Start from a clean slate so a reload never keeps stale values.
  _ASSAYER_PLUGIN_CONFIG_JSON=""
  _ASSAYER_CONFIG_JSON=""

  if [[ -f "$defaults_path" ]]; then
    _ASSAYER_PLUGIN_CONFIG_JSON=$(cat "$defaults_path" 2>/dev/null) || _ASSAYER_PLUGIN_CONFIG_JSON=""
  fi

  # No repo, or no settings file: plugin defaults only.
  [[ -n "$repo_root" ]] || return 0
  [[ -f "$overrides_path" ]] || return 0

  raw_settings=$(cat "$overrides_path" 2>/dev/null) || raw_settings=""
  assayer_block=$(printf '%s' "$raw_settings" | jq -c '.assayer // empty' 2>/dev/null) || assayer_block=""
  [[ -n "$assayer_block" ]] && _ASSAYER_CONFIG_JSON="$assayer_block"
}
|
|
30
|
+
|
|
31
|
+
# Get a single scalar value. Checks settings.json first, then plugin config.json.
|
|
32
|
+
# Print a single scalar config value. Checks the repo settings block first,
# then the plugin config.json (under its top-level `.assayer` key).
#   $1 — jq path relative to the assayer block, e.g. '.evaluation.model'
# Globals: _ASSAYER_CONFIG_JSON, _ASSAYER_PLUGIN_CONFIG_JSON (read)
# Outputs: the value on stdout with no trailing newline; nothing when the key
#          is absent from both layers.
# Returns: 0 when a value was printed. When nothing is found the status is not
#          meaningful — callers should test the (empty) output instead.
#
# The lookup uses an explicit null check rather than jq's `//` operator:
# `//` treats `false` like a missing value, which made a repo-level
# `"enabled": false` fall through to the plugin default instead of
# overriding it.
assayer_config_get() {
  local key="$1"
  local val

  if [[ -n "$_ASSAYER_CONFIG_JSON" ]]; then
    val=$(printf '%s' "$_ASSAYER_CONFIG_JSON" \
      | jq -r "${key} | if . == null then empty else . end" 2>/dev/null) || val=""
    [[ -n "$val" && "$val" != "null" ]] && {
      printf '%s' "$val"
      return 0
    }
  fi

  if [[ -n "$_ASSAYER_PLUGIN_CONFIG_JSON" ]]; then
    val=$(printf '%s' "$_ASSAYER_PLUGIN_CONFIG_JSON" \
      | jq -r ".assayer${key} | if . == null then empty else . end" 2>/dev/null) || val=""
    [[ -n "$val" && "$val" != "null" ]] && {
      printf '%s' "$val"
      return 0
    }
  fi
}
|
|
53
|
+
|
|
54
|
+
# Succeed only when the resolved `.enabled` setting is exactly "true";
# anything else (including an absent key) counts as disabled.
assayer_config_enabled() {
  local flag
  flag=$(assayer_config_get '.enabled')
  if [[ "$flag" == "true" ]]; then
    return 0
  fi
  return 1
}
|
|
59
|
+
|
|
60
|
+
# Print the model used for claim extraction (`.evaluation.model`), falling
# back to the built-in default when the setting is absent or empty.
assayer_config_model() {
  local configured
  configured=$(assayer_config_get '.evaluation.model')
  [[ -n "$configured" ]] || configured='claude-haiku-4-5-20251001'
  printf '%s' "$configured"
}
|
|
65
|
+
|
|
66
|
+
# Print the extraction timeout (`.evaluation.timeout_seconds`), defaulting
# to 60 when the setting is absent or empty.
assayer_config_timeout() {
  local configured
  configured=$(assayer_config_get '.evaluation.timeout_seconds')
  [[ -n "$configured" ]] || configured='60'
  printf '%s' "$configured"
}
|
|
71
|
+
|
|
72
|
+
# Print the maximum number of claims to extract (`.max_claims`), defaulting
# to 12 when the setting is absent or empty.
assayer_config_max_claims() {
  local configured
  configured=$(assayer_config_get '.max_claims')
  [[ -n "$configured" ]] || configured='12'
  printf '%s' "$configured"
}
|
|
77
|
+
|
|
78
|
+
# Print the confidence floor for extracted claims (`.min_confidence`),
# defaulting to 0.5 when the setting is absent or empty.
assayer_config_min_confidence() {
  local configured
  configured=$(assayer_config_get '.min_confidence')
  [[ -n "$configured" ]] || configured='0.5'
  printf '%s' "$configured"
}
|
|
83
|
+
|
|
84
|
+
# Print the character budget for the final assistant message
# (`.final_message_chars`), defaulting to 6000 when absent or empty.
assayer_config_final_message_chars() {
  local configured
  configured=$(assayer_config_get '.final_message_chars')
  [[ -n "$configured" ]] || configured='6000'
  printf '%s' "$configured"
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Canonical assayer.* event emission.
|
|
3
|
+
# Thin wrapper around the ecosystem plugin's onlooker-event.mjs `emit` mode.
|
|
4
|
+
# Every emission is validated against @onlooker-community/schema before being
|
|
5
|
+
# appended to ~/.onlooker/logs/onlooker-events.jsonl.
|
|
6
|
+
#
|
|
7
|
+
# Usage:
|
|
8
|
+
# assayer_emit_event "assayer.audit.started" '{"audit_id":"...","claim_count":3}'
|
|
9
|
+
|
|
10
|
+
# Plugin name stamped on every emitted event.
_ASSAYER_PLUGIN_NAME="assayer"

# Print the path of onlooker-event.mjs.
# Resolution order:
#   1. $_ONLOOKER_EVENT_JS, when set and pointing at an existing file
#   2. $CLAUDE_PLUGIN_ROOT/scripts/lib/onlooker-event.mjs
#   3. $CLAUDE_PLUGIN_ROOT/../../scripts/lib/onlooker-event.mjs
# Returns 1 (printing nothing) when none of them exists.
_assayer_event_js_path() {
  local override="${_ONLOOKER_EVENT_JS:-}"
  if [[ -n "$override" && -f "$override" ]]; then
    printf '%s' "$override"
    return 0
  fi

  local root="${CLAUDE_PLUGIN_ROOT:-}"
  local rel
  for rel in "scripts/lib/onlooker-event.mjs" "../../scripts/lib/onlooker-event.mjs"; do
    if [[ -f "${root}/${rel}" ]]; then
      printf '%s' "${root}/${rel}"
      return 0
    fi
  done

  return 1
}
|
|
31
|
+
|
|
32
|
+
# Print the session id to attach to events: $_HOOK_SESSION_ID if non-empty,
# else $CLAUDE_SESSION_ID if non-empty, else the literal "unknown".
_assayer_session_id() {
  local sid="${_HOOK_SESSION_ID:-}"
  [[ -n "$sid" ]] || sid="${CLAUDE_SESSION_ID:-}"
  printf '%s' "${sid:-unknown}"
}
|
|
43
|
+
|
|
44
|
+
# Emit a single assayer.* event. Returns 0 on success, non-zero on failure.
|
|
45
|
+
# Emit a single assayer.* event. Returns 0 on success, non-zero on failure.
#   $1 — event type, e.g. "assayer.audit.started"
#   $2 — payload as a JSON document
# The event envelope is piped to `node onlooker-event.mjs emit`; whatever that
# prints on stdout is appended as one record to the events log
# ($ONLOOKER_EVENTS_LOG, or <ONLOOKER_DIR or ~/.onlooker>/logs/onlooker-events.jsonl).
# Fails (status 1) when an argument is empty, the emitter script cannot be
# located, the payload is not valid JSON, the emitter exits non-zero, or the
# log directory cannot be created.
assayer_emit_event() {
  local event_type="${1:-}"
  local payload="${2:-}"

  [[ -z "$event_type" || -z "$payload" ]] && return 1

  local event_js
  event_js=$(_assayer_event_js_path) || {
    printf 'assayer-events: cannot locate onlooker-event.mjs\n' >&2
    return 1
  }

  local session_id
  session_id=$(_assayer_session_id)

  # --argjson makes jq reject a payload that is not valid JSON.
  local params
  params=$(jq -n \
    --arg plugin "$_ASSAYER_PLUGIN_NAME" \
    --arg sid "$session_id" \
    --arg type "$event_type" \
    --argjson payload "$payload" \
    '{plugin: $plugin, session_id: $sid, event_type: $type, payload: $payload}' \
    2>/dev/null) || return 1

  # The emitter's stderr is captured so it can be replayed on failure. If
  # mktemp is unavailable the diagnostics are discarded instead of being
  # written to a predictable /tmp path (which a local user could pre-create
  # or symlink); the empty string marks "no capture file".
  local event stderr_file
  stderr_file=$(mktemp -t assayer-event-err.XXXXXX 2>/dev/null) || stderr_file=""
  event=$(printf '%s' "$params" \
    | ONLOOKER_DIR="${ONLOOKER_DIR:-$HOME/.onlooker}" \
      ONLOOKER_PLUGIN_NAME="$_ASSAYER_PLUGIN_NAME" \
      node "$event_js" emit 2>"${stderr_file:-/dev/null}") || {
    printf 'assayer_emit_event: schema validation failed for %s\n' "$event_type" >&2
    if [[ -n "$stderr_file" ]]; then
      [[ -s "$stderr_file" ]] && cat "$stderr_file" >&2
      rm -f -- "$stderr_file"
    fi
    return 1
  }
  if [[ -n "$stderr_file" ]]; then
    rm -f -- "$stderr_file"
  fi

  local log_path="${ONLOOKER_EVENTS_LOG:-${ONLOOKER_DIR:-$HOME/.onlooker}/logs/onlooker-events.jsonl}"
  mkdir -p "$(dirname "$log_path")" 2>/dev/null || return 1
  printf '%s\n' "$event" >>"$log_path"
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Claim extraction for Assayer.
|
|
3
|
+
#
|
|
4
|
+
# The language-understanding half of the plugin: an LLM reads the agent's
|
|
5
|
+
# final message and pulls out *testable success claims* — assertions that some
|
|
6
|
+
# command or check succeeded ("I ran the tests, they pass", "the build is
|
|
7
|
+
# green", "lint is clean"). Each claim is tagged with a type and a
|
|
8
|
+
# command_keyword that the deterministic verifier (assayer-verify.sh) uses to
|
|
9
|
+
# locate the command that would corroborate or contradict it.
|
|
10
|
+
#
|
|
11
|
+
# The LLM does not judge truth — it only identifies claims and what command
|
|
12
|
+
# would settle them. The factual cross-check is pure bash, so it stays
|
|
13
|
+
# deterministic and testable.
|
|
14
|
+
|
|
15
|
+
# Write the extraction prompt for a final assistant message to stdout.
|
|
16
|
+
# $1 — final assistant message text
|
|
17
|
+
# $2 — max_claims
|
|
18
|
+
# Write the claim-extraction prompt to stdout: fixed instructions and the
# expected JSON shape, the claim cap, then the agent's message between
# explicit start/end markers.
#   $1 — final assistant message text
#   $2 — max_claims (default 12)
assayer_build_extraction_prompt() {
  local message="${1:-}"
  local max_claims="${2:-12}"

  # Static instructions; the quoted delimiter keeps the text fully literal.
  cat <<'ASSAYER_PROMPT_HEAD'
You are auditing an AI coding agent's final message to its user.
Extract every TESTABLE SUCCESS CLAIM: a statement asserting that a command,
test, build, lint, or type check was run and SUCCEEDED. Ignore plans, intentions,
hedged statements ("should pass"), and claims about code that no shell command
could confirm.

Return JSON only — no prose, no markdown fences. A JSON array, possibly empty:
[
 {
 "text": "the exact claim, quoted from the message",
 "type": "tests_pass|build_succeeds|lint_clean|types_check|command_succeeds|generic",
 "command_keyword": "a lowercase substring you expect in the verifying shell command, e.g. test, build, lint, tsc",
 "confidence": 0.0..1.0
 }
]

ASSAYER_PROMPT_HEAD

  # Dynamic tail; the message is passed as a printf argument, never as a
  # format string.
  printf 'Extract at most %s claims, highest-confidence first.\n\n' "$max_claims"
  printf '%s\n%s\n%s\n' '---AGENT FINAL MESSAGE---' "$message" '---END MESSAGE---'
}
|
|
44
|
+
|
|
45
|
+
# Parse a claude -p response into a clean JSON array of claims.
|
|
46
|
+
# Strips markdown fences, validates it is a JSON array, and drops malformed
|
|
47
|
+
# entries. Echoes a compact JSON array (or "[]").
|
|
48
|
+
# $1 — raw response text
|
|
49
|
+
# Parse a claude -p response into a clean JSON array of claims.
# Strips markdown fences, validates that the response is a JSON array, and
# normalises each entry. Echoes a compact JSON array (or "[]").
#   $1 — raw response text
#
# Normalisation is per entry, so one malformed entry can no longer discard
# the whole response (previously a non-string `type` or `command_keyword`
# made jq raise inside test/ascii_downcase and every claim was lost):
#   - entries that are not objects, or whose `text` is not a non-empty
#     string, are dropped
#   - `type` falls back to "generic" unless it is one of the known strings
#   - `command_keyword` is lowercased; non-strings become ""
#   - `confidence` falls back to 0.6 unless it is a number
assayer_parse_claims() {
  local raw="${1:-}"
  [[ -z "$raw" ]] && {
    printf '[]'
    return 0
  }

  # Strip leading/trailing markdown fences if present.
  local clean
  clean=$(printf '%s' "$raw" | sed -e 's/^```json//' -e 's/^```//' -e 's/```$//')

  local parsed
  parsed=$(printf '%s' "$clean" | jq -c '
    if type == "array" then
      [ .[]
        | select(type == "object")
        | select((.text | type) == "string" and .text != "")
        | {
            text: .text,
            type: (
              if (.type | type) == "string"
                 and (.type | test("^(tests_pass|build_succeeds|lint_clean|types_check|command_succeeds|generic)$"))
              then .type else "generic" end
            ),
            command_keyword: (
              if (.command_keyword | type) == "string"
              then (.command_keyword | ascii_downcase) else "" end
            ),
            confidence: (
              if (.confidence | type) == "number" then .confidence else 0.6 end
            )
          }
      ]
    else
      []
    end
  ' 2>/dev/null) || parsed="[]"

  [[ -z "$parsed" || "$parsed" == "null" ]] && parsed="[]"
  printf '%s' "$parsed"
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Project key derivation for Assayer.
|
|
3
|
+
# Mirrors echo/archivist/tribunal: stable 12-char hex key derived from the git
|
|
4
|
+
# remote or repo root, surviving renames, clones, and worktrees.
|
|
5
|
+
|
|
6
|
+
_assayer_sha256_first12() {
|
|
7
|
+
local input="$1"
|
|
8
|
+
if command -v shasum >/dev/null 2>&1; then
|
|
9
|
+
printf '%s' "$input" | shasum -a 256 2>/dev/null | cut -c1-12
|
|
10
|
+
elif command -v sha256sum >/dev/null 2>&1; then
|
|
11
|
+
printf '%s' "$input" | sha256sum 2>/dev/null | cut -c1-12
|
|
12
|
+
else
|
|
13
|
+
return 1
|
|
14
|
+
fi
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
# Print the URL of the `origin` remote for the repository at $1.
# Prints nothing when $1 is empty, is not a directory, or has no origin
# remote. Always returns 0.
assayer_project_remote_url() {
  local cwd="${1:-}"

  if [[ -n "$cwd" && -d "$cwd" ]]; then
    git -C "$cwd" remote get-url origin 2>/dev/null || true
  fi
  return 0
}
|
|
22
|
+
|
|
23
|
+
# Print the physical path of the repository root for the directory $1.
# Prints nothing when $1 is empty, is not a directory, or is not inside a git
# work tree. Always returns 0.
#
# The root is taken as the parent of git's common dir, so every linked
# worktree of a repository resolves to the same path; if that cannot be
# resolved it falls back to `git rev-parse --show-toplevel`.
# NOTE(review): for layouts where the common dir is not "<root>/.git"
# (submodules, --separate-git-dir) the parent of the common dir is presumably
# not the work-tree root — confirm whether those layouts need handling.
assayer_project_repo_root() {
  local cwd="${1:-}"
  [[ -z "$cwd" || ! -d "$cwd" ]] && return 0

  if ! git -C "$cwd" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    return 0
  fi

  # Initialise both locals: a bare `local name` leaves the variable unset, and
  # `toplevel` is read below on a path where it may never have been assigned,
  # which aborts with "unbound variable" when the caller runs under `set -u`.
  local common_dir="" toplevel=""
  common_dir=$(git -C "$cwd" rev-parse --git-common-dir 2>/dev/null) || return 0

  # git may report the common dir relative to cwd; make it absolute.
  if [[ -n "$common_dir" && "$common_dir" != /* ]]; then
    common_dir="$(cd "$cwd" && cd "$common_dir" 2>/dev/null && pwd -P)" || common_dir=""
  fi

  if [[ -n "$common_dir" && -d "$common_dir" ]]; then
    toplevel="$(cd "$common_dir/.." 2>/dev/null && pwd -P)" || toplevel=""
  fi

  # Fallback: ask git for the work-tree root and canonicalise it.
  if [[ -z "$toplevel" ]]; then
    toplevel=$(git -C "$cwd" rev-parse --show-toplevel 2>/dev/null || true)
    [[ -n "$toplevel" ]] && toplevel="$(cd "$toplevel" 2>/dev/null && pwd -P)"
  fi

  printf '%s' "$toplevel"
}
|
|
49
|
+
|
|
50
|
+
# Print the 12-character project key for the directory $1 (default: the
# current working directory).
# The key hashes "remote:<origin url>" when an origin remote exists, otherwise
# "root:<repo root>". Prints nothing when neither can be determined.
# Always returns 0.
assayer_project_key() {
  local cwd="${1:-}"
  [[ -n "$cwd" ]] || cwd="$(pwd)"

  local seed
  seed=$(assayer_project_remote_url "$cwd")
  if [[ -n "$seed" ]]; then
    seed="remote:$seed"
  else
    seed=$(assayer_project_repo_root "$cwd")
    if [[ -n "$seed" ]]; then
      seed="root:$seed"
    fi
  fi

  if [[ -n "$seed" ]]; then
    _assayer_sha256_first12 "$seed"
  fi
  return 0
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Transcript reader for Assayer.
|
|
3
|
+
#
|
|
4
|
+
# The Stop hook payload carries `transcript_path` — a JSONL file already
|
|
5
|
+
# committed to disk before Stop fires (same field tribunal and compass read).
|
|
6
|
+
# Assayer needs two things from it:
|
|
7
|
+
#
|
|
8
|
+
# 1. The final assistant message — the text the agent left the user with,
|
|
9
|
+
# where claims like "I ran the tests, they pass" live.
|
|
10
|
+
# 2. The session's Bash commands paired with their result status — the
|
|
11
|
+
# factual record to check those claims against.
|
|
12
|
+
#
|
|
13
|
+
# Claude Code transcripts represent a Bash invocation as a `tool_use` block
|
|
14
|
+
# (name "Bash", with `.input.command`) on an assistant line, and its outcome
|
|
15
|
+
# as a `tool_result` block on a following user line carrying the same
|
|
16
|
+
# `tool_use_id` and an `is_error` flag. There is no per-call numeric exit code
|
|
17
|
+
# in the transcript, so `is_error` is the success/failure signal.
|
|
18
|
+
|
|
19
|
+
# Echo the final assistant message text (text blocks of the last assistant
|
|
20
|
+
# turn that contains any), truncated to max_chars. Empty if unavailable.
|
|
21
|
+
# $1 — transcript_path
|
|
22
|
+
# $2 — max_chars (default 6000)
|
|
23
|
+
# Echo the final assistant message text (the text blocks of the last assistant
# turn that contains any, joined with newlines), truncated to max_chars.
# Prints nothing when the transcript is missing, unparsable, or has no such
# turn. Always returns 0.
#   $1 — transcript_path (JSONL)
#   $2 — max_chars (default 6000)
#
# max_chars comes from configuration and is used as a substring length, which
# bash evaluates as an arithmetic expression. An unvalidated value such as
# 'x[$(cmd)]' would therefore be executed, so anything that is not a plain
# decimal integer is replaced by the default.
assayer_final_assistant_message() {
  local transcript_path="${1:-}"
  local max_chars="${2:-6000}"

  [[ -f "$transcript_path" ]] || return 0

  [[ "$max_chars" =~ ^[0-9]+$ ]] || max_chars=6000
  # Force base 10 so a leading zero is not read as octal.
  max_chars=$((10#$max_chars))

  local text
  text=$(jq -s -r '
    [ .[]
      | select(.type == "assistant")
      | select(any(.message.content[]?; .type == "text"))
    ]
    | last
    | if . == null then ""
      else [ .message.content[]? | select(.type == "text") | .text ] | join("\n")
      end
  ' "$transcript_path" 2>/dev/null) || text=""

  [[ -z "$text" ]] && return 0
  printf '%s' "${text:0:$max_chars}"
}
|
|
44
|
+
|
|
45
|
+
# Echo a JSON array of the session's Bash commands paired with result status:
|
|
46
|
+
# [ { "command": "...", "is_error": true|false, "excerpt": "..." }, ... ]
|
|
47
|
+
# Ordered as they appear in the transcript. `is_error` is false when the
|
|
48
|
+
# matching tool_result is absent or its is_error flag is not true.
|
|
49
|
+
# $1 — transcript_path
|
|
50
|
+
# Echo a compact JSON array describing every Bash tool call in the transcript,
# in transcript order:
#   [ { "command": "...", "is_error": true|false, "excerpt": "..." }, ... ]
# Each call is matched to the first tool_result carrying the same tool_use_id.
# `is_error` is false when no result matches or its flag is not exactly true;
# `excerpt` is the first 240 characters of the result text ("" when absent).
# Echoes "[]" when the transcript is missing or cannot be parsed.
#   $1 — transcript_path (JSONL)
assayer_collect_commands() {
  local transcript_path="${1:-}"

  if [[ ! -f "$transcript_path" ]]; then
    printf '[]'
    return 0
  fi

  # $calls: Bash tool_use blocks from assistant lines.
  # $results: tool_result blocks from user lines; content is either a plain
  # string or an array of blocks whose text parts are joined with newlines.
  local pair_filter='
    (
      [ .[]
        | select(.type == "assistant")
        | .message.content[]?
        | select(.type == "tool_use" and .name == "Bash")
        | { id: .id, command: (.input.command // "") }
      ]
    ) as $calls
    |
    (
      [ .[]
        | select(.type == "user")
        | .message.content[]?
        | select(.type == "tool_result")
        | {
            id: .tool_use_id,
            is_error: (.is_error == true),
            excerpt: (
              if (.content | type) == "string" then .content
              elif (.content | type) == "array" then
                ([ .content[]? | select(.type == "text") | .text ] | join("\n"))
              else "" end
            )
          }
      ]
    ) as $results
    |
    [ $calls[]
      | . as $c
      | {
          command: $c.command,
          is_error: (first($results[] | select(.id == $c.id) | .is_error) // false),
          excerpt: ((first($results[] | select(.id == $c.id) | .excerpt) // "")[0:240])
        }
    ]
  '

  local paired
  paired=$(jq -s -c "$pair_filter" "$transcript_path" 2>/dev/null) || paired=""

  if [[ -z "$paired" || "$paired" == "null" ]]; then
    paired="[]"
  fi
  printf '%s' "$paired"
}
|