@onlooker-community/ecosystem 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +13 -0
- package/.claude-plugin/plugin.json +1 -1
- package/.release-please-manifest.json +3 -2
- package/CHANGELOG.md +7 -0
- package/CLAUDE.md +1 -0
- package/package.json +3 -3
- package/plugins/assayer/.claude-plugin/plugin.json +14 -0
- package/plugins/assayer/CHANGELOG.md +10 -0
- package/plugins/assayer/README.md +114 -0
- package/plugins/assayer/config.json +14 -0
- package/plugins/assayer/docs/adr/001-verify-claims-against-transcript-evidence.md +57 -0
- package/plugins/assayer/docs/design.md +72 -0
- package/plugins/assayer/hooks/hooks.json +15 -0
- package/plugins/assayer/scripts/hooks/assayer-stop.sh +249 -0
- package/plugins/assayer/scripts/lib/assayer-config.sh +88 -0
- package/plugins/assayer/scripts/lib/assayer-events.sh +85 -0
- package/plugins/assayer/scripts/lib/assayer-extract.sh +87 -0
- package/plugins/assayer/scripts/lib/assayer-project-key.sh +69 -0
- package/plugins/assayer/scripts/lib/assayer-transcript.sh +99 -0
- package/plugins/assayer/scripts/lib/assayer-ulid.sh +46 -0
- package/plugins/assayer/scripts/lib/assayer-verify.sh +95 -0
- package/release-please-config.json +16 -0
- package/test/bats/assayer-config.bats +60 -0
- package/test/bats/assayer-events.bats +99 -0
- package/test/bats/assayer-extract.bats +76 -0
- package/test/bats/assayer-project-key.bats +58 -0
- package/test/bats/assayer-stop-hook.bats +81 -0
- package/test/bats/assayer-transcript.bats +72 -0
- package/test/bats/assayer-ulid.bats +31 -0
- package/test/bats/assayer-verify.bats +89 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Config loading for Assayer.
|
|
3
|
+
# Reads the repo's .claude/settings.json assayer.* keys, falling back to the
|
|
4
|
+
# plugin's own config.json defaults.
|
|
5
|
+
|
|
6
|
+
_ASSAYER_CONFIG_JSON=""
|
|
7
|
+
_ASSAYER_PLUGIN_CONFIG_JSON=""
|
|
8
|
+
|
|
9
|
+
# Load Assayer configuration into the module-level caches read by
# assayer_config_get:
#   _ASSAYER_PLUGIN_CONFIG_JSON — raw contents of $CLAUDE_PLUGIN_ROOT/config.json
#   _ASSAYER_CONFIG_JSON        — compact `.assayer` block taken from
#                                 <repo_root>/.claude/settings.json
# Both caches are reset to "" first, so a reload never keeps stale values.
#
#   $1 — repo root (optional; without it only the plugin config is loaded)
#
# Loading is best-effort: a missing, unreadable, or invalid file simply
# leaves the corresponding cache empty. Always returns 0, so a settings file
# without an `assayer` key cannot abort a caller running under `set -e`.
assayer_config_load() {
  local repo_root="${1:-}"

  _ASSAYER_PLUGIN_CONFIG_JSON=""
  _ASSAYER_CONFIG_JSON=""

  local plugin_config="${CLAUDE_PLUGIN_ROOT:-}/config.json"
  if [[ -f "$plugin_config" ]]; then
    _ASSAYER_PLUGIN_CONFIG_JSON=$(cat "$plugin_config" 2>/dev/null) || _ASSAYER_PLUGIN_CONFIG_JSON=""
  fi

  local settings_file="${repo_root}/.claude/settings.json"
  if [[ -n "$repo_root" && -f "$settings_file" ]]; then
    local block
    # `// empty` yields no output when the key is absent, null, or false.
    block=$(jq -c '.assayer // empty' "$settings_file" 2>/dev/null) || block=""
    if [[ -n "$block" ]]; then
      _ASSAYER_CONFIG_JSON="$block"
    fi
  fi

  return 0
}
|
|
30
|
+
|
|
31
|
+
# Get a single config value. Checks the settings.json block first, then the
# plugin config.json (under its `.assayer` key).
#
#   $1 — jq path relative to the assayer block, e.g. '.evaluation.model'
#
# Prints the value with no trailing newline, or nothing when neither source
# defines it. Only JSON null / a missing key count as "not set": an explicit
# `false` in settings.json is printed as "false" and therefore overrides the
# plugin default. Always returns 0.
#
# NOTE: the key is interpolated into the jq program text; the callers in this
# file pass fixed literals. Do not pass untrusted input as the key.
assayer_config_get() {
  local key="${1:-}"
  [[ -n "$key" ]] || return 0

  local val

  if [[ -n "${_ASSAYER_CONFIG_JSON:-}" ]]; then
    val=$(printf '%s' "$_ASSAYER_CONFIG_JSON" \
      | jq -r "(${key}) | select(. != null)" 2>/dev/null) || val=""
    if [[ -n "$val" && "$val" != "null" ]]; then
      printf '%s' "$val"
      return 0
    fi
  fi

  if [[ -n "${_ASSAYER_PLUGIN_CONFIG_JSON:-}" ]]; then
    val=$(printf '%s' "$_ASSAYER_PLUGIN_CONFIG_JSON" \
      | jq -r "(.assayer${key}) | select(. != null)" 2>/dev/null) || val=""
    if [[ -n "$val" && "$val" != "null" ]]; then
      printf '%s' "$val"
      return 0
    fi
  fi

  return 0
}
|
|
53
|
+
|
|
54
|
+
# Succeeds (status 0) only when the resolved `.enabled` value is exactly the
# string "true"; any other value, or no value at all, yields status 1.
assayer_config_enabled() {
  local flag
  flag="$(assayer_config_get '.enabled')"
  if [[ "$flag" == "true" ]]; then
    return 0
  fi
  return 1
}
|
|
59
|
+
|
|
60
|
+
# Print the configured `.evaluation.model`, or the built-in default model id
# when the lookup yields nothing.
assayer_config_model() {
  local model
  model="$(assayer_config_get '.evaluation.model')"
  if [[ -z "$model" ]]; then
    model="claude-haiku-4-5-20251001"
  fi
  printf '%s' "$model"
}
|
|
65
|
+
|
|
66
|
+
# Print the configured `.evaluation.timeout_seconds`, falling back to 60 when
# the lookup yields nothing. The value is passed through unvalidated.
assayer_config_timeout() {
  local seconds
  seconds="$(assayer_config_get '.evaluation.timeout_seconds')"
  [[ -n "$seconds" ]] || seconds="60"
  printf '%s' "$seconds"
}
|
|
71
|
+
|
|
72
|
+
# Print the configured `.max_claims`, falling back to 12 when the lookup
# yields nothing.
assayer_config_max_claims() {
  local limit
  limit="$(assayer_config_get '.max_claims')"
  if [[ -z "$limit" ]]; then
    limit="12"
  fi
  printf '%s' "$limit"
}
|
|
77
|
+
|
|
78
|
+
# Print the configured `.min_confidence`, falling back to 0.5 when the lookup
# yields nothing.
assayer_config_min_confidence() {
  local threshold
  threshold="$(assayer_config_get '.min_confidence')"
  [[ -n "$threshold" ]] || threshold="0.5"
  printf '%s' "$threshold"
}
|
|
83
|
+
|
|
84
|
+
# Print the configured `.final_message_chars`, falling back to 6000 when the
# lookup yields nothing.
assayer_config_final_message_chars() {
  local char_budget
  char_budget="$(assayer_config_get '.final_message_chars')"
  if [[ -z "$char_budget" ]]; then
    char_budget="6000"
  fi
  printf '%s' "$char_budget"
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Canonical assayer.* event emission.
|
|
3
|
+
# Thin wrapper around the ecosystem plugin's onlooker-event.mjs `emit` mode.
|
|
4
|
+
# Every emission is validated against @onlooker-community/schema before being
|
|
5
|
+
# appended to ~/.onlooker/logs/onlooker-events.jsonl.
|
|
6
|
+
#
|
|
7
|
+
# Usage:
|
|
8
|
+
# assayer_emit_event "assayer.audit.started" '{"audit_id":"...","claim_count":3}'
|
|
9
|
+
|
|
10
|
+
_ASSAYER_PLUGIN_NAME="assayer"
|
|
11
|
+
|
|
12
|
+
# Print the path of onlooker-event.mjs and return 0, or return 1 when no
# candidate exists. Candidates are probed in this order:
#   1. $_ONLOOKER_EVENT_JS (explicit override), when set and a regular file
#   2. $CLAUDE_PLUGIN_ROOT/scripts/lib/onlooker-event.mjs
#   3. $CLAUDE_PLUGIN_ROOT/../../scripts/lib/onlooker-event.mjs
_assayer_event_js_path() {
  local override="${_ONLOOKER_EVENT_JS:-}"
  local base="${CLAUDE_PLUGIN_ROOT:-}"
  local probe

  for probe in \
    "$override" \
    "${base}/scripts/lib/onlooker-event.mjs" \
    "${base}/../../scripts/lib/onlooker-event.mjs"; do
    # An unset override shows up as an empty probe; skip it.
    if [[ -n "$probe" && -f "$probe" ]]; then
      printf '%s' "$probe"
      return 0
    fi
  done

  return 1
}
|
|
31
|
+
|
|
32
|
+
# Print the session id for event envelopes: $_HOOK_SESSION_ID when non-empty,
# otherwise $CLAUDE_SESSION_ID when non-empty, otherwise the literal "unknown".
_assayer_session_id() {
  printf '%s' "${_HOOK_SESSION_ID:-${CLAUDE_SESSION_ID:-unknown}}"
}
|
|
43
|
+
|
|
44
|
+
# Emit a single assayer.* event.
#
# Wraps the payload in an envelope {plugin, session_id, event_type, payload},
# pipes it to `node <onlooker-event.mjs> emit`, and appends whatever that
# command prints on stdout as one line to the events log.
#
#   $1 — event type, e.g. "assayer.audit.started"
#   $2 — payload as a JSON document (handed to jq via --argjson)
#
# Environment:
#   ONLOOKER_DIR        — base directory (default: $HOME/.onlooker)
#   ONLOOKER_EVENTS_LOG — full log path; overrides the ONLOOKER_DIR-derived one
#
# Returns 0 on success. Returns 1 when an argument is empty, the emitter
# script cannot be located, the payload is not valid JSON, node exits
# non-zero or prints nothing, or the log directory cannot be created.
assayer_emit_event() {
  local event_type="${1:-}"
  local payload="${2:-}"

  if [[ -z "$event_type" || -z "$payload" ]]; then
    return 1
  fi

  local event_js
  event_js=$(_assayer_event_js_path) || {
    printf 'assayer-events: cannot locate onlooker-event.mjs\n' >&2
    return 1
  }

  local session_id
  session_id=$(_assayer_session_id)

  local params
  params=$(jq -n \
    --arg plugin "$_ASSAYER_PLUGIN_NAME" \
    --arg sid "$session_id" \
    --arg type "$event_type" \
    --argjson payload "$payload" \
    '{plugin: $plugin, session_id: $sid, event_type: $type, payload: $payload}' \
    2>/dev/null) || return 1

  local event stderr_file
  # If mktemp fails, discard the emitter's stderr rather than falling back to
  # a predictable /tmp/<name>.<pid> path that another user could pre-create.
  stderr_file=$(mktemp -t assayer-event-err.XXXXXX 2>/dev/null) || stderr_file="/dev/null"

  event=$(printf '%s' "$params" \
    | ONLOOKER_DIR="${ONLOOKER_DIR:-$HOME/.onlooker}" \
      ONLOOKER_PLUGIN_NAME="$_ASSAYER_PLUGIN_NAME" \
      node "$event_js" emit 2>"$stderr_file") || {
    printf 'assayer_emit_event: schema validation failed for %s\n' "$event_type" >&2
    if [[ -f "$stderr_file" ]]; then
      if [[ -s "$stderr_file" ]]; then
        cat "$stderr_file" >&2
      fi
      rm -f -- "$stderr_file"
    fi
    return 1
  }
  if [[ -f "$stderr_file" ]]; then
    rm -f -- "$stderr_file"
  fi

  # Never append a blank line to the JSONL log.
  if [[ -z "$event" ]]; then
    printf 'assayer_emit_event: emitter produced no output for %s\n' "$event_type" >&2
    return 1
  fi

  local log_path="${ONLOOKER_EVENTS_LOG:-${ONLOOKER_DIR:-$HOME/.onlooker}/logs/onlooker-events.jsonl}"
  mkdir -p "$(dirname "$log_path")" 2>/dev/null || return 1
  printf '%s\n' "$event" >>"$log_path"
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Claim extraction for Assayer.
|
|
3
|
+
#
|
|
4
|
+
# The language-understanding half of the plugin: an LLM reads the agent's
|
|
5
|
+
# final message and pulls out *testable success claims* — assertions that some
|
|
6
|
+
# command or check succeeded ("I ran the tests, they pass", "the build is
|
|
7
|
+
# green", "lint is clean"). Each claim is tagged with a type and a
|
|
8
|
+
# command_keyword that the deterministic verifier (assayer-verify.sh) uses to
|
|
9
|
+
# locate the command that would corroborate or contradict it.
|
|
10
|
+
#
|
|
11
|
+
# The LLM does not judge truth — it only identifies claims and what command
|
|
12
|
+
# would settle them. The factual cross-check is pure bash, so it stays
|
|
13
|
+
# deterministic and testable.
|
|
14
|
+
|
|
15
|
+
# Print the claim-extraction prompt for one final assistant message.
# The prompt is: fixed instructions, the JSON shape to return, the claim
# limit, then the message wrapped between two marker lines.
#   $1 — final assistant message text
#   $2 — max_claims (default 12)
assayer_build_extraction_prompt() {
  local message="${1:-}"
  local max_claims="${2:-12}"

  # One printf call: the format is re-applied to every argument, so each
  # argument becomes one output line ('' produces a blank line).
  printf '%s\n' \
    "You are auditing an AI coding agent's final message to its user." \
    'Extract every TESTABLE SUCCESS CLAIM: a statement asserting that a command,' \
    'test, build, lint, or type check was run and SUCCEEDED. Ignore plans, intentions,' \
    'hedged statements ("should pass"), and claims about code that no shell command' \
    'could confirm.' \
    '' \
    'Return JSON only — no prose, no markdown fences. A JSON array, possibly empty:' \
    '[' \
    ' {' \
    ' "text": "the exact claim, quoted from the message",' \
    ' "type": "tests_pass|build_succeeds|lint_clean|types_check|command_succeeds|generic",' \
    ' "command_keyword": "a lowercase substring you expect in the verifying shell command, e.g. test, build, lint, tsc",' \
    ' "confidence": 0.0..1.0' \
    ' }' \
    ']' \
    '' \
    "Extract at most ${max_claims} claims, highest-confidence first." \
    '' \
    '---AGENT FINAL MESSAGE---' \
    "$message" \
    '---END MESSAGE---'
}
|
|
44
|
+
|
|
45
|
+
# Parse a claude -p response into a clean JSON array of claims.
# Strips markdown fences, validates that the result is a JSON array, and
# normalises each entry. Echoes a compact JSON array (or "[]").
#   $1 — raw response text
#
# Per-entry rules:
#   - entries that are not objects, or whose `text` is not a non-empty
#     string, are dropped;
#   - `type` outside the recognised set (or not a string) becomes "generic";
#   - `command_keyword` is lower-cased, or "" when it is not a string;
#   - `confidence` is kept when numeric, otherwise defaults to 0.6.
# Every field is type-checked before a string-only jq builtin touches it, so
# one malformed entry can no longer fail the whole filter and discard the
# valid claims next to it.
assayer_parse_claims() {
  local raw="${1:-}"
  if [[ -z "$raw" ]]; then
    printf '[]'
    return 0
  fi

  # Strip leading/trailing markdown fences if present.
  local clean
  clean=$(printf '%s' "$raw" | sed -e 's/^```json//' -e 's/^```//' -e 's/```$//')

  local parsed
  parsed=$(printf '%s' "$clean" | jq -c '
    if type == "array" then
      [ .[]
        | select(type == "object")
        | select((.text | type) == "string" and (.text | length) > 0)
        | {
            text: .text,
            type: (
              if ((.type | type) == "string")
                 and (.type | test("^(tests_pass|build_succeeds|lint_clean|types_check|command_succeeds|generic)$"))
              then .type else "generic" end
            ),
            command_keyword: (
              if (.command_keyword | type) == "string"
              then (.command_keyword | ascii_downcase) else "" end
            ),
            confidence: (
              if (.confidence | type) == "number" then .confidence else 0.6 end
            )
          }
      ]
    else
      []
    end
  ' 2>/dev/null) || parsed="[]"

  if [[ -z "$parsed" || "$parsed" == "null" ]]; then
    parsed="[]"
  fi
  printf '%s' "$parsed"
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Project key derivation for Assayer.
|
|
3
|
+
# Mirrors echo/archivist/tribunal: stable 12-char hex key derived from the git
|
|
4
|
+
# remote or repo root, surviving renames, clones, and worktrees.
|
|
5
|
+
|
|
6
|
+
_assayer_sha256_first12() {
|
|
7
|
+
local input="$1"
|
|
8
|
+
if command -v shasum >/dev/null 2>&1; then
|
|
9
|
+
printf '%s' "$input" | shasum -a 256 2>/dev/null | cut -c1-12
|
|
10
|
+
elif command -v sha256sum >/dev/null 2>&1; then
|
|
11
|
+
printf '%s' "$input" | sha256sum 2>/dev/null | cut -c1-12
|
|
12
|
+
else
|
|
13
|
+
return 1
|
|
14
|
+
fi
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
# Print the URL of the `origin` remote for the repository containing $1.
# Prints nothing when $1 is empty, is not a directory, or git cannot report
# an origin URL. Always returns 0.
#   $1 — directory to query
assayer_project_remote_url() {
  local cwd="${1:-}"

  if [[ -z "$cwd" || ! -d "$cwd" ]]; then
    return 0
  fi

  # Best-effort lookup: a failing git must not surface as an error.
  git -C "$cwd" remote get-url origin 2>/dev/null || true
}
|
|
22
|
+
|
|
23
|
+
# Print the physical (symlink-resolved) root of the repository containing
# $1, with no trailing newline. Prints nothing when $1 is empty, is not a
# directory, or is not inside a git work tree. Always returns 0.
#   $1 — directory to query
#
# The root is taken as the parent of git's common dir, so linked worktrees
# resolve to the main checkout; `--show-toplevel` is the fallback when the
# common dir cannot be resolved.
# NOTE(review): for a submodule the common dir presumably lives under the
# superproject's .git/modules, so its parent would not be the submodule's
# work tree — confirm whether that case matters for key stability.
assayer_project_repo_root() {
  local cwd="${1:-}"
  if [[ -z "$cwd" || ! -d "$cwd" ]]; then
    return 0
  fi

  if ! git -C "$cwd" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    return 0
  fi

  # Initialised explicitly: a bare `local toplevel` leaves the variable unset,
  # which is an error under `set -u` when the common-dir branch is skipped.
  local common_dir="" toplevel=""
  common_dir=$(git -C "$cwd" rev-parse --git-common-dir 2>/dev/null) || return 0

  # git may report the common dir relative to $cwd; make it absolute.
  if [[ -n "$common_dir" && "$common_dir" != /* ]]; then
    common_dir="$(cd "$cwd" && cd "$common_dir" 2>/dev/null && pwd -P)" || common_dir=""
  fi

  if [[ -n "$common_dir" && -d "$common_dir" ]]; then
    toplevel="$(cd "$common_dir/.." 2>/dev/null && pwd -P)" || toplevel=""
  fi

  if [[ -z "$toplevel" ]]; then
    toplevel=$(git -C "$cwd" rev-parse --show-toplevel 2>/dev/null || true)
    if [[ -n "$toplevel" ]]; then
      # Guarded so a failing cd cannot trip `set -e` in the caller.
      toplevel="$(cd "$toplevel" 2>/dev/null && pwd -P)" || toplevel=""
    fi
  fi

  printf '%s' "$toplevel"
}
|
|
49
|
+
|
|
50
|
+
# Print the project key for a directory: the 12-character hash of
# "remote:<origin url>" when an origin remote exists, otherwise of
# "root:<repo root>" when a repo root is found, otherwise nothing.
# Always returns 0.
#   $1 — directory to derive the key for (default: current directory)
assayer_project_key() {
  local cwd="${1:-}"
  if [[ -z "$cwd" ]]; then
    cwd="$(pwd)"
  fi

  local seed
  seed="$(assayer_project_remote_url "$cwd")"
  if [[ -n "$seed" ]]; then
    seed="remote:${seed}"
  else
    # The repo root is only consulted when there is no remote.
    seed="$(assayer_project_repo_root "$cwd")"
    if [[ -n "$seed" ]]; then
      seed="root:${seed}"
    fi
  fi

  if [[ -n "$seed" ]]; then
    _assayer_sha256_first12 "$seed"
  fi
  return 0
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Transcript reader for Assayer.
|
|
3
|
+
#
|
|
4
|
+
# The Stop hook payload carries `transcript_path` — a JSONL file already
|
|
5
|
+
# committed to disk before Stop fires (same field tribunal and compass read).
|
|
6
|
+
# Assayer needs two things from it:
|
|
7
|
+
#
|
|
8
|
+
# 1. The final assistant message — the text the agent left the user with,
|
|
9
|
+
# where claims like "I ran the tests, they pass" live.
|
|
10
|
+
# 2. The session's Bash commands paired with their result status — the
|
|
11
|
+
# factual record to check those claims against.
|
|
12
|
+
#
|
|
13
|
+
# Claude Code transcripts represent a Bash invocation as a `tool_use` block
|
|
14
|
+
# (name "Bash", with `.input.command`) on an assistant line, and its outcome
|
|
15
|
+
# as a `tool_result` block on a following user line carrying the same
|
|
16
|
+
# `tool_use_id` and an `is_error` flag. There is no per-call numeric exit code
|
|
17
|
+
# in the transcript, so `is_error` is the success/failure signal.
|
|
18
|
+
|
|
19
|
+
# Print the text of the final assistant message: the text blocks of the last
# assistant entry that has at least one, joined with newlines and cut to
# max_chars. Prints nothing when the transcript is missing, cannot be parsed,
# or contains no such entry. Always returns 0.
#   $1 — transcript_path (JSONL)
#   $2 — max_chars (default 6000)
assayer_final_assistant_message() {
  local transcript_path="${1:-}"
  local max_chars="${2:-6000}"

  if [[ ! -f "$transcript_path" ]]; then
    return 0
  fi

  local message
  # -s slurps the whole JSONL file into one array, so `last` can pick the
  # final qualifying entry.
  message=$(jq -s -r '
    map(select(.type == "assistant")
        | select(any(.message.content[]?; .type == "text")))
    | last
    | if . == null then ""
      else ([ .message.content[]? | select(.type == "text") | .text ] | join("\n"))
      end
  ' "$transcript_path" 2>/dev/null) || message=""

  if [[ -n "$message" ]]; then
    printf '%s' "${message:0:$max_chars}"
  fi
  return 0
}
|
|
44
|
+
|
|
45
|
+
# Print a compact JSON array describing the session's Bash tool calls, in
# transcript order:
#   [ { "command": "...", "is_error": true|false, "excerpt": "..." }, ... ]
# Each call is paired with the first tool_result that carries the same id.
# `is_error` is false when there is no such result or its flag is not true;
# `excerpt` is the first 240 characters of the result text ("" when absent).
# Prints "[]" when the transcript is missing or cannot be parsed.
#   $1 — transcript_path (JSONL)
assayer_collect_commands() {
  local transcript_path="${1:-}"

  if [[ ! -f "$transcript_path" ]]; then
    printf '[]'
    return 0
  fi

  local paired
  paired=$(jq -s -c '
    # Bash invocations: tool_use blocks on assistant entries.
    [ .[]
      | select(.type == "assistant")
      | .message.content[]?
      | select(.type == "tool_use" and .name == "Bash")
      | { id: .id, command: (.input.command // "") }
    ] as $calls

    # Outcomes: tool_result blocks on user entries.
    | [ .[]
        | select(.type == "user")
        | .message.content[]?
        | select(.type == "tool_result")
        | {
            id: .tool_use_id,
            is_error: (.is_error == true),
            excerpt: (
              if (.content | type) == "string" then .content
              elif (.content | type) == "array" then
                ([ .content[]? | select(.type == "text") | .text ] | join("\n"))
              else "" end
            )
          }
      ] as $results

    # Join on id; $hit is null when a call has no recorded result.
    | [ $calls[]
        | . as $call
        | ([ $results[] | select(.id == $call.id) ] | .[0]) as $hit
        | {
            command: $call.command,
            is_error: ($hit.is_error // false),
            excerpt: (($hit.excerpt // "")[0:240])
          }
      ]
  ' "$transcript_path" 2>/dev/null) || paired=""

  if [[ -z "$paired" || "$paired" == "null" ]]; then
    paired="[]"
  fi
  printf '%s' "$paired"
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Minimal ULID generator for Assayer audit_id values.
|
|
3
|
+
# Crockford Base32, lexicographically sortable, time-ordered.
|
|
4
|
+
|
|
5
|
+
_ASSAYER_ULID_ALPHABET="0123456789ABCDEFGHJKMNPQRSTVWXYZ"
|
|
6
|
+
|
|
7
|
+
# Encode a non-negative integer as a fixed-width string over
# $_ASSAYER_ULID_ALPHABET (base 32), most significant digit first.
# Digits that do not fit into the requested width are dropped from the
# high end.
#   $1 — integer to encode
#   $2 — number of output characters
_assayer_ulid_encode() {
  local value="$1"
  local width="$2"
  local encoded=""
  local remaining

  for ((remaining = width; remaining > 0; remaining--)); do
    # Peel off the lowest 5 bits and prepend their symbol.
    encoded="${_ASSAYER_ULID_ALPHABET:$((value % 32)):1}${encoded}"
    value=$((value / 32))
  done

  printf '%s' "$encoded"
}
|
|
18
|
+
|
|
19
|
+
# Print a 26-character ULID-style id: a 10-character millisecond timestamp
# followed by two 8-character random parts, all encoded with
# _assayer_ulid_encode.
#
# Timestamp: python3 on Darwin, `date +%s%3N` elsewhere. The result is only
# accepted when it is purely numeric; otherwise whole seconds * 1000 are
# used. This covers `date` implementations that exit 0 but print the
# unsupported format specifier literally.
#
# Randomness: 80 bits from `openssl rand`, split into two 40-bit halves; when
# openssl is unavailable each half is assembled from three $RANDOM draws
# (15 + 15 + 10 bits).
assayer_ulid() {
  local now_ms=""
  if [[ "$(uname)" == "Darwin" ]]; then
    now_ms=$(python3 -c 'import time; print(int(time.time() * 1000))' 2>/dev/null) || now_ms=""
  else
    now_ms=$(date +%s%3N 2>/dev/null) || now_ms=""
  fi
  if [[ ! "$now_ms" =~ ^[0-9]+$ ]]; then
    now_ms=$(($(date +%s) * 1000))
  fi

  local rand_hex rand_hi rand_lo
  rand_hex=$(openssl rand -hex 10 2>/dev/null) || rand_hex=""
  if [[ "$rand_hex" =~ ^[0-9a-fA-F]{20}$ ]]; then
    rand_hi=$((16#${rand_hex:0:10}))
    rand_lo=$((16#${rand_hex:10:10}))
  else
    rand_hi=$(((RANDOM << 25) | (RANDOM << 10) | (RANDOM & 1023)))
    rand_lo=$(((RANDOM << 25) | (RANDOM << 10) | (RANDOM & 1023)))
  fi

  local ts_part hi_part lo_part
  ts_part=$(_assayer_ulid_encode "$now_ms" 10)
  hi_part=$(_assayer_ulid_encode "$rand_hi" 8)
  lo_part=$(_assayer_ulid_encode "$rand_lo" 8)

  printf '%s%s%s' "$ts_part" "$hi_part" "$lo_part"
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Claim verification for Assayer.
|
|
3
|
+
#
|
|
4
|
+
# The deterministic half: given a claim (with a type and command_keyword) and
|
|
5
|
+
# the session's Bash commands paired with their is_error status, locate the
|
|
6
|
+
# command that would settle the claim and classify it. No LLM, no randomness —
|
|
7
|
+
# the same inputs always produce the same verdict.
|
|
8
|
+
#
|
|
9
|
+
# Matching: a claim type implies keywords (tests_pass -> "test", build_succeeds
|
|
10
|
+
# -> "build", ...); the LLM-supplied command_keyword is added. The MOST RECENT
|
|
11
|
+
# command containing any keyword wins, because an agent may fix and re-run, and
|
|
12
|
+
# the last run reflects the final state the claim describes.
|
|
13
|
+
#
|
|
14
|
+
# Verdicts:
|
|
15
|
+
# corroborated — matching command succeeded (is_error false)
|
|
16
|
+
# contradicted — matching command failed (is_error true)
|
|
17
|
+
# unverified — no matching command (reason no_matching_command), or the
|
|
18
|
+
# claim implies no checkable command (reason ambiguous)
|
|
19
|
+
|
|
20
|
+
# Classify one claim against the collected commands.
# Prints a JSON object: { verdict, evidence_command?, is_error?, excerpt?, reason? }
#   $1 — claim JSON object
#   $2 — commands JSON array (from assayer_collect_commands)
#
# Keywords are the ones implied by the claim type plus the claim's own
# command_keyword, lower-cased and de-duplicated. The LAST command whose
# lower-cased text contains any keyword decides the verdict. An empty claim,
# an empty keyword set, or a jq failure yields unverified/ambiguous.
assayer_classify_claim() {
  local claim="${1:-}"
  local commands="${2:-[]}"

  if [[ -z "$claim" ]]; then
    printf '{"verdict":"unverified","reason":"ambiguous"}'
    return 0
  fi
  if [[ -z "$commands" || "$commands" == "null" ]]; then
    commands="[]"
  fi

  local verdict_json
  verdict_json=$(jq -n \
    --argjson claim "$claim" \
    --argjson commands "$commands" '
    # Keywords implied by a claim type; unknown types imply none.
    def implied_keywords($t):
      if ($t | type) != "string" then []
      else ({
        "tests_pass": ["test"],
        "build_succeeds": ["build"],
        "lint_clean": ["lint"],
        "types_check": ["tsc", "typecheck", "type-check", "types"]
      }[$t] // [])
      end;

    # The claim-supplied keyword as a 0- or 1-element list.
    def extra_keyword:
      ($claim.command_keyword // "") as $k
      | if ($k | length) > 0 then [$k] else [] end;

    ( implied_keywords($claim.type // "generic") + extra_keyword
      | map(ascii_downcase) | map(select(. != "")) | unique ) as $kw
    | if ($kw | length) == 0 then
        { verdict: "unverified", reason: "ambiguous" }
      else
        [ $commands[]
          | select(
              (.command | ascii_downcase) as $cmd
              | [ $kw[] | . as $k | $cmd | contains($k) ] | any
            )
        ] as $hits
        | if ($hits | length) == 0 then
            { verdict: "unverified", reason: "no_matching_command" }
          else
            $hits[-1] as $m
            | {
                verdict: (if $m.is_error then "contradicted" else "corroborated" end),
                evidence_command: $m.command,
                is_error: $m.is_error,
                excerpt: ($m.excerpt // "")
              }
          end
      end
  ' 2>/dev/null) || verdict_json=""

  if [[ -z "$verdict_json" || "$verdict_json" == "null" ]]; then
    verdict_json='{"verdict":"unverified","reason":"ambiguous"}'
  fi
  printf '%s' "$verdict_json"
}
|
|
73
|
+
|
|
74
|
+
# Derive the overall audit verdict from the three per-claim counts.
#   $1 — contradicted count
#   $2 — corroborated count
#   $3 — unverified count
# Prints (no trailing newline):
#   contradictions_found — at least one claim was contradicted
#   clean                — none contradicted, and at least one claim was
#                          corroborated or left unverified
#   nothing_to_verify    — all three counts are zero
assayer_audit_verdict() {
  local contradicted="${1:-0}"
  local corroborated="${2:-0}"
  local unverified="${3:-0}"
  local verdict

  if [[ "$contradicted" -gt 0 ]]; then
    verdict="contradictions_found"
  elif [[ "$corroborated" -gt 0 || "$unverified" -gt 0 ]]; then
    # An audit with only unverified claims is reported as clean too.
    verdict="clean"
  else
    verdict="nothing_to_verify"
  fi

  printf '%s' "$verdict"
}
|
|
@@ -206,6 +206,22 @@
|
|
|
206
206
|
"jsonpath": "$.version"
|
|
207
207
|
}
|
|
208
208
|
]
|
|
209
|
+
},
|
|
210
|
+
"plugins/assayer": {
|
|
211
|
+
"changelog-path": "CHANGELOG.md",
|
|
212
|
+
"release-type": "simple",
|
|
213
|
+
"bump-minor-pre-major": true,
|
|
214
|
+
"bump-patch-for-minor-pre-major": false,
|
|
215
|
+
"component": "assayer",
|
|
216
|
+
"draft": false,
|
|
217
|
+
"prerelease": false,
|
|
218
|
+
"extra-files": [
|
|
219
|
+
{
|
|
220
|
+
"type": "json",
|
|
221
|
+
"path": ".claude-plugin/plugin.json",
|
|
222
|
+
"jsonpath": "$.version"
|
|
223
|
+
}
|
|
224
|
+
]
|
|
209
225
|
}
|
|
210
226
|
},
|
|
211
227
|
"$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json"
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
|
|
3
|
+
# Exercises Assayer config loading: defaults and per-project overrides.
|
|
4
|
+
|
|
5
|
+
setup() {
|
|
6
|
+
source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
|
|
7
|
+
setup_test_env
|
|
8
|
+
PLUGIN_ROOT="${REPO_ROOT}/plugins/assayer"
|
|
9
|
+
export CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT"
|
|
10
|
+
# shellcheck disable=SC1091
|
|
11
|
+
source "${PLUGIN_ROOT}/scripts/lib/assayer-config.sh"
|
|
12
|
+
|
|
13
|
+
REPO="${BATS_TEST_TMPDIR}/repo"
|
|
14
|
+
mkdir -p "${REPO}/.claude"
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
@test "disabled by default (no settings)" {
|
|
18
|
+
assayer_config_load "$REPO"
|
|
19
|
+
run assayer_config_enabled
|
|
20
|
+
[ "$status" -ne 0 ]
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
@test "enabled when settings opt in" {
|
|
24
|
+
printf '%s\n' '{"assayer":{"enabled":true}}' >"${REPO}/.claude/settings.json"
|
|
25
|
+
assayer_config_load "$REPO"
|
|
26
|
+
run assayer_config_enabled
|
|
27
|
+
[ "$status" -eq 0 ]
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
@test "default model is haiku" {
|
|
31
|
+
assayer_config_load "$REPO"
|
|
32
|
+
[ "$(assayer_config_model)" = "claude-haiku-4-5-20251001" ]
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
@test "model override is honored" {
|
|
36
|
+
printf '%s\n' '{"assayer":{"evaluation":{"model":"claude-opus-4-8"}}}' >"${REPO}/.claude/settings.json"
|
|
37
|
+
assayer_config_load "$REPO"
|
|
38
|
+
[ "$(assayer_config_model)" = "claude-opus-4-8" ]
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
@test "default max_claims is 12" {
|
|
42
|
+
assayer_config_load "$REPO"
|
|
43
|
+
[ "$(assayer_config_max_claims)" = "12" ]
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
@test "default min_confidence is 0.5" {
|
|
47
|
+
assayer_config_load "$REPO"
|
|
48
|
+
[ "$(assayer_config_min_confidence)" = "0.5" ]
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
@test "min_confidence override is honored" {
|
|
52
|
+
printf '%s\n' '{"assayer":{"min_confidence":0.8}}' >"${REPO}/.claude/settings.json"
|
|
53
|
+
assayer_config_load "$REPO"
|
|
54
|
+
[ "$(assayer_config_min_confidence)" = "0.8" ]
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
@test "default timeout is 60" {
|
|
58
|
+
assayer_config_load "$REPO"
|
|
59
|
+
[ "$(assayer_config_timeout)" = "60" ]
|
|
60
|
+
}
|