@onlooker-community/ecosystem 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/.claude-plugin/marketplace.json +13 -0
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.release-please-manifest.json +3 -2
  4. package/CHANGELOG.md +7 -0
  5. package/CLAUDE.md +1 -0
  6. package/package.json +3 -3
  7. package/plugins/assayer/.claude-plugin/plugin.json +14 -0
  8. package/plugins/assayer/CHANGELOG.md +10 -0
  9. package/plugins/assayer/README.md +114 -0
  10. package/plugins/assayer/config.json +14 -0
  11. package/plugins/assayer/docs/adr/001-verify-claims-against-transcript-evidence.md +57 -0
  12. package/plugins/assayer/docs/design.md +72 -0
  13. package/plugins/assayer/hooks/hooks.json +15 -0
  14. package/plugins/assayer/scripts/hooks/assayer-stop.sh +249 -0
  15. package/plugins/assayer/scripts/lib/assayer-config.sh +88 -0
  16. package/plugins/assayer/scripts/lib/assayer-events.sh +85 -0
  17. package/plugins/assayer/scripts/lib/assayer-extract.sh +87 -0
  18. package/plugins/assayer/scripts/lib/assayer-project-key.sh +69 -0
  19. package/plugins/assayer/scripts/lib/assayer-transcript.sh +99 -0
  20. package/plugins/assayer/scripts/lib/assayer-ulid.sh +46 -0
  21. package/plugins/assayer/scripts/lib/assayer-verify.sh +95 -0
  22. package/release-please-config.json +16 -0
  23. package/test/bats/assayer-config.bats +60 -0
  24. package/test/bats/assayer-events.bats +99 -0
  25. package/test/bats/assayer-extract.bats +76 -0
  26. package/test/bats/assayer-project-key.bats +58 -0
  27. package/test/bats/assayer-stop-hook.bats +81 -0
  28. package/test/bats/assayer-transcript.bats +72 -0
  29. package/test/bats/assayer-ulid.bats +31 -0
  30. package/test/bats/assayer-verify.bats +89 -0
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env bats
2
+
3
+ # Validates every emitted assayer.* event against @onlooker-community/schema.
4
+ #
5
+ # The assayer.* event types ship in @onlooker-community/schema; until the
6
+ # installed version includes them, these tests skip rather than fail. Once the
7
+ # ecosystem's schema dependency is bumped to a release that carries them, they
8
+ # run for real. See plugins/assayer/README.md (Requirements).
9
+
10
setup() {
  # Load the shared bats helpers and build the per-test sandbox. REPO_ROOT and
  # ONLOOKER_DIR are read below and are presumably provided by these helpers.
  source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  # Event log the tests read emitted events back from; create its directory.
  export ONLOOKER_EVENTS_LOG="${ONLOOKER_DIR}/logs/onlooker-events.jsonl"
  mkdir -p "${ONLOOKER_EVENTS_LOG%/*}"

  PLUGIN_ROOT="${REPO_ROOT}/plugins/assayer"
  export CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT"
  export _ONLOOKER_EVENT_JS="${REPO_ROOT}/scripts/lib/onlooker-event.mjs"
  export CLAUDE_SESSION_ID="bats-session-$$"

  # shellcheck disable=SC1091
  source "${PLUGIN_ROOT}/scripts/lib/assayer-events.sh"
}
25
+
26
# Skip the current test when the installed @onlooker-community/schema predates
# the assayer.* event types.
#
# Globals: REPO_ROOT (read)
#
# The probe is a fixed-string match (grep -F) so the dots in the event name are
# literal characters rather than regex wildcards. A missing or unreadable
# schema file is treated the same as a schema without the types.
_require_assayer_schema() {
  local schema="${REPO_ROOT}/node_modules/@onlooker-community/schema/schemas/event.v1.json"
  if ! grep -qF -- "assayer.audit.started" "$schema" 2>/dev/null; then
    skip "installed @onlooker-community/schema has no assayer.* types yet"
  fi
}
33
+
34
# Validate the most recent line of the event log with the onlooker-event CLI.
#
# Globals: ONLOOKER_EVENTS_LOG, ONLOOKER_DIR, REPO_ROOT (read);
#          _ONLOOKER_EVENT_JS (read, optional override of the validator path)
# Returns: 1 when the log is missing or empty, or its last line is blank;
#          otherwise the exit status of the validator.
_validate_latest_event() {
  # Prefer the validator path exported by setup; fall back to the repo default.
  local validator="${_ONLOOKER_EVENT_JS:-${REPO_ROOT}/scripts/lib/onlooker-event.mjs}"
  local last

  # Nothing was emitted: fail without running tail on a missing file.
  [ -s "$ONLOOKER_EVENTS_LOG" ] || return 1

  last=$(tail -n 1 "$ONLOOKER_EVENTS_LOG")
  [ -n "$last" ] || return 1

  printf '%s' "$last" | ONLOOKER_DIR="$ONLOOKER_DIR" \
    node "$validator" validate >/dev/null
}
41
+
42
# Fixed audit_id fixture shared by the payloads below: a 26-character ULID in
# Crockford Base32, whose alphabet excludes the letters I, L, O and U.
AUDIT_ID='01J0000000000000000000AB34'
44
+
45
+ @test "assayer.audit.started validates" {
46
+ _require_assayer_schema
47
+ local p
48
+ p=$(jq -n --arg a "$AUDIT_ID" '{audit_id: $a, claim_count: 3, trigger: "stop", command_count: 5}')
49
+ assayer_emit_event "assayer.audit.started" "$p"
50
+ run _validate_latest_event
51
+ [ "$status" -eq 0 ]
52
+ }
53
+
54
+ @test "assayer.claim.contradicted validates" {
55
+ _require_assayer_schema
56
+ local p
57
+ p=$(jq -n --arg a "$AUDIT_ID" '{
58
+ audit_id: $a,
59
+ claim: "I ran the tests and they all pass.",
60
+ claim_type: "tests_pass",
61
+ evidence_command: "npm test",
62
+ result_excerpt: "1 failed, 32 passed",
63
+ confidence: 0.9
64
+ }')
65
+ assayer_emit_event "assayer.claim.contradicted" "$p"
66
+ run _validate_latest_event
67
+ [ "$status" -eq 0 ]
68
+ }
69
+
70
+ @test "assayer.claim.unverified validates" {
71
+ _require_assayer_schema
72
+ local p
73
+ p=$(jq -n --arg a "$AUDIT_ID" '{audit_id: $a, claim: "The deploy is healthy.", claim_type: "generic", reason: "no_matching_command"}')
74
+ assayer_emit_event "assayer.claim.unverified" "$p"
75
+ run _validate_latest_event
76
+ [ "$status" -eq 0 ]
77
+ }
78
+
79
+ @test "assayer.audit.complete validates" {
80
+ _require_assayer_schema
81
+ local p
82
+ p=$(jq -n --arg a "$AUDIT_ID" '{
83
+ audit_id: $a, claim_count: 3, corroborated: 1, contradicted: 1,
84
+ unverified: 1, verdict: "contradictions_found", duration_ms: 4200
85
+ }')
86
+ assayer_emit_event "assayer.audit.complete" "$p"
87
+ run _validate_latest_event
88
+ [ "$status" -eq 0 ]
89
+ }
90
+
91
+ @test "emission fails on unknown event type" {
92
+ run assayer_emit_event "assayer.no.such.event" '{"audit_id":"x"}'
93
+ [ "$status" -ne 0 ]
94
+ }
95
+
96
+ @test "assayer_emit_event returns 1 when payload is empty" {
97
+ run assayer_emit_event "assayer.audit.started" ""
98
+ [ "$status" -ne 0 ]
99
+ }
@@ -0,0 +1,76 @@
1
+ #!/usr/bin/env bats
2
+
3
+ # Exercises claim parsing: assayer_parse_claims and the extraction prompt.
4
+
5
setup() {
  # Shared bats helpers first; REPO_ROOT is read below and is presumably
  # provided by them.
  source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  # Library under test: claim extraction and parsing.
  PLUGIN_ROOT="${REPO_ROOT}/plugins/assayer"
  # shellcheck disable=SC1091
  source "${PLUGIN_ROOT}/scripts/lib/assayer-extract.sh"
}
12
+
13
+ @test "parses a clean JSON array of claims" {
14
+ run assayer_parse_claims '[{"text":"tests pass","type":"tests_pass","command_keyword":"test","confidence":0.9}]'
15
+ [ "$status" -eq 0 ]
16
+ [ "$(printf '%s' "$output" | jq 'length')" -eq 1 ]
17
+ [ "$(printf '%s' "$output" | jq -r '.[0].type')" = "tests_pass" ]
18
+ }
19
+
20
+ @test "strips markdown fences" {
21
+ local raw
22
+ raw=$'```json\n[{"text":"build ok","type":"build_succeeds","command_keyword":"build","confidence":0.8}]\n```'
23
+ run assayer_parse_claims "$raw"
24
+ [ "$status" -eq 0 ]
25
+ [ "$(printf '%s' "$output" | jq 'length')" -eq 1 ]
26
+ }
27
+
28
+ @test "drops malformed entries and entries without text" {
29
+ run assayer_parse_claims '[{"text":"ok","type":"generic","confidence":0.7},{"no_text":true},{"text":""}]'
30
+ [ "$status" -eq 0 ]
31
+ [ "$(printf '%s' "$output" | jq 'length')" -eq 1 ]
32
+ }
33
+
34
+ @test "coerces unknown type to generic" {
35
+ run assayer_parse_claims '[{"text":"thing","type":"made_up","command_keyword":"x","confidence":0.7}]'
36
+ [ "$status" -eq 0 ]
37
+ [ "$(printf '%s' "$output" | jq -r '.[0].type')" = "generic" ]
38
+ }
39
+
40
+ @test "defaults confidence when missing or non-numeric" {
41
+ run assayer_parse_claims '[{"text":"thing","type":"generic","command_keyword":"x"}]'
42
+ [ "$status" -eq 0 ]
43
+ [ "$(printf '%s' "$output" | jq -r '.[0].confidence')" = "0.6" ]
44
+ }
45
+
46
+ @test "lowercases command_keyword" {
47
+ run assayer_parse_claims '[{"text":"thing","type":"generic","command_keyword":"TEST","confidence":0.7}]'
48
+ [ "$status" -eq 0 ]
49
+ [ "$(printf '%s' "$output" | jq -r '.[0].command_keyword')" = "test" ]
50
+ }
51
+
52
+ @test "non-array input yields empty array" {
53
+ run assayer_parse_claims '{"text":"not an array"}'
54
+ [ "$status" -eq 0 ]
55
+ [ "$output" = "[]" ]
56
+ }
57
+
58
+ @test "garbage input yields empty array" {
59
+ run assayer_parse_claims 'I could not find any claims.'
60
+ [ "$status" -eq 0 ]
61
+ [ "$output" = "[]" ]
62
+ }
63
+
64
+ @test "empty input yields empty array" {
65
+ run assayer_parse_claims ""
66
+ [ "$status" -eq 0 ]
67
+ [ "$output" = "[]" ]
68
+ }
69
+
70
+ @test "extraction prompt includes the message and the JSON contract" {
71
+ run assayer_build_extraction_prompt "I ran the tests and they pass." 5
72
+ [ "$status" -eq 0 ]
73
+ [[ "$output" == *"I ran the tests and they pass."* ]]
74
+ [[ "$output" == *"TESTABLE SUCCESS CLAIM"* ]]
75
+ [[ "$output" == *"at most 5 claims"* ]]
76
+ }
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env bats
2
+
3
+ # Exercises Assayer project-key derivation.
4
+
5
setup() {
  # Shared bats helpers first; REPO_ROOT is read below and is presumably
  # provided by them.
  source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  # Library under test: project-key derivation.
  PLUGIN_ROOT="${REPO_ROOT}/plugins/assayer"
  # shellcheck disable=SC1091
  source "${PLUGIN_ROOT}/scripts/lib/assayer-project-key.sh"

  # Throwaway git repository with a single commit and no remote.
  REPO="${BATS_TEST_TMPDIR}/repo"
  mkdir -p "$REPO"
  git -C "$REPO" init -q
  git -C "$REPO" config user.email test@example.com
  git -C "$REPO" config user.name test
  printf 'x\n' >"${REPO}/f"
  git -C "$REPO" add f
  git -C "$REPO" commit -q -m init
}
19
+
20
+ @test "key is 12 hex chars for a repo with a remote" {
21
+ git -C "$REPO" remote add origin https://example.com/foo/bar.git
22
+ run assayer_project_key "$REPO"
23
+ [ "$status" -eq 0 ]
24
+ [[ "$output" =~ ^[0-9a-f]{12}$ ]]
25
+ }
26
+
27
+ @test "key is stable across calls" {
28
+ git -C "$REPO" remote add origin https://example.com/foo/bar.git
29
+ a=$(assayer_project_key "$REPO")
30
+ b=$(assayer_project_key "$REPO")
31
+ [ "$a" = "$b" ]
32
+ }
33
+
34
+ @test "remote-keyed differs from root-keyed" {
35
+ local with_remote without_remote
36
+ without_remote=$(assayer_project_key "$REPO")
37
+ git -C "$REPO" remote add origin https://example.com/foo/bar.git
38
+ with_remote=$(assayer_project_key "$REPO")
39
+ [ "$with_remote" != "$without_remote" ]
40
+ }
41
+
42
+ @test "different remotes yield different keys" {
43
+ git -C "$REPO" remote add origin https://example.com/foo/one.git
44
+ local one
45
+ one=$(assayer_project_key "$REPO")
46
+ git -C "$REPO" remote set-url origin https://example.com/foo/two.git
47
+ local two
48
+ two=$(assayer_project_key "$REPO")
49
+ [ "$one" != "$two" ]
50
+ }
51
+
52
+ @test "non-repo cwd yields empty key" {
53
+ local non_repo="${BATS_TEST_TMPDIR}/not-a-repo"
54
+ mkdir -p "$non_repo"
55
+ run assayer_project_key "$non_repo"
56
+ [ "$status" -eq 0 ]
57
+ [ -z "$output" ]
58
+ }
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env bats
2
+
3
+ # Exercises the Assayer Stop hook's gating behavior. Does not invoke claude -p
4
+ # (the hook bails before the extraction step when preconditions fail).
5
+ # Verifies: disabled-by-default, no-git, recursion guard, no-transcript, and
6
+ # stdout silence (advisory hook must never block Stop).
7
+
8
setup() {
  # Shared bats helpers first; REPO_ROOT is read below and is presumably
  # provided by them.
  source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  # The hook script under test, and the plugin root it is run from.
  PLUGIN_ROOT="${REPO_ROOT}/plugins/assayer"
  export CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT"
  HOOK="${PLUGIN_ROOT}/scripts/hooks/assayer-stop.sh"

  # Throwaway git repository with one commit, used as the hook's cwd.
  REPO="${BATS_TEST_TMPDIR}/repo"
  mkdir -p "$REPO"
  git -C "$REPO" init -q
  git -C "$REPO" config user.email test@example.com
  git -C "$REPO" config user.name test
  printf 'initial\n' >"${REPO}/README.md"
  git -C "$REPO" add README.md
  git -C "$REPO" commit -q -m init

  # Minimal transcript: a single assistant text turn.
  TRANSCRIPT="${BATS_TEST_TMPDIR}/transcript.jsonl"
  printf '%s\n' '{"type":"assistant","message":{"content":[{"type":"text","text":"All tests pass."}]}}' >"$TRANSCRIPT"
}
26
+
27
# Print the JSON payload fed to the hook on stdin.
# Arguments: $1 - cwd (default: $REPO)
#            $2 - session id (default: test-session)
#            $3 - transcript path (default: $TRANSCRIPT)
# Outputs:   a JSON object with cwd, session_id and transcript_path
_make_input() {
  local dir="${1:-$REPO}"
  local session="${2:-test-session}"
  local transcript_file="${3:-$TRANSCRIPT}"

  jq -n --arg cwd "$dir" --arg sid "$session" --arg tp "$transcript_file" \
    '{cwd: $cwd, session_id: $sid, transcript_path: $tp}'
}
32
+
33
+ @test "exits 0 silently when assayer.enabled is false (default)" {
34
+ local input
35
+ input=$(_make_input)
36
+ run bash -c "printf '%s' '$input' | ONLOOKER_DIR='$ONLOOKER_DIR' '$HOOK'"
37
+ [ "$status" -eq 0 ]
38
+ [ -z "$output" ]
39
+ }
40
+
41
+ @test "exits 0 when cwd is not a git repo" {
42
+ local non_repo="${BATS_TEST_TMPDIR}/not-a-repo"
43
+ mkdir -p "$non_repo"
44
+ local input
45
+ input=$(_make_input "$non_repo")
46
+ run bash -c "printf '%s' '$input' | ONLOOKER_DIR='$ONLOOKER_DIR' '$HOOK'"
47
+ [ "$status" -eq 0 ]
48
+ }
49
+
50
+ @test "recursion guard: ASSAYER_NESTED=1 causes immediate exit 0" {
51
+ mkdir -p "${REPO}/.claude"
52
+ printf '%s\n' '{"assayer":{"enabled":true}}' >"${REPO}/.claude/settings.json"
53
+ local input
54
+ input=$(_make_input)
55
+ run bash -c "printf '%s' '$input' | ASSAYER_NESTED=1 ONLOOKER_DIR='$ONLOOKER_DIR' '$HOOK'"
56
+ [ "$status" -eq 0 ]
57
+ [ -z "$output" ]
58
+ }
59
+
60
+ @test "exits 0 when enabled but transcript is missing" {
61
+ mkdir -p "${REPO}/.claude"
62
+ printf '%s\n' '{"assayer":{"enabled":true}}' >"${REPO}/.claude/settings.json"
63
+ local input
64
+ input=$(_make_input "$REPO" "test-session" "${BATS_TEST_TMPDIR}/nope.jsonl")
65
+ run bash -c "printf '%s' '$input' | ONLOOKER_DIR='$ONLOOKER_DIR' '$HOOK'"
66
+ [ "$status" -eq 0 ]
67
+ [ -z "$output" ]
68
+ }
69
+
70
+ @test "exits 0 when enabled but final message is empty" {
71
+ mkdir -p "${REPO}/.claude"
72
+ printf '%s\n' '{"assayer":{"enabled":true}}' >"${REPO}/.claude/settings.json"
73
+ # Transcript with no assistant text turn.
74
+ local empty_transcript="${BATS_TEST_TMPDIR}/empty.jsonl"
75
+ printf '%s\n' '{"type":"user","message":{"content":[{"type":"text","text":"hi"}]}}' >"$empty_transcript"
76
+ local input
77
+ input=$(_make_input "$REPO" "test-session" "$empty_transcript")
78
+ run bash -c "printf '%s' '$input' | ONLOOKER_DIR='$ONLOOKER_DIR' '$HOOK'"
79
+ [ "$status" -eq 0 ]
80
+ [ -z "$output" ]
81
+ }
@@ -0,0 +1,72 @@
1
+ #!/usr/bin/env bats
2
+
3
+ # Exercises the transcript reader against a synthetic JSONL transcript shaped
4
+ # like a real Claude Code session log.
5
+
6
setup() {
  # Shared bats helpers first; REPO_ROOT is read below and is presumably
  # provided by them.
  source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  # Library under test: the transcript reader.
  PLUGIN_ROOT="${REPO_ROOT}/plugins/assayer"
  # shellcheck disable=SC1091
  source "${PLUGIN_ROOT}/scripts/lib/assayer-transcript.sh"

  # Synthetic transcript, one JSON object per line, in this order:
  #   1-2  assistant runs `npm run build` via Bash; its result is not an error
  #   3-4  assistant runs `npm test` via Bash; its result is an error
  #   5    a non-Bash (Read) tool call, which should be ignored
  #   6    the final assistant message carrying the claims
  TRANSCRIPT="${BATS_TEST_TMPDIR}/transcript.jsonl"
  cat >"$TRANSCRIPT" <<'JSONL'
{"type":"assistant","message":{"content":[{"type":"text","text":"Running the build."},{"type":"tool_use","name":"Bash","id":"t1","input":{"command":"npm run build"}}]}}
{"type":"user","message":{"content":[{"type":"tool_result","tool_use_id":"t1","is_error":false,"content":"build ok"}]}}
{"type":"assistant","message":{"content":[{"type":"tool_use","name":"Bash","id":"t2","input":{"command":"npm test"}}]}}
{"type":"user","message":{"content":[{"type":"tool_result","tool_use_id":"t2","is_error":true,"content":"1 failed"}]}}
{"type":"assistant","message":{"content":[{"type":"tool_use","name":"Read","id":"t3","input":{"file_path":"x"}}]}}
{"type":"assistant","message":{"content":[{"type":"text","text":"Done. The build passes and the tests are green."}]}}
JSONL
}
26
+
27
+ @test "final assistant message returns the last text turn" {
28
+ run assayer_final_assistant_message "$TRANSCRIPT" 6000
29
+ [ "$status" -eq 0 ]
30
+ [ "$output" = "Done. The build passes and the tests are green." ]
31
+ }
32
+
33
+ @test "final assistant message truncates to max_chars" {
34
+ run assayer_final_assistant_message "$TRANSCRIPT" 10
35
+ [ "$status" -eq 0 ]
36
+ [ "${#output}" -eq 10 ]
37
+ }
38
+
39
+ @test "missing transcript yields empty final message" {
40
+ run assayer_final_assistant_message "${BATS_TEST_TMPDIR}/nope.jsonl" 6000
41
+ [ "$status" -eq 0 ]
42
+ [ -z "$output" ]
43
+ }
44
+
45
+ @test "collects Bash commands with their is_error status" {
46
+ run assayer_collect_commands "$TRANSCRIPT"
47
+ [ "$status" -eq 0 ]
48
+ [ "$(printf '%s' "$output" | jq 'length')" -eq 2 ]
49
+ [ "$(printf '%s' "$output" | jq -r '.[0].command')" = "npm run build" ]
50
+ [ "$(printf '%s' "$output" | jq -r '.[0].is_error')" = "false" ]
51
+ [ "$(printf '%s' "$output" | jq -r '.[1].command')" = "npm test" ]
52
+ [ "$(printf '%s' "$output" | jq -r '.[1].is_error')" = "true" ]
53
+ }
54
+
55
+ @test "captures the failing command's output excerpt" {
56
+ run assayer_collect_commands "$TRANSCRIPT"
57
+ [ "$status" -eq 0 ]
58
+ [ "$(printf '%s' "$output" | jq -r '.[1].excerpt')" = "1 failed" ]
59
+ }
60
+
61
+ @test "non-Bash tool calls are excluded" {
62
+ run assayer_collect_commands "$TRANSCRIPT"
63
+ [ "$status" -eq 0 ]
64
+ # Only the two Bash commands, never the Read call.
65
+ [ "$(printf '%s' "$output" | jq '[.[] | select(.command | contains("file"))] | length')" -eq 0 ]
66
+ }
67
+
68
+ @test "missing transcript yields empty command array" {
69
+ run assayer_collect_commands "${BATS_TEST_TMPDIR}/nope.jsonl"
70
+ [ "$status" -eq 0 ]
71
+ [ "$output" = "[]" ]
72
+ }
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env bats
2
+
3
+ # Exercises the Assayer ULID generator.
4
+
5
setup() {
  # Shared bats helpers first; REPO_ROOT is read below and is presumably
  # provided by them.
  source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  # Library under test: the ULID generator.
  PLUGIN_ROOT="${REPO_ROOT}/plugins/assayer"
  # shellcheck disable=SC1091
  source "${PLUGIN_ROOT}/scripts/lib/assayer-ulid.sh"
}
12
+
13
+ @test "ulid is 26 chars of Crockford Base32" {
14
+ run assayer_ulid
15
+ [ "$status" -eq 0 ]
16
+ [ "${#output}" -eq 26 ]
17
+ [[ "$output" =~ ^[0-9A-HJKMNP-TV-Z]{26}$ ]]
18
+ }
19
+
20
+ @test "ulids are unique across calls" {
21
+ a=$(assayer_ulid)
22
+ b=$(assayer_ulid)
23
+ [ "$a" != "$b" ]
24
+ }
25
+
26
+ @test "ulids are lexicographically time-ordered" {
27
+ a=$(assayer_ulid)
28
+ sleep 0.01
29
+ b=$(assayer_ulid)
30
+ [[ "$a" < "$b" || "$a" == "$b" ]]
31
+ }
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env bats
2
+
3
+ # Exercises the deterministic claim verifier: assayer_classify_claim and
4
+ # assayer_audit_verdict. Pure logic — no LLM, no schema, no filesystem.
5
+
6
setup() {
  # Shared bats helpers first; REPO_ROOT is read below and is presumably
  # provided by them.
  source "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  # Library under test: the deterministic claim verifier.
  PLUGIN_ROOT="${REPO_ROOT}/plugins/assayer"
  # shellcheck disable=SC1091
  source "${PLUGIN_ROOT}/scripts/lib/assayer-verify.sh"
}
13
+
14
# Command-list fixture: `npm test` recorded as an error with its output
# excerpt, followed by a `git status` that is not an error.
CMDS_FAILING_TEST='[{"command":"npm test","is_error":true,"excerpt":"1 failed, 32 passed"},{"command":"git status","is_error":false,"excerpt":""}]'
15
+
16
+ @test "tests_pass claim is contradicted by a failing test command" {
17
+ run assayer_classify_claim '{"text":"tests pass","type":"tests_pass","command_keyword":"test","confidence":0.9}' "$CMDS_FAILING_TEST"
18
+ [ "$status" -eq 0 ]
19
+ [ "$(printf '%s' "$output" | jq -r '.verdict')" = "contradicted" ]
20
+ [ "$(printf '%s' "$output" | jq -r '.evidence_command')" = "npm test" ]
21
+ [ "$(printf '%s' "$output" | jq -r '.excerpt')" = "1 failed, 32 passed" ]
22
+ }
23
+
24
+ @test "build_succeeds claim is corroborated by a passing build" {
25
+ run assayer_classify_claim '{"text":"build is green","type":"build_succeeds","command_keyword":"build","confidence":0.9}' \
26
+ '[{"command":"npm run build","is_error":false,"excerpt":"done"}]'
27
+ [ "$status" -eq 0 ]
28
+ [ "$(printf '%s' "$output" | jq -r '.verdict')" = "corroborated" ]
29
+ }
30
+
31
+ @test "claim with no matching command is unverified (no_matching_command)" {
32
+ run assayer_classify_claim '{"text":"lint clean","type":"lint_clean","command_keyword":"lint","confidence":0.9}' \
33
+ '[{"command":"npm test","is_error":false,"excerpt":""}]'
34
+ [ "$status" -eq 0 ]
35
+ [ "$(printf '%s' "$output" | jq -r '.verdict')" = "unverified" ]
36
+ [ "$(printf '%s' "$output" | jq -r '.reason')" = "no_matching_command" ]
37
+ }
38
+
39
+ @test "generic claim with no keyword is unverified (ambiguous)" {
40
+ run assayer_classify_claim '{"text":"deploy healthy","type":"generic","command_keyword":"","confidence":0.9}' \
41
+ "$CMDS_FAILING_TEST"
42
+ [ "$status" -eq 0 ]
43
+ [ "$(printf '%s' "$output" | jq -r '.verdict')" = "unverified" ]
44
+ [ "$(printf '%s' "$output" | jq -r '.reason')" = "ambiguous" ]
45
+ }
46
+
47
+ @test "most recent matching command wins (fix-and-rerun)" {
48
+ # Failing test first, passing test after a fix — the later run is authoritative.
49
+ run assayer_classify_claim '{"text":"tests pass now","type":"tests_pass","command_keyword":"test","confidence":0.9}' \
50
+ '[{"command":"npm test","is_error":true,"excerpt":"fail"},{"command":"npm test","is_error":false,"excerpt":"pass"}]'
51
+ [ "$status" -eq 0 ]
52
+ [ "$(printf '%s' "$output" | jq -r '.verdict')" = "corroborated" ]
53
+ }
54
+
55
+ @test "types_check claim matches a tsc command" {
56
+ run assayer_classify_claim '{"text":"types check out","type":"types_check","command_keyword":"","confidence":0.9}' \
57
+ '[{"command":"npx tsc --noEmit","is_error":true,"excerpt":"TS2345"}]'
58
+ [ "$status" -eq 0 ]
59
+ [ "$(printf '%s' "$output" | jq -r '.verdict')" = "contradicted" ]
60
+ }
61
+
62
+ @test "command_keyword matches when type is generic" {
63
+ run assayer_classify_claim '{"text":"migration ran","type":"generic","command_keyword":"migrate","confidence":0.9}' \
64
+ '[{"command":"rails db:migrate","is_error":false,"excerpt":"migrated"}]'
65
+ [ "$status" -eq 0 ]
66
+ [ "$(printf '%s' "$output" | jq -r '.verdict')" = "corroborated" ]
67
+ }
68
+
69
+ @test "empty claim defaults to unverified ambiguous" {
70
+ run assayer_classify_claim "" "$CMDS_FAILING_TEST"
71
+ [ "$status" -eq 0 ]
72
+ [ "$(printf '%s' "$output" | jq -r '.verdict')" = "unverified" ]
73
+ }
74
+
75
+ @test "audit verdict is contradictions_found when any contradiction" {
76
+ [ "$(assayer_audit_verdict 1 3 0)" = "contradictions_found" ]
77
+ }
78
+
79
+ @test "audit verdict is clean with corroborations and no contradictions" {
80
+ [ "$(assayer_audit_verdict 0 2 1)" = "clean" ]
81
+ }
82
+
83
+ @test "audit verdict is clean when only unverified claims" {
84
+ [ "$(assayer_audit_verdict 0 0 2)" = "clean" ]
85
+ }
86
+
87
+ @test "audit verdict is nothing_to_verify when all counts zero" {
88
+ [ "$(assayer_audit_verdict 0 0 0)" = "nothing_to_verify" ]
89
+ }