@onlooker-community/ecosystem 0.9.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +39 -1
- package/.claude-plugin/plugin.json +2 -2
- package/.github/copilot-instructions.md +46 -0
- package/.github/workflows/coverage.yml +78 -0
- package/.github/workflows/release.yml +24 -8
- package/.github/workflows/test.yml +3 -0
- package/.markdownlintignore +3 -0
- package/.release-please-manifest.json +4 -1
- package/CHANGELOG.md +44 -0
- package/README.md +57 -13
- package/config.json +6 -1
- package/docs/adr/001-claude-code-hooks-as-integration-surface.md +43 -0
- package/docs/adr/002-centralized-jsonl-event-log.md +39 -0
- package/docs/adr/003-ulid-over-uuid.md +40 -0
- package/docs/adr/004-plugin-config-with-settings-overlay.md +34 -0
- package/docs/architecture.md +117 -0
- package/hooks/hooks.json +4 -0
- package/package.json +13 -7
- package/plugins/archivist/.claude-plugin/plugin.json +14 -0
- package/plugins/archivist/CHANGELOG.md +8 -0
- package/plugins/archivist/README.md +105 -0
- package/plugins/archivist/config.json +18 -0
- package/plugins/archivist/hooks/hooks.json +35 -0
- package/plugins/archivist/scripts/hooks/archivist-extract.sh +238 -0
- package/plugins/archivist/scripts/hooks/archivist-inject.sh +159 -0
- package/plugins/archivist/scripts/lib/archivist-config.sh +66 -0
- package/plugins/archivist/scripts/lib/archivist-project-key.sh +91 -0
- package/plugins/archivist/scripts/lib/archivist-storage.sh +215 -0
- package/plugins/archivist/scripts/lib/archivist-ulid.sh +52 -0
- package/plugins/echo/.claude-plugin/plugin.json +14 -0
- package/plugins/echo/CHANGELOG.md +24 -0
- package/plugins/echo/README.md +110 -0
- package/plugins/echo/config.json +15 -0
- package/plugins/echo/docs/adr/001-echo-as-separate-plugin.md +33 -0
- package/plugins/echo/docs/adr/002-direct-evaluation-vs-tribunal-pipeline.md +35 -0
- package/plugins/echo/docs/adr/003-stop-hook-trigger.md +40 -0
- package/plugins/echo/hooks/hooks.json +15 -0
- package/plugins/echo/scripts/hooks/echo-stop-gate.sh +366 -0
- package/plugins/echo/scripts/lib/echo-config.sh +108 -0
- package/plugins/echo/scripts/lib/echo-events.sh +74 -0
- package/plugins/echo/scripts/lib/echo-project-key.sh +81 -0
- package/plugins/echo/scripts/lib/echo-ulid.sh +46 -0
- package/plugins/tribunal/.claude-plugin/plugin.json +20 -0
- package/plugins/tribunal/CHANGELOG.md +10 -0
- package/plugins/tribunal/README.md +134 -0
- package/plugins/tribunal/agents/tribunal-actor.md +35 -0
- package/plugins/tribunal/agents/tribunal-judge-adversarial.md +51 -0
- package/plugins/tribunal/agents/tribunal-judge-security.md +47 -0
- package/plugins/tribunal/agents/tribunal-judge-standard.md +47 -0
- package/plugins/tribunal/agents/tribunal-meta-judge.md +61 -0
- package/plugins/tribunal/config.json +50 -0
- package/plugins/tribunal/docs/adr/001-actor-jury-meta-gate-loop.md +40 -0
- package/plugins/tribunal/docs/adr/002-majority-gate-policy.md +48 -0
- package/plugins/tribunal/hooks/hooks.json +15 -0
- package/plugins/tribunal/scripts/hooks/tribunal-stop-gate.sh +267 -0
- package/plugins/tribunal/scripts/lib/tribunal-aggregate.sh +65 -0
- package/plugins/tribunal/scripts/lib/tribunal-config.sh +101 -0
- package/plugins/tribunal/scripts/lib/tribunal-events.sh +97 -0
- package/plugins/tribunal/scripts/lib/tribunal-gate.sh +111 -0
- package/plugins/tribunal/scripts/lib/tribunal-jury.sh +102 -0
- package/plugins/tribunal/scripts/lib/tribunal-project-key.sh +84 -0
- package/plugins/tribunal/scripts/lib/tribunal-rubric.sh +153 -0
- package/plugins/tribunal/scripts/lib/tribunal-ulid.sh +50 -0
- package/plugins/tribunal/scripts/lib/tribunal-verdict.sh +127 -0
- package/plugins/tribunal/skills/tribunal/SKILL.md +129 -0
- package/release-please-config.json +43 -5
- package/scripts/coverage/bash-coverage.mjs +169 -0
- package/scripts/coverage/format-comment.mjs +120 -0
- package/scripts/coverage/run-coverage.mjs +151 -0
- package/scripts/hooks/agent-spawn-tracker.sh +4 -4
- package/scripts/hooks/prompt-rule-injector.sh +122 -0
- package/scripts/lib/onlooker-event.mjs +82 -10
- package/scripts/lib/portable-lock.sh +48 -0
- package/scripts/lib/prompt-rules.sh +207 -0
- package/scripts/lib/tool-history.sh +7 -8
- package/scripts/lib/validate-path.sh +4 -0
- package/scripts/lint/check-manifests.mjs +314 -0
- package/scripts/lint/check-references.mjs +311 -0
- package/skills/list-prompt-rules/SKILL.md +15 -0
- package/test/bats/archivist-config-files.bats +60 -0
- package/test/bats/archivist-config.bats +54 -0
- package/test/bats/archivist-inject.bats +73 -0
- package/test/bats/archivist-project-key.bats +75 -0
- package/test/bats/archivist-storage.bats +119 -0
- package/test/bats/archivist-ulid.bats +36 -0
- package/test/bats/config.bats +10 -10
- package/test/bats/echo-config.bats +90 -0
- package/test/bats/echo-events.bats +121 -0
- package/test/bats/echo-project-key.bats +115 -0
- package/test/bats/echo-stop-hook.bats +101 -0
- package/test/bats/echo-ulid.bats +38 -0
- package/test/bats/portable-lock.bats +62 -0
- package/test/bats/prompt-rules.bats +269 -0
- package/test/bats/read-chunk-tracking.bats +73 -0
- package/test/bats/tool-history-tracker.bats +1 -0
- package/test/bats/tribunal-aggregate.bats +77 -0
- package/test/bats/tribunal-config.bats +86 -0
- package/test/bats/tribunal-events.bats +209 -0
- package/test/bats/tribunal-gate.bats +95 -0
- package/test/bats/tribunal-jury.bats +80 -0
- package/test/bats/tribunal-rubric.bats +119 -0
- package/test/bats/tribunal-stop-hook.bats +73 -0
- package/test/bats/tribunal-verdict.bats +71 -0
- package/test/bats/validate-path.bats +1 -1
- package/test/fixtures/hook-inputs/post-tool-use-read-chunked.json +15 -0
- package/test/fixtures/hook-inputs/user-prompt-submit-rule-match.json +8 -0
- package/test/fixtures/hook-inputs/user-prompt-submit-rule-nomatch.json +8 -0
- package/test/helpers/setup.bash +9 -0
- package/test/node/check-manifests.test.mjs +173 -0
- package/test/node/check-references.test.mjs +279 -0
- package/test/node/coverage.test.mjs +143 -0
- package/test/node/schema-events.test.mjs +41 -1
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
|
|
3
|
+
# Per-test fixture for the aggregate suite: sources helpers/setup.bash, calls
# setup_test_env, then loads tribunal-aggregate.sh from the tribunal plugin.
# NOTE(review): REPO_ROOT is not defined here — presumably provided by setup.bash.
setup() {
  . "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  export CLAUDE_PLUGIN_ROOT="${REPO_ROOT}/plugins/tribunal"
  PLUGIN_ROOT="$CLAUDE_PLUGIN_ROOT"
  # shellcheck disable=SC1091
  . "${PLUGIN_ROOT}/scripts/lib/tribunal-aggregate.sh"
}
|
|
12
|
+
|
|
13
|
+
# Shared three-judge fixture with scores 0.8, 0.6 and 0.4 (mean 0.6, median 0.6, min 0.4).
VERDICTS='[{"judge_id":"a","score":0.8},{"judge_id":"b","score":0.6},{"judge_id":"c","score":0.4}]'
|
|
14
|
+
|
|
15
|
+
@test "mean of [0.8, 0.6, 0.4] is 0.6" {
|
|
16
|
+
local v
|
|
17
|
+
v=$(tribunal_aggregate "mean" "$VERDICTS")
|
|
18
|
+
awk -v v="$v" 'BEGIN { exit !(v > 0.59 && v < 0.61) }'
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
@test "median of three is the middle" {
|
|
22
|
+
local v
|
|
23
|
+
v=$(tribunal_aggregate "median" "$VERDICTS")
|
|
24
|
+
awk -v v="$v" 'BEGIN { exit !(v > 0.59 && v < 0.61) }'
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
@test "median of four averages the two middle scores" {
|
|
28
|
+
local four='[{"judge_id":"a","score":0.2},{"judge_id":"b","score":0.4},{"judge_id":"c","score":0.6},{"judge_id":"d","score":0.8}]'
|
|
29
|
+
local v
|
|
30
|
+
v=$(tribunal_aggregate "median" "$four")
|
|
31
|
+
awk -v v="$v" 'BEGIN { exit !(v > 0.49 && v < 0.51) }'
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
@test "min picks the lowest score" {
|
|
35
|
+
local v
|
|
36
|
+
v=$(tribunal_aggregate "min" "$VERDICTS")
|
|
37
|
+
awk -v v="$v" 'BEGIN { exit !(v > 0.39 && v < 0.41) }'
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
@test "weighted_mean degrades to mean in v0.1" {
|
|
41
|
+
local v
|
|
42
|
+
v=$(tribunal_aggregate "weighted_mean" "$VERDICTS")
|
|
43
|
+
awk -v v="$v" 'BEGIN { exit !(v > 0.59 && v < 0.61) }'
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
@test "unknown method falls back to mean with warning on stderr" {
|
|
47
|
+
run bash -c '
|
|
48
|
+
source "${REPO_ROOT}/plugins/tribunal/scripts/lib/tribunal-aggregate.sh"
|
|
49
|
+
tribunal_aggregate "lottery" "[{\"score\":0.5},{\"score\":0.7}]" 2>&1
|
|
50
|
+
'
|
|
51
|
+
[ "$status" -eq 0 ]
|
|
52
|
+
[[ "$output" == *"unknown method lottery"* ]]
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
@test "empty verdicts aggregates to 0" {
|
|
56
|
+
local v
|
|
57
|
+
v=$(tribunal_aggregate "mean" "[]")
|
|
58
|
+
[ "$v" = "0" ]
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
@test "disagreement of identical scores is 0" {
|
|
62
|
+
local d
|
|
63
|
+
d=$(tribunal_disagreement '[{"score":0.7},{"score":0.7}]')
|
|
64
|
+
awk -v d="$d" 'BEGIN { exit !(d < 0.01) }'
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
@test "disagreement of [0.2, 0.8] is 0.6" {
|
|
68
|
+
local d
|
|
69
|
+
d=$(tribunal_disagreement '[{"score":0.2},{"score":0.8}]')
|
|
70
|
+
awk -v d="$d" 'BEGIN { exit !(d > 0.59 && d < 0.61) }'
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
@test "disagreement of single verdict is 0" {
|
|
74
|
+
local d
|
|
75
|
+
d=$(tribunal_disagreement '[{"score":0.7}]')
|
|
76
|
+
[ "$d" = "0" ]
|
|
77
|
+
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
|
|
3
|
+
# Per-test fixture for the config suite: sources helpers/setup.bash, calls
# setup_test_env, then loads tribunal-config.sh with CLAUDE_PLUGIN_ROOT exported.
# NOTE(review): REPO_ROOT is not defined here — presumably provided by setup.bash.
setup() {
  . "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  export CLAUDE_PLUGIN_ROOT="${REPO_ROOT}/plugins/tribunal"
  PLUGIN_ROOT="$CLAUDE_PLUGIN_ROOT"
  # shellcheck disable=SC1091
  . "${PLUGIN_ROOT}/scripts/lib/tribunal-config.sh"
}
|
|
12
|
+
|
|
13
|
+
@test "config defaults from plugin config.json: enabled" {
|
|
14
|
+
tribunal_config_load ""
|
|
15
|
+
run tribunal_config_enabled
|
|
16
|
+
[ "$status" -eq 0 ]
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
@test "stop_hook defaults to disabled" {
|
|
20
|
+
tribunal_config_load ""
|
|
21
|
+
run tribunal_config_stop_hook_enabled
|
|
22
|
+
[ "$status" -ne 0 ]
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
@test "user-level settings.json overlay can disable tribunal" {
|
|
26
|
+
mkdir -p "${HOME}/.claude"
|
|
27
|
+
printf '%s\n' '{"tribunal":{"enabled":false}}' > "${HOME}/.claude/settings.json"
|
|
28
|
+
tribunal_config_load ""
|
|
29
|
+
run tribunal_config_enabled
|
|
30
|
+
[ "$status" -ne 0 ]
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
@test "repo-level settings.json overrides user-level" {
|
|
34
|
+
mkdir -p "${HOME}/.claude"
|
|
35
|
+
printf '%s\n' '{"tribunal":{"enabled":false}}' > "${HOME}/.claude/settings.json"
|
|
36
|
+
local repo="${BATS_TEST_TMPDIR}/repo"
|
|
37
|
+
mkdir -p "${repo}/.claude"
|
|
38
|
+
printf '%s\n' '{"tribunal":{"enabled":true}}' > "${repo}/.claude/settings.json"
|
|
39
|
+
tribunal_config_load "$repo"
|
|
40
|
+
run tribunal_config_enabled
|
|
41
|
+
[ "$status" -eq 0 ]
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
@test "default judge_types is standard + adversarial" {
|
|
45
|
+
tribunal_config_load ""
|
|
46
|
+
local types
|
|
47
|
+
types=$(tribunal_config_get_json '.tribunal.session.judge_types')
|
|
48
|
+
[ "$types" = '["standard","adversarial"]' ]
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
@test "default gate_policy is majority" {
|
|
52
|
+
tribunal_config_load ""
|
|
53
|
+
local v
|
|
54
|
+
v=$(tribunal_config_get '.tribunal.session.gate_policy')
|
|
55
|
+
[ "$v" = "majority" ]
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
@test "judge model falls back to tribunal.judges.model when no per-type override" {
|
|
59
|
+
tribunal_config_load ""
|
|
60
|
+
local m
|
|
61
|
+
m=$(tribunal_config_judge_model "standard")
|
|
62
|
+
[ "$m" = "claude-opus-4-7" ]
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
@test "per-judge-type model override wins over fallback" {
|
|
66
|
+
mkdir -p "${HOME}/.claude"
|
|
67
|
+
printf '%s\n' '{"tribunal":{"judges":{"security":{"model":"claude-opus-4-7-deep"}}}}' > "${HOME}/.claude/settings.json"
|
|
68
|
+
tribunal_config_load ""
|
|
69
|
+
local m
|
|
70
|
+
m=$(tribunal_config_judge_model "security")
|
|
71
|
+
[ "$m" = "claude-opus-4-7-deep" ]
|
|
72
|
+
# Other types still fall through to the default.
|
|
73
|
+
m=$(tribunal_config_judge_model "standard")
|
|
74
|
+
[ "$m" = "claude-opus-4-7" ]
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
@test "deep-merge preserves unset defaults" {
|
|
78
|
+
mkdir -p "${HOME}/.claude"
|
|
79
|
+
printf '%s\n' '{"tribunal":{"session":{"max_iterations":7}}}' > "${HOME}/.claude/settings.json"
|
|
80
|
+
tribunal_config_load ""
|
|
81
|
+
local mi gp
|
|
82
|
+
mi=$(tribunal_config_get '.tribunal.session.max_iterations')
|
|
83
|
+
gp=$(tribunal_config_get '.tribunal.session.gate_policy')
|
|
84
|
+
[ "$mi" = "7" ]
|
|
85
|
+
[ "$gp" = "majority" ]
|
|
86
|
+
}
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
|
|
3
|
+
# Validates every emitted tribunal.* event against @onlooker-community/schema
|
|
4
|
+
# v2.1.0+. Builds a single event via the canonical emitter and asserts the
|
|
5
|
+
# resulting JSONL line passes validate().
|
|
6
|
+
|
|
7
|
+
# Per-test fixture for the events suite: points the event log at
# ${ONLOOKER_DIR}/logs, pins the emitter script path, loads tribunal-events.sh
# and exports a per-process session id.
# NOTE(review): REPO_ROOT and ONLOOKER_DIR are not defined here — presumably
# provided by setup.bash / setup_test_env.
setup() {
  . "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  export CLAUDE_PLUGIN_ROOT="${REPO_ROOT}/plugins/tribunal"
  PLUGIN_ROOT="$CLAUDE_PLUGIN_ROOT"

  export ONLOOKER_EVENTS_LOG="${ONLOOKER_DIR}/logs/onlooker-events.jsonl"
  mkdir -p "$(dirname "$ONLOOKER_EVENTS_LOG")"

  # tribunal-events.sh looks up onlooker-event.mjs relative to its plugin
  # root, but tests set CLAUDE_PLUGIN_ROOT to plugins/tribunal — point the
  # wrapper at the ecosystem copy directly so it does not have to walk.
  export _ONLOOKER_EVENT_JS="${REPO_ROOT}/scripts/lib/onlooker-event.mjs"

  # shellcheck disable=SC1091
  . "${PLUGIN_ROOT}/scripts/lib/tribunal-events.sh"

  export CLAUDE_SESSION_ID="bats-session-$$"
}
|
|
26
|
+
|
|
27
|
+
# Re-validate the latest event in the log against the schema.
# Reads the last line of $ONLOOKER_EVENTS_LOG and pipes it to
# `onlooker-event.mjs validate`. Returns 1 when that line is empty, otherwise
# the validator's exit status; the validator's stdout is discarded.
_validate_latest_event() {
  local newest
  newest=$(tail -n 1 "$ONLOOKER_EVENTS_LOG")
  if [ -z "$newest" ]; then
    return 1
  fi
  printf '%s' "$newest" \
    | ONLOOKER_DIR="$ONLOOKER_DIR" node "${REPO_ROOT}/scripts/lib/onlooker-event.mjs" validate >/dev/null
}
|
|
35
|
+
|
|
36
|
+
# Fixed placeholder identifiers reused by every event payload in this file.
TASK_ID="01J000000000000000000000TS"
ITER_ID="01J000000000000000000000IT"
JUDGE_ID="01J000000000000000000000JJ"
|
|
39
|
+
|
|
40
|
+
@test "session.start validates" {
|
|
41
|
+
local p
|
|
42
|
+
p=$(jq -n --arg t "$TASK_ID" '{
|
|
43
|
+
task_id: $t,
|
|
44
|
+
judge_types: ["standard","adversarial"],
|
|
45
|
+
gate_policy: "majority",
|
|
46
|
+
score_threshold: 0.75,
|
|
47
|
+
max_iterations: 3,
|
|
48
|
+
actor_model_id: "claude-sonnet-4-6",
|
|
49
|
+
judge_model_ids: ["claude-opus-4-7","claude-opus-4-7"],
|
|
50
|
+
meta_model_id: "claude-opus-4-7"
|
|
51
|
+
}')
|
|
52
|
+
tribunal_emit_event "tribunal.session.start" "$p"
|
|
53
|
+
run _validate_latest_event
|
|
54
|
+
[ "$status" -eq 0 ]
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
@test "iteration.start validates" {
|
|
58
|
+
local p
|
|
59
|
+
p=$(jq -n --arg t "$TASK_ID" --arg i "$ITER_ID" \
|
|
60
|
+
'{task_id: $t, iteration_id: $i, iteration_number: 0, trigger: "initial"}')
|
|
61
|
+
tribunal_emit_event "tribunal.iteration.start" "$p"
|
|
62
|
+
run _validate_latest_event
|
|
63
|
+
[ "$status" -eq 0 ]
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
@test "actor.start + actor.complete validate" {
|
|
67
|
+
local p
|
|
68
|
+
p=$(jq -n --arg t "$TASK_ID" --arg i "$ITER_ID" \
|
|
69
|
+
'{task_id: $t, iteration_id: $i, iteration_number: 0, actor_model_id: "claude-sonnet-4-6"}')
|
|
70
|
+
tribunal_emit_event "tribunal.actor.start" "$p"
|
|
71
|
+
_validate_latest_event
|
|
72
|
+
|
|
73
|
+
p=$(jq -n --arg t "$TASK_ID" --arg i "$ITER_ID" \
|
|
74
|
+
'{task_id: $t, success: true, duration_ms: 4200, iteration_id: $i, iteration_number: 0, artifact_kind: "patch", actor_model_id: "claude-sonnet-4-6"}')
|
|
75
|
+
tribunal_emit_event "tribunal.actor.complete" "$p"
|
|
76
|
+
run _validate_latest_event
|
|
77
|
+
[ "$status" -eq 0 ]
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
@test "jury.empaneled validates" {
|
|
81
|
+
local p
|
|
82
|
+
p=$(jq -n --arg t "$TASK_ID" --arg i "$ITER_ID" --arg j "$JUDGE_ID" '{
|
|
83
|
+
task_id: $t,
|
|
84
|
+
iteration_id: $i,
|
|
85
|
+
judges: [
|
|
86
|
+
{judge_id: $j, judge_type: "standard", model_id: "claude-opus-4-7"},
|
|
87
|
+
{judge_id: ($j+"X"), judge_type: "adversarial", model_id: "claude-opus-4-7"}
|
|
88
|
+
],
|
|
89
|
+
panel_size: 2
|
|
90
|
+
}')
|
|
91
|
+
tribunal_emit_event "tribunal.jury.empaneled" "$p"
|
|
92
|
+
run _validate_latest_event
|
|
93
|
+
[ "$status" -eq 0 ]
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
@test "judge.start + verdict validate" {
|
|
97
|
+
local p
|
|
98
|
+
p=$(jq -n --arg t "$TASK_ID" --arg i "$ITER_ID" --arg j "$JUDGE_ID" \
|
|
99
|
+
'{task_id: $t, iteration_id: $i, judge_id: $j, judge_type: "standard", judge_model_id: "claude-opus-4-7"}')
|
|
100
|
+
tribunal_emit_event "tribunal.judge.start" "$p"
|
|
101
|
+
_validate_latest_event
|
|
102
|
+
|
|
103
|
+
p=$(jq -n --arg t "$TASK_ID" --arg i "$ITER_ID" --arg j "$JUDGE_ID" '{
|
|
104
|
+
task_id: $t,
|
|
105
|
+
score: 0.82,
|
|
106
|
+
passed: true,
|
|
107
|
+
judge_type: "standard",
|
|
108
|
+
iteration_id: $i,
|
|
109
|
+
judge_id: $j,
|
|
110
|
+
judge_model_id: "claude-opus-4-7",
|
|
111
|
+
criteria_evaluated: ["correctness","completeness","clarity"],
|
|
112
|
+
strengths_count: 3,
|
|
113
|
+
weaknesses_count: 1,
|
|
114
|
+
confidence: 0.85,
|
|
115
|
+
feedback_summary: "looks fine"
|
|
116
|
+
}')
|
|
117
|
+
tribunal_emit_event "tribunal.verdict" "$p"
|
|
118
|
+
run _validate_latest_event
|
|
119
|
+
[ "$status" -eq 0 ]
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
@test "meta.start + meta.complete validate (with bias_types)" {
|
|
123
|
+
local p
|
|
124
|
+
p=$(jq -n --arg t "$TASK_ID" --arg i "$ITER_ID" \
|
|
125
|
+
'{task_id: $t, iteration_id: $i, meta_model_id: "claude-opus-4-7", verdicts_reviewed: 2}')
|
|
126
|
+
tribunal_emit_event "tribunal.meta.start" "$p"
|
|
127
|
+
_validate_latest_event
|
|
128
|
+
|
|
129
|
+
p=$(jq -n --arg t "$TASK_ID" --arg i "$ITER_ID" '{
|
|
130
|
+
task_id: $t,
|
|
131
|
+
verdict_quality: "questionable",
|
|
132
|
+
bias_detected: true,
|
|
133
|
+
bias_types: ["verbosity","sycophancy"],
|
|
134
|
+
override_recommendation: "re-evaluate",
|
|
135
|
+
confidence: 0.7,
|
|
136
|
+
iteration_id: $i,
|
|
137
|
+
meta_model_id: "claude-opus-4-7"
|
|
138
|
+
}')
|
|
139
|
+
tribunal_emit_event "tribunal.meta.complete" "$p"
|
|
140
|
+
run _validate_latest_event
|
|
141
|
+
[ "$status" -eq 0 ]
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
@test "consensus.reached validates" {
|
|
145
|
+
local p
|
|
146
|
+
p=$(jq -n --arg t "$TASK_ID" --arg i "$ITER_ID" --arg j "$JUDGE_ID" '{
|
|
147
|
+
task_id: $t,
|
|
148
|
+
iteration_id: $i,
|
|
149
|
+
aggregated_score: 0.7,
|
|
150
|
+
passed: true,
|
|
151
|
+
aggregation_method: "weighted_mean",
|
|
152
|
+
judges: [{judge_id: $j, score: 0.8},{judge_id: ($j+"X"), score: 0.6}]
|
|
153
|
+
}')
|
|
154
|
+
tribunal_emit_event "tribunal.consensus.reached" "$p"
|
|
155
|
+
run _validate_latest_event
|
|
156
|
+
[ "$status" -eq 0 ]
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
@test "dissent.recorded validates with re-evaluate resolution" {
|
|
160
|
+
local p
|
|
161
|
+
p=$(jq -n --arg t "$TASK_ID" --arg i "$ITER_ID" --arg j "$JUDGE_ID" '{
|
|
162
|
+
task_id: $t,
|
|
163
|
+
iteration_id: $i,
|
|
164
|
+
disagreement_score: 0.5,
|
|
165
|
+
judges: [
|
|
166
|
+
{judge_id: $j, score: 0.85, passed: true},
|
|
167
|
+
{judge_id: ($j+"X"), score: 0.35, passed: false}
|
|
168
|
+
],
|
|
169
|
+
resolution: "re-evaluate"
|
|
170
|
+
}')
|
|
171
|
+
tribunal_emit_event "tribunal.dissent.recorded" "$p"
|
|
172
|
+
run _validate_latest_event
|
|
173
|
+
[ "$status" -eq 0 ]
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
@test "gate.passed and gate.blocked validate" {
|
|
177
|
+
local p
|
|
178
|
+
p=$(jq -n --arg t "$TASK_ID" --arg i "$ITER_ID" \
|
|
179
|
+
'{task_id: $t, iteration_id: $i, final_score: 0.82, iteration_number: 0, judges_consulted: 2}')
|
|
180
|
+
tribunal_emit_event "tribunal.gate.passed" "$p"
|
|
181
|
+
_validate_latest_event
|
|
182
|
+
|
|
183
|
+
p=$(jq -n --arg t "$TASK_ID" --arg i "$ITER_ID" '{
|
|
184
|
+
task_id: $t,
|
|
185
|
+
iteration_id: $i,
|
|
186
|
+
reason: "low_score",
|
|
187
|
+
final_score: 0.42,
|
|
188
|
+
iteration_number: 0,
|
|
189
|
+
will_retry: true,
|
|
190
|
+
retry_iteration_number: 1
|
|
191
|
+
}')
|
|
192
|
+
tribunal_emit_event "tribunal.gate.blocked" "$p"
|
|
193
|
+
run _validate_latest_event
|
|
194
|
+
[ "$status" -eq 0 ]
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
@test "session.complete validates with exhausted_iterations outcome" {
|
|
198
|
+
local p
|
|
199
|
+
p=$(jq -n --arg t "$TASK_ID" \
|
|
200
|
+
'{task_id: $t, outcome: "exhausted_iterations", final_score: 0.55, iterations_used: 3, total_duration_ms: 28000}')
|
|
201
|
+
tribunal_emit_event "tribunal.session.complete" "$p"
|
|
202
|
+
run _validate_latest_event
|
|
203
|
+
[ "$status" -eq 0 ]
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
@test "emission fails loudly on bogus event_type (schema rejects)" {
|
|
207
|
+
run tribunal_emit_event "tribunal.no.such.event" '{"task_id":"x"}'
|
|
208
|
+
[ "$status" -ne 0 ]
|
|
209
|
+
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
|
|
3
|
+
# Per-test fixture for the gate suite: sources helpers/setup.bash, calls
# setup_test_env, then loads tribunal-gate.sh from the tribunal plugin.
# NOTE(review): REPO_ROOT is not defined here — presumably provided by setup.bash.
setup() {
  . "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  export CLAUDE_PLUGIN_ROOT="${REPO_ROOT}/plugins/tribunal"
  PLUGIN_ROOT="$CLAUDE_PLUGIN_ROOT"
  # shellcheck disable=SC1091
  . "${PLUGIN_ROOT}/scripts/lib/tribunal-gate.sh"
}
|
|
12
|
+
|
|
13
|
+
# Two-judge verdict fixtures: both passing, one failing, both failing.
ALL_PASSED='[{"judge_id":"a","score":0.85,"passed":true},{"judge_id":"b","score":0.80,"passed":true}]'
ONE_FAILED='[{"judge_id":"a","score":0.85,"passed":true},{"judge_id":"b","score":0.40,"passed":false}]'
ALL_FAILED='[{"judge_id":"a","score":0.30,"passed":false},{"judge_id":"b","score":0.40,"passed":false}]'
# Empty meta-judge result, used when no meta recommendation is supplied.
NO_META='{}'
|
|
17
|
+
|
|
18
|
+
@test "strict: all judges pass + score >= threshold → passed" {
|
|
19
|
+
local out
|
|
20
|
+
out=$(tribunal_gate_decide "strict" "$ALL_PASSED" "0.82" "0.75" "$NO_META" "0.05" "0.25")
|
|
21
|
+
[ "$(printf '%s' "$out" | jq -r '.passed')" = "true" ]
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
@test "strict: one judge fails → blocked with dissent_unresolved or low_score" {
|
|
25
|
+
local out
|
|
26
|
+
out=$(tribunal_gate_decide "strict" "$ONE_FAILED" "0.62" "0.75" "$NO_META" "0.45" "0.25")
|
|
27
|
+
[ "$(printf '%s' "$out" | jq -r '.passed')" = "false" ]
|
|
28
|
+
# dissent + no meta override → dissent_unresolved
|
|
29
|
+
[ "$(printf '%s' "$out" | jq -r '.reason')" = "dissent_unresolved" ]
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
@test "majority: more than half pass + score clears → passed" {
|
|
33
|
+
local three='[{"judge_id":"a","score":0.9,"passed":true},{"judge_id":"b","score":0.8,"passed":true},{"judge_id":"c","score":0.4,"passed":false}]'
|
|
34
|
+
local out
|
|
35
|
+
out=$(tribunal_gate_decide "majority" "$three" "0.78" "0.75" "$NO_META" "0.20" "0.25")
|
|
36
|
+
[ "$(printf '%s' "$out" | jq -r '.passed')" = "true" ]
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
@test "majority: split 1-1 with low score → blocked low_score" {
|
|
40
|
+
local out
|
|
41
|
+
out=$(tribunal_gate_decide "majority" "$ONE_FAILED" "0.62" "0.75" "$NO_META" "0.20" "0.25")
|
|
42
|
+
[ "$(printf '%s' "$out" | jq -r '.passed')" = "false" ]
|
|
43
|
+
[ "$(printf '%s' "$out" | jq -r '.reason')" = "low_score" ]
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
@test "unanimous: identical to strict when count > 1" {
|
|
47
|
+
local out
|
|
48
|
+
out=$(tribunal_gate_decide "unanimous" "$ALL_PASSED" "0.82" "0.75" "$NO_META" "0.05" "0.25")
|
|
49
|
+
[ "$(printf '%s' "$out" | jq -r '.passed')" = "true" ]
|
|
50
|
+
|
|
51
|
+
out=$(tribunal_gate_decide "unanimous" "$ONE_FAILED" "0.62" "0.75" "$NO_META" "0.05" "0.25")
|
|
52
|
+
[ "$(printf '%s' "$out" | jq -r '.passed')" = "false" ]
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
@test "meta_override accept beats failing jury" {
|
|
56
|
+
local meta='{"override_recommendation":"accept","bias_detected":false}'
|
|
57
|
+
local out
|
|
58
|
+
out=$(tribunal_gate_decide "meta_override" "$ALL_FAILED" "0.30" "0.75" "$meta" "0.10" "0.25")
|
|
59
|
+
[ "$(printf '%s' "$out" | jq -r '.passed')" = "true" ]
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
@test "meta_override reject blocks even with passing jury" {
|
|
63
|
+
local meta='{"override_recommendation":"reject","bias_detected":false}'
|
|
64
|
+
local out
|
|
65
|
+
out=$(tribunal_gate_decide "meta_override" "$ALL_PASSED" "0.82" "0.75" "$meta" "0.05" "0.25")
|
|
66
|
+
[ "$(printf '%s' "$out" | jq -r '.passed')" = "false" ]
|
|
67
|
+
[ "$(printf '%s' "$out" | jq -r '.reason')" = "meta_override" ]
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
@test "bias_detected + meta says reject → bias_detected reason" {
|
|
71
|
+
local meta='{"override_recommendation":"reject","bias_detected":true,"bias_types":["verbosity"]}'
|
|
72
|
+
local out
|
|
73
|
+
out=$(tribunal_gate_decide "majority" "$ONE_FAILED" "0.60" "0.75" "$meta" "0.45" "0.25")
|
|
74
|
+
[ "$(printf '%s' "$out" | jq -r '.passed')" = "false" ]
|
|
75
|
+
[ "$(printf '%s' "$out" | jq -r '.reason')" = "bias_detected" ]
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
@test "dissent above threshold + no meta override → dissent_unresolved" {
|
|
79
|
+
local meta='{"bias_detected":false}'
|
|
80
|
+
local out
|
|
81
|
+
out=$(tribunal_gate_decide "majority" "$ONE_FAILED" "0.62" "0.50" "$meta" "0.45" "0.25")
|
|
82
|
+
[ "$(printf '%s' "$out" | jq -r '.passed')" = "false" ]
|
|
83
|
+
[ "$(printf '%s' "$out" | jq -r '.reason')" = "dissent_unresolved" ]
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
@test "score clears threshold but jury says no → meta_override or dissent reason" {
|
|
87
|
+
# All judges marked passed=false but aggregated_score is above threshold
|
|
88
|
+
# (contrived to exercise the "score_ok + jury_fail" branch).
|
|
89
|
+
local odd='[{"judge_id":"a","score":0.9,"passed":false},{"judge_id":"b","score":0.8,"passed":false}]'
|
|
90
|
+
local meta='{"override_recommendation":"reject","bias_detected":false}'
|
|
91
|
+
local out
|
|
92
|
+
out=$(tribunal_gate_decide "majority" "$odd" "0.85" "0.75" "$meta" "0.10" "0.25")
|
|
93
|
+
[ "$(printf '%s' "$out" | jq -r '.passed')" = "false" ]
|
|
94
|
+
[ "$(printf '%s' "$out" | jq -r '.reason')" = "meta_override" ]
|
|
95
|
+
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
|
|
3
|
+
# Per-test fixture for the jury suite: loads the config, ULID and jury libraries
# in that order, then loads the configuration with an empty repo path.
# NOTE(review): REPO_ROOT is not defined here — presumably provided by setup.bash.
setup() {
  . "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  export CLAUDE_PLUGIN_ROOT="${REPO_ROOT}/plugins/tribunal"
  PLUGIN_ROOT="$CLAUDE_PLUGIN_ROOT"
  # shellcheck disable=SC1091
  . "${PLUGIN_ROOT}/scripts/lib/tribunal-config.sh"
  # shellcheck disable=SC1091
  . "${PLUGIN_ROOT}/scripts/lib/tribunal-ulid.sh"
  # shellcheck disable=SC1091
  . "${PLUGIN_ROOT}/scripts/lib/tribunal-jury.sh"

  tribunal_config_load ""
}
|
|
18
|
+
|
|
19
|
+
@test "default empanel produces standard + adversarial" {
|
|
20
|
+
local panel
|
|
21
|
+
panel=$(tribunal_jury_empanel '["standard","adversarial"]')
|
|
22
|
+
[ "$(printf '%s' "$panel" | jq 'length')" = "2" ]
|
|
23
|
+
[ "$(printf '%s' "$panel" | jq -r '.[0].judge_type')" = "standard" ]
|
|
24
|
+
[ "$(printf '%s' "$panel" | jq -r '.[1].judge_type')" = "adversarial" ]
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
@test "each panel member gets a distinct judge_id" {
|
|
28
|
+
local panel
|
|
29
|
+
panel=$(tribunal_jury_empanel '["standard","adversarial","security"]')
|
|
30
|
+
local distinct
|
|
31
|
+
distinct=$(printf '%s' "$panel" | jq -r '[.[].judge_id] | unique | length')
|
|
32
|
+
[ "$distinct" = "3" ]
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
@test "panel members get model from config" {
|
|
36
|
+
local panel m
|
|
37
|
+
panel=$(tribunal_jury_empanel '["standard"]')
|
|
38
|
+
m=$(printf '%s' "$panel" | jq -r '.[0].model')
|
|
39
|
+
[ "$m" = "claude-opus-4-7" ]
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
@test "maintainability degrades to standard with warning" {
|
|
43
|
+
run bash -c '
|
|
44
|
+
source "${REPO_ROOT}/plugins/tribunal/scripts/lib/tribunal-config.sh"
|
|
45
|
+
source "${REPO_ROOT}/plugins/tribunal/scripts/lib/tribunal-ulid.sh"
|
|
46
|
+
source "${REPO_ROOT}/plugins/tribunal/scripts/lib/tribunal-jury.sh"
|
|
47
|
+
CLAUDE_PLUGIN_ROOT="${REPO_ROOT}/plugins/tribunal" tribunal_config_load ""
|
|
48
|
+
tribunal_jury_empanel "[\"maintainability\"]" 2>&1
|
|
49
|
+
'
|
|
50
|
+
[ "$status" -eq 0 ]
|
|
51
|
+
[[ "$output" == *"degrading to standard"* ]]
|
|
52
|
+
[[ "$output" == *"standard"* ]]
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
@test "meta type is refused in jury panel" {
|
|
56
|
+
local panel
|
|
57
|
+
panel=$(tribunal_jury_empanel '["standard","meta"]' 2>/dev/null)
|
|
58
|
+
[ "$(printf '%s' "$panel" | jq 'length')" = "1" ]
|
|
59
|
+
[ "$(printf '%s' "$panel" | jq -r '.[0].judge_type')" = "standard" ]
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
@test "subagent mapping is canonical per judge_type" {
|
|
63
|
+
local panel
|
|
64
|
+
panel=$(tribunal_jury_empanel '["standard","security","adversarial"]')
|
|
65
|
+
[ "$(printf '%s' "$panel" | jq -r '.[0].subagent')" = "tribunal-judge-standard" ]
|
|
66
|
+
[ "$(printf '%s' "$panel" | jq -r '.[1].subagent')" = "tribunal-judge-security" ]
|
|
67
|
+
[ "$(printf '%s' "$panel" | jq -r '.[2].subagent')" = "tribunal-judge-adversarial" ]
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
@test "to_schema_judges strips internal subagent field" {
|
|
71
|
+
local panel schema
|
|
72
|
+
panel=$(tribunal_jury_empanel '["standard"]')
|
|
73
|
+
schema=$(tribunal_jury_to_schema_judges "$panel")
|
|
74
|
+
# subagent must NOT appear in the schema-shape output
|
|
75
|
+
[ "$(printf '%s' "$schema" | jq -r '.[0] | has("subagent")')" = "false" ]
|
|
76
|
+
# but judge_id, judge_type, model_id must
|
|
77
|
+
[ "$(printf '%s' "$schema" | jq -r '.[0] | has("judge_id")')" = "true" ]
|
|
78
|
+
[ "$(printf '%s' "$schema" | jq -r '.[0] | has("judge_type")')" = "true" ]
|
|
79
|
+
[ "$(printf '%s' "$schema" | jq -r '.[0] | has("model_id")')" = "true" ]
|
|
80
|
+
}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
#!/usr/bin/env bats
|
|
2
|
+
|
|
3
|
+
# Per-test fixture for the rubric suite: loads the config and rubric libraries,
# then loads the configuration with an empty repo path.
# NOTE(review): REPO_ROOT is not defined here — presumably provided by setup.bash.
setup() {
  . "${BATS_TEST_DIRNAME}/../helpers/setup.bash"
  setup_test_env

  export CLAUDE_PLUGIN_ROOT="${REPO_ROOT}/plugins/tribunal"
  PLUGIN_ROOT="$CLAUDE_PLUGIN_ROOT"
  # shellcheck disable=SC1091
  . "${PLUGIN_ROOT}/scripts/lib/tribunal-config.sh"
  # shellcheck disable=SC1091
  . "${PLUGIN_ROOT}/scripts/lib/tribunal-rubric.sh"

  tribunal_config_load ""
}
|
|
16
|
+
|
|
17
|
+
@test "default rubric loads from builtins" {
|
|
18
|
+
tribunal_rubric_load ""
|
|
19
|
+
local r id
|
|
20
|
+
r=$(tribunal_rubric_get "default")
|
|
21
|
+
[ -n "$r" ]
|
|
22
|
+
id=$(printf '%s' "$r" | jq -r '.id')
|
|
23
|
+
[ "$id" = "default" ]
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
@test "default rubric id resolves to 'default'" {
|
|
27
|
+
local id
|
|
28
|
+
id=$(tribunal_rubric_default_id)
|
|
29
|
+
[ "$id" = "default" ]
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
@test "default rubric passes validation" {
|
|
33
|
+
tribunal_rubric_load ""
|
|
34
|
+
local r
|
|
35
|
+
r=$(tribunal_rubric_get "default")
|
|
36
|
+
run tribunal_rubric_validate "$r"
|
|
37
|
+
[ "$status" -eq 0 ]
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
@test "project rubric override by id replaces builtin" {
|
|
41
|
+
local repo="${BATS_TEST_TMPDIR}/repo"
|
|
42
|
+
mkdir -p "${repo}/.claude"
|
|
43
|
+
cat > "${repo}/.claude/tribunal.json" <<'EOF'
|
|
44
|
+
{
|
|
45
|
+
"rubrics": [
|
|
46
|
+
{
|
|
47
|
+
"id": "default",
|
|
48
|
+
"criteria": [
|
|
49
|
+
{ "name": "tests", "weight": 1.0, "min_pass": 0.9 }
|
|
50
|
+
],
|
|
51
|
+
"score_threshold": 0.9,
|
|
52
|
+
"max_iterations": 5,
|
|
53
|
+
"judge_types": ["standard"],
|
|
54
|
+
"gate_policy": "strict",
|
|
55
|
+
"aggregation_method": "min"
|
|
56
|
+
}
|
|
57
|
+
]
|
|
58
|
+
}
|
|
59
|
+
EOF
|
|
60
|
+
tribunal_rubric_load "$repo"
|
|
61
|
+
local r mi gp
|
|
62
|
+
r=$(tribunal_rubric_get "default")
|
|
63
|
+
mi=$(printf '%s' "$r" | jq -r '.max_iterations')
|
|
64
|
+
gp=$(printf '%s' "$r" | jq -r '.gate_policy')
|
|
65
|
+
[ "$mi" = "5" ]
|
|
66
|
+
[ "$gp" = "strict" ]
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
@test "named rubric from project file is reachable by id" {
|
|
70
|
+
local repo="${BATS_TEST_TMPDIR}/repo"
|
|
71
|
+
mkdir -p "${repo}/.claude"
|
|
72
|
+
cat > "${repo}/.claude/tribunal.json" <<'EOF'
|
|
73
|
+
{
|
|
74
|
+
"rubrics": [
|
|
75
|
+
{
|
|
76
|
+
"id": "security-tight",
|
|
77
|
+
"criteria": [
|
|
78
|
+
{ "name": "security", "weight": 1.0, "min_pass": 0.95 }
|
|
79
|
+
],
|
|
80
|
+
"score_threshold": 0.95,
|
|
81
|
+
"max_iterations": 3,
|
|
82
|
+
"judge_types": ["standard", "security"],
|
|
83
|
+
"gate_policy": "unanimous",
|
|
84
|
+
"aggregation_method": "min"
|
|
85
|
+
}
|
|
86
|
+
]
|
|
87
|
+
}
|
|
88
|
+
EOF
|
|
89
|
+
tribunal_rubric_load "$repo"
|
|
90
|
+
local r
|
|
91
|
+
r=$(tribunal_rubric_get "security-tight")
|
|
92
|
+
[ -n "$r" ]
|
|
93
|
+
[ "$(printf '%s' "$r" | jq -r '.id')" = "security-tight" ]
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
@test "missing rubric id returns empty" {
|
|
97
|
+
tribunal_rubric_load ""
|
|
98
|
+
local r
|
|
99
|
+
r=$(tribunal_rubric_get "does-not-exist")
|
|
100
|
+
[ -z "$r" ]
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
@test "validate rejects weights summing != 1" {
|
|
104
|
+
local r='{"id":"bad","criteria":[{"name":"a","weight":0.4,"min_pass":0.5},{"name":"b","weight":0.4,"min_pass":0.5}],"score_threshold":0.75,"max_iterations":3,"judge_types":["standard"],"gate_policy":"majority","aggregation_method":"mean"}'
|
|
105
|
+
run tribunal_rubric_validate "$r"
|
|
106
|
+
[ "$status" -ne 0 ]
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
@test "validate rejects invalid gate_policy" {
|
|
110
|
+
local r='{"id":"bad","criteria":[{"name":"a","weight":1.0,"min_pass":0.5}],"score_threshold":0.75,"max_iterations":3,"judge_types":["standard"],"gate_policy":"democracy","aggregation_method":"mean"}'
|
|
111
|
+
run tribunal_rubric_validate "$r"
|
|
112
|
+
[ "$status" -ne 0 ]
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
@test "validate rejects out-of-range score_threshold" {
|
|
116
|
+
local r='{"id":"bad","criteria":[{"name":"a","weight":1.0,"min_pass":0.5}],"score_threshold":1.5,"max_iterations":3,"judge_types":["standard"],"gate_policy":"majority","aggregation_method":"mean"}'
|
|
117
|
+
run tribunal_rubric_validate "$r"
|
|
118
|
+
[ "$status" -ne 0 ]
|
|
119
|
+
}
|