@onlooker-community/ecosystem 0.9.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +39 -1
- package/.claude-plugin/plugin.json +2 -2
- package/.github/copilot-instructions.md +46 -0
- package/.github/workflows/coverage.yml +78 -0
- package/.github/workflows/release.yml +24 -8
- package/.github/workflows/test.yml +3 -0
- package/.markdownlintignore +3 -0
- package/.release-please-manifest.json +4 -1
- package/CHANGELOG.md +44 -0
- package/README.md +57 -13
- package/config.json +6 -1
- package/docs/adr/001-claude-code-hooks-as-integration-surface.md +43 -0
- package/docs/adr/002-centralized-jsonl-event-log.md +39 -0
- package/docs/adr/003-ulid-over-uuid.md +40 -0
- package/docs/adr/004-plugin-config-with-settings-overlay.md +34 -0
- package/docs/architecture.md +117 -0
- package/hooks/hooks.json +4 -0
- package/package.json +13 -7
- package/plugins/archivist/.claude-plugin/plugin.json +14 -0
- package/plugins/archivist/CHANGELOG.md +8 -0
- package/plugins/archivist/README.md +105 -0
- package/plugins/archivist/config.json +18 -0
- package/plugins/archivist/hooks/hooks.json +35 -0
- package/plugins/archivist/scripts/hooks/archivist-extract.sh +238 -0
- package/plugins/archivist/scripts/hooks/archivist-inject.sh +159 -0
- package/plugins/archivist/scripts/lib/archivist-config.sh +66 -0
- package/plugins/archivist/scripts/lib/archivist-project-key.sh +91 -0
- package/plugins/archivist/scripts/lib/archivist-storage.sh +215 -0
- package/plugins/archivist/scripts/lib/archivist-ulid.sh +52 -0
- package/plugins/echo/.claude-plugin/plugin.json +14 -0
- package/plugins/echo/CHANGELOG.md +24 -0
- package/plugins/echo/README.md +110 -0
- package/plugins/echo/config.json +15 -0
- package/plugins/echo/docs/adr/001-echo-as-separate-plugin.md +33 -0
- package/plugins/echo/docs/adr/002-direct-evaluation-vs-tribunal-pipeline.md +35 -0
- package/plugins/echo/docs/adr/003-stop-hook-trigger.md +40 -0
- package/plugins/echo/hooks/hooks.json +15 -0
- package/plugins/echo/scripts/hooks/echo-stop-gate.sh +366 -0
- package/plugins/echo/scripts/lib/echo-config.sh +108 -0
- package/plugins/echo/scripts/lib/echo-events.sh +74 -0
- package/plugins/echo/scripts/lib/echo-project-key.sh +81 -0
- package/plugins/echo/scripts/lib/echo-ulid.sh +46 -0
- package/plugins/tribunal/.claude-plugin/plugin.json +20 -0
- package/plugins/tribunal/CHANGELOG.md +10 -0
- package/plugins/tribunal/README.md +134 -0
- package/plugins/tribunal/agents/tribunal-actor.md +35 -0
- package/plugins/tribunal/agents/tribunal-judge-adversarial.md +51 -0
- package/plugins/tribunal/agents/tribunal-judge-security.md +47 -0
- package/plugins/tribunal/agents/tribunal-judge-standard.md +47 -0
- package/plugins/tribunal/agents/tribunal-meta-judge.md +61 -0
- package/plugins/tribunal/config.json +50 -0
- package/plugins/tribunal/docs/adr/001-actor-jury-meta-gate-loop.md +40 -0
- package/plugins/tribunal/docs/adr/002-majority-gate-policy.md +48 -0
- package/plugins/tribunal/hooks/hooks.json +15 -0
- package/plugins/tribunal/scripts/hooks/tribunal-stop-gate.sh +267 -0
- package/plugins/tribunal/scripts/lib/tribunal-aggregate.sh +65 -0
- package/plugins/tribunal/scripts/lib/tribunal-config.sh +101 -0
- package/plugins/tribunal/scripts/lib/tribunal-events.sh +97 -0
- package/plugins/tribunal/scripts/lib/tribunal-gate.sh +111 -0
- package/plugins/tribunal/scripts/lib/tribunal-jury.sh +102 -0
- package/plugins/tribunal/scripts/lib/tribunal-project-key.sh +84 -0
- package/plugins/tribunal/scripts/lib/tribunal-rubric.sh +153 -0
- package/plugins/tribunal/scripts/lib/tribunal-ulid.sh +50 -0
- package/plugins/tribunal/scripts/lib/tribunal-verdict.sh +127 -0
- package/plugins/tribunal/skills/tribunal/SKILL.md +129 -0
- package/release-please-config.json +43 -5
- package/scripts/coverage/bash-coverage.mjs +169 -0
- package/scripts/coverage/format-comment.mjs +120 -0
- package/scripts/coverage/run-coverage.mjs +151 -0
- package/scripts/hooks/agent-spawn-tracker.sh +4 -4
- package/scripts/hooks/prompt-rule-injector.sh +122 -0
- package/scripts/lib/onlooker-event.mjs +82 -10
- package/scripts/lib/portable-lock.sh +48 -0
- package/scripts/lib/prompt-rules.sh +207 -0
- package/scripts/lib/tool-history.sh +7 -8
- package/scripts/lib/validate-path.sh +4 -0
- package/scripts/lint/check-manifests.mjs +314 -0
- package/scripts/lint/check-references.mjs +311 -0
- package/skills/list-prompt-rules/SKILL.md +15 -0
- package/test/bats/archivist-config-files.bats +60 -0
- package/test/bats/archivist-config.bats +54 -0
- package/test/bats/archivist-inject.bats +73 -0
- package/test/bats/archivist-project-key.bats +75 -0
- package/test/bats/archivist-storage.bats +119 -0
- package/test/bats/archivist-ulid.bats +36 -0
- package/test/bats/config.bats +10 -10
- package/test/bats/echo-config.bats +90 -0
- package/test/bats/echo-events.bats +121 -0
- package/test/bats/echo-project-key.bats +115 -0
- package/test/bats/echo-stop-hook.bats +101 -0
- package/test/bats/echo-ulid.bats +38 -0
- package/test/bats/portable-lock.bats +62 -0
- package/test/bats/prompt-rules.bats +269 -0
- package/test/bats/read-chunk-tracking.bats +73 -0
- package/test/bats/tool-history-tracker.bats +1 -0
- package/test/bats/tribunal-aggregate.bats +77 -0
- package/test/bats/tribunal-config.bats +86 -0
- package/test/bats/tribunal-events.bats +209 -0
- package/test/bats/tribunal-gate.bats +95 -0
- package/test/bats/tribunal-jury.bats +80 -0
- package/test/bats/tribunal-rubric.bats +119 -0
- package/test/bats/tribunal-stop-hook.bats +73 -0
- package/test/bats/tribunal-verdict.bats +71 -0
- package/test/bats/validate-path.bats +1 -1
- package/test/fixtures/hook-inputs/post-tool-use-read-chunked.json +15 -0
- package/test/fixtures/hook-inputs/user-prompt-submit-rule-match.json +8 -0
- package/test/fixtures/hook-inputs/user-prompt-submit-rule-nomatch.json +8 -0
- package/test/helpers/setup.bash +9 -0
- package/test/node/check-manifests.test.mjs +173 -0
- package/test/node/check-references.test.mjs +279 -0
- package/test/node/coverage.test.mjs +143 -0
- package/test/node/schema-events.test.mjs +41 -1
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Verdict persistence for Tribunal.
|
|
3
|
+
#
|
|
4
|
+
# Writes per-iteration artifacts under:
|
|
5
|
+
# $ONLOOKER_DIR/tribunal/<project-key>/<task_id>/iteration-<iteration_id>/
|
|
6
|
+
# actor.md
|
|
7
|
+
# jury.json
|
|
8
|
+
# verdicts/<judge_id>.json
|
|
9
|
+
# consensus.json
|
|
10
|
+
# dissent.json (optional)
|
|
11
|
+
# meta.json
|
|
12
|
+
# gate.json
|
|
13
|
+
#
|
|
14
|
+
# Plus task-level files at <task_id>/{manifest,session-start,session-complete}.json.
|
|
15
|
+
#
|
|
16
|
+
# Requires tribunal-project-key.sh to be sourced.
|
|
17
|
+
|
|
18
|
+
# Print the root directory that holds every Tribunal artifact.
#
# Output: "<base>/tribunal" on stdout with no trailing newline, where <base>
# is $ONLOOKER_DIR when it is set and non-empty, otherwise "$HOME/.onlooker".
tribunal_storage_root() {
  printf '%s/tribunal' "${ONLOOKER_DIR:-$HOME/.onlooker}"
}
|
|
22
|
+
|
|
23
|
+
# Print the directory for one project: "<storage root>/<project key>".
#
# Args:
#   $1  project key
# Output: the path on stdout, no trailing newline. Nothing is created on disk.
tribunal_project_dir() {
  local project_key="$1"
  local root
  root="$(tribunal_storage_root)"
  printf '%s/%s' "$root" "$project_key"
}
|
|
27
|
+
|
|
28
|
+
# Print the directory for one task: "<project dir>/<task id>".
#
# Args:
#   $1  project key
#   $2  task id
# Output: the path on stdout, no trailing newline. Nothing is created on disk.
tribunal_task_dir() {
  local project_key="$1"
  local task="$2"
  local parent
  parent="$(tribunal_project_dir "$project_key")"
  printf '%s/%s' "$parent" "$task"
}
|
|
33
|
+
|
|
34
|
+
# Print the directory for one iteration of a task:
# "<task dir>/iteration-<iteration id>".
#
# Args:
#   $1  project key
#   $2  task id
#   $3  iteration id
# Output: the path on stdout, no trailing newline. Nothing is created on disk.
tribunal_iteration_dir() {
  local project_key="$1"
  local task="$2"
  local iteration="$3"
  local parent
  parent="$(tribunal_task_dir "$project_key" "$task")"
  printf '%s/iteration-%s' "$parent" "$iteration"
}
|
|
40
|
+
|
|
41
|
+
# Create the task directory (and any missing parents).
#
# Args:
#   $1  project key
#   $2  task id
# Returns: 1 when either argument is empty; otherwise the exit status of
# `mkdir -p` (its stderr is discarded).
tribunal_init_task() {
  local project_key="$1"
  local task="$2"
  if [[ -z "$project_key" || -z "$task" ]]; then
    return 1
  fi
  local target
  target="$(tribunal_task_dir "$project_key" "$task")"
  mkdir -p "$target" 2>/dev/null
}
|
|
47
|
+
|
|
48
|
+
# Create the iteration directory together with its "verdicts" subdirectory.
#
# Args:
#   $1  project key
#   $2  task id
#   $3  iteration id
# Returns: 1 when any argument is empty; otherwise the exit status of
# `mkdir -p` (its stderr is discarded).
tribunal_init_iteration() {
  local project_key="$1"
  local task="$2"
  local iteration="$3"
  if [[ -z "$project_key" || -z "$task" || -z "$iteration" ]]; then
    return 1
  fi
  local iteration_root
  iteration_root="$(tribunal_iteration_dir "$project_key" "$task" "$iteration")"
  mkdir -p "$iteration_root/verdicts" 2>/dev/null
}
|
|
55
|
+
|
|
56
|
+
# Write the project-level manifest (one per project key, refreshed each task).
#
# Args:
#   $1  project key (required, non-empty)
#   $2  remote URL  (empty string is stored as JSON null)
#   $3  repo root   (empty string is stored as JSON null)
# Returns: 0 on success; 1 when the key is empty, the project directory cannot
# be created, or the manifest cannot be produced.
#
# The JSON is rendered into a sibling temp file and renamed into place, so a
# failing jq (or a full disk) never leaves a truncated manifest.json behind.
tribunal_write_project_manifest() {
  local key="$1"
  local remote_url="$2"
  local repo_root="$3"
  [[ -z "$key" ]] && return 1

  local dir
  dir="$(tribunal_project_dir "$key")"
  # Bail out early instead of letting the redirect below fail on a missing dir.
  mkdir -p "$dir" 2>/dev/null || return 1

  local now
  now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  local target="$dir/manifest.json"
  local tmp="$target.tmp.$$"
  if jq -n \
    --arg key "$key" \
    --arg remote "$remote_url" \
    --arg root "$repo_root" \
    --arg now "$now" \
    '{
      project_key: $key,
      remote_url: (if $remote == "" then null else $remote end),
      repo_root: (if $root == "" then null else $root end),
      last_task_at: $now,
      source: "local"
    }' > "$tmp" && mv -f "$tmp" "$target"; then
    return 0
  fi

  # Leave any previous manifest untouched and clean up the partial file.
  rm -f "$tmp"
  return 1
}
|
|
79
|
+
|
|
80
|
+
# Write the per-task manifest with the active rubric snapshot.
#
# Args:
#   $1  project key (required, non-empty)
#   $2  task id     (required, non-empty)
#   $3  task summary (free text)
#   $4  rubric id
#   $5  rubric as a JSON document (passed to jq via --argjson)
# Returns: 0 on success; 1 when key/task id is empty, the task directory
# cannot be created, or jq rejects the input (e.g. $5 is empty or not valid
# JSON).
#
# The JSON is rendered into a sibling temp file and renamed into place. With a
# direct `> manifest.json` redirect the shell truncates the file before jq
# runs, so an invalid rubric would leave an empty manifest on disk.
tribunal_write_task_manifest() {
  local key="$1"
  local task_id="$2"
  local task_summary="$3"
  local rubric_id="$4"
  local rubric_json="$5"
  [[ -z "$key" || -z "$task_id" ]] && return 1
  tribunal_init_task "$key" "$task_id" || return 1

  local now
  now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  local target
  target="$(tribunal_task_dir "$key" "$task_id")/manifest.json"
  local tmp="$target.tmp.$$"
  if jq -n \
    --arg task_id "$task_id" \
    --arg summary "$task_summary" \
    --arg rubric_id "$rubric_id" \
    --argjson rubric "$rubric_json" \
    --arg now "$now" \
    '{
      task_id: $task_id,
      task_summary: $summary,
      rubric_id: $rubric_id,
      rubric: $rubric,
      started_at: $now
    }' > "$tmp" && mv -f "$tmp" "$target"; then
    return 0
  fi

  # Leave any previous manifest untouched and clean up the partial file.
  rm -f "$tmp"
  return 1
}
|
|
106
|
+
|
|
107
|
+
# Append-time helpers for each per-iteration artifact. Each takes the full JSON
|
|
108
|
+
# blob the caller wants persisted (typically the same payload it just emitted as
|
|
109
|
+
# a canonical event).
|
|
110
|
+
# Persist the Actor's output as "actor.md" inside the iteration directory.
#
# Args:
#   $1  project key
#   $2  task id
#   $3  iteration id
#   $4  text to write (a trailing newline is appended)
# Returns: 1 when the iteration directory cannot be initialised; otherwise the
# status of the write.
tribunal_write_actor_output() {
  local project_key="$1"
  local task="$2"
  local iteration="$3"
  local actor_text="$4"
  if ! tribunal_init_iteration "$project_key" "$task" "$iteration"; then
    return 1
  fi
  local out_dir
  out_dir="$(tribunal_iteration_dir "$project_key" "$task" "$iteration")"
  printf '%s\n' "$actor_text" > "$out_dir/actor.md"
}
|
|
115
|
+
|
|
116
|
+
# Persist a named JSON artifact as "<name>.json" inside the iteration
# directory.
#
# Args:
#   $1  project key
#   $2  task id
#   $3  iteration id
#   $4  artifact name (file stem; ".json" is appended)
#   $5  content to write verbatim (a trailing newline is appended)
# Returns: 1 when the iteration directory cannot be initialised; otherwise the
# status of the write. The content is not validated as JSON here.
tribunal_write_iteration_artifact() {
  local project_key="$1"
  local task="$2"
  local iteration="$3"
  local artifact="$4"
  local payload="$5"
  if ! tribunal_init_iteration "$project_key" "$task" "$iteration"; then
    return 1
  fi
  local out_dir
  out_dir="$(tribunal_iteration_dir "$project_key" "$task" "$iteration")"
  printf '%s\n' "$payload" > "$out_dir/${artifact}.json"
}
|
|
121
|
+
|
|
122
|
+
# Persist one judge's verdict as "verdicts/<judge_id>.json" inside the
# iteration directory.
#
# Args:
#   $1  project key
#   $2  task id
#   $3  iteration id
#   $4  judge id (file stem; ".json" is appended)
#   $5  content to write verbatim (a trailing newline is appended)
# Returns: 1 when the iteration directory cannot be initialised; otherwise the
# status of the write. The content is not validated as JSON here.
tribunal_write_judge_verdict() {
  local project_key="$1"
  local task="$2"
  local iteration="$3"
  local judge="$4"
  local payload="$5"
  if ! tribunal_init_iteration "$project_key" "$task" "$iteration"; then
    return 1
  fi
  local verdict_dir
  verdict_dir="$(tribunal_iteration_dir "$project_key" "$task" "$iteration")/verdicts"
  printf '%s\n' "$payload" > "$verdict_dir/${judge}.json"
}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: tribunal
|
|
3
|
+
description: Run a task under multi-agent quality gates. Spawns the tribunal-actor subagent, a jury of typed Judges, and a Meta-Judge; aggregates verdicts under a configurable gate policy; retries the Actor with critique on rejection until acceptance or max_iterations. Use when the user explicitly wraps a task with /tribunal, or wants stronger correctness/safety review than a single pass. Emits the full tribunal.* canonical event stream.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Tribunal: Multi-Agent Execution with Quality Gates
|
|
7
|
+
|
|
8
|
+
You are orchestrating a **Tribunal** evaluation loop. A user task gets wrapped in: **Actor → Jury → Meta-Judge → Gate**, retrying the Actor with feedback until the gate passes or `max_iterations` is reached.
|
|
9
|
+
|
|
10
|
+
## Setup
|
|
11
|
+
|
|
12
|
+
Before the loop, source the plugin's bash helpers and load config. Run this once at the start:
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
set -uo pipefail
|
|
16
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-config.sh"
|
|
17
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-rubric.sh"
|
|
18
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-jury.sh"
|
|
19
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-aggregate.sh"
|
|
20
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-gate.sh"
|
|
21
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-events.sh"
|
|
22
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-verdict.sh"
|
|
23
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-project-key.sh"
|
|
24
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-ulid.sh"
|
|
25
|
+
|
|
26
|
+
tribunal_config_load "$(pwd)"
|
|
27
|
+
tribunal_rubric_load "$(pwd)"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Parse the task description from the user's prompt arguments into `task_summary`. If the user passed `--rubric=<id>`, use that as `rubric_id`; otherwise use `tribunal_rubric_default_id`.
|
|
31
|
+
|
|
32
|
+
Resolve the active rubric with `tribunal_rubric_get "$rubric_id"` and keep the result in `rubric`. Validate it with `tribunal_rubric_validate "$rubric"`. If validation fails, abort with `tribunal.session.complete` outcome `aborted` and tell the user why.
|
|
33
|
+
|
|
34
|
+
## Per-task initialization
|
|
35
|
+
|
|
36
|
+
Generate identifiers and persist task-level state:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
task_id=$(tribunal_ulid)
|
|
40
|
+
project_key=$(tribunal_project_key "$(pwd)")
|
|
41
|
+
remote=$(tribunal_project_remote_url "$(pwd)")
|
|
42
|
+
repo_root=$(tribunal_project_repo_root "$(pwd)")
|
|
43
|
+
tribunal_write_project_manifest "$project_key" "$remote" "$repo_root"
|
|
44
|
+
tribunal_write_task_manifest "$project_key" "$task_id" "$task_summary" "$rubric_id" "$rubric"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Emit `tribunal.session.start` with the resolved config (`judge_types`, `gate_policy`, `score_threshold`, `max_iterations`, model IDs).
|
|
48
|
+
|
|
49
|
+
## The loop
|
|
50
|
+
|
|
51
|
+
For `iteration_number` from `0` while `iteration_number < max_iterations`:
|
|
52
|
+
|
|
53
|
+
1. **Iteration start.** Generate `iteration_id=$(tribunal_ulid)`. `trigger` is `"initial"` for n=0, `"gate_blocked"` for retries. Emit `tribunal.iteration.start`.
|
|
54
|
+
|
|
55
|
+
2. **Actor.** Emit `tribunal.actor.start`. Use the Task tool to spawn `tribunal-actor` with:
|
|
56
|
+
- The task description.
|
|
57
|
+
- The rubric criteria (just `name` + `weight` + `min_pass`).
|
|
58
|
+
- On retries: a digest of the prior iteration's consensus, dissent (if any), and Meta-Judge override.
|
|
59
|
+
|
|
60
|
+
Capture the Actor's final output. Persist it: `tribunal_write_actor_output "$project_key" "$task_id" "$iteration_id" "$actor_output"`. Emit `tribunal.actor.complete` with `success: true` and the inferred `artifact_kind` (`file` / `patch` / `message` / `command`).
|
|
61
|
+
|
|
62
|
+
3. **Empanel the jury.** Resolve the panel from configured types:
|
|
63
|
+
```bash
|
|
64
|
+
types=$(tribunal_config_get_json '.tribunal.session.judge_types')
|
|
65
|
+
# Rubric may override:
|
|
66
|
+
rubric_types=$(printf '%s' "$rubric" | jq -c '.judge_types // empty')
|
|
67
|
+
[[ -n "$rubric_types" && "$rubric_types" != "null" ]] && types="$rubric_types"
|
|
68
|
+
jury=$(tribunal_jury_empanel "$types")
|
|
69
|
+
```
|
|
70
|
+
Persist the jury (`tribunal_write_iteration_artifact ... jury ...`) and emit `tribunal.jury.empaneled` with the schema-shaped `judges[]` (`tribunal_jury_to_schema_judges "$jury"`).
|
|
71
|
+
|
|
72
|
+
4. **Run each Judge.** For each entry in the jury panel:
|
|
73
|
+
- Emit `tribunal.judge.start` with `judge_id`, `judge_type`, `judge_model_id`.
|
|
74
|
+
- Spawn the judge subagent (`.subagent` field) with the Actor output + rubric.
|
|
75
|
+
- Parse the JSON object the judge returns. Augment it with `task_id`, `iteration_id`, `judge_id`, `judge_model_id` from the panel entry, and `judge_type` from the panel entry (canonical, overriding what the agent self-reported).
|
|
76
|
+
- Emit `tribunal.verdict` with that payload.
|
|
77
|
+
- Persist with `tribunal_write_judge_verdict`.
|
|
78
|
+
|
|
79
|
+
Collect the verdicts into a JSON array `verdicts`.
|
|
80
|
+
|
|
81
|
+
5. **Aggregate + dissent.**
|
|
82
|
+
```bash
|
|
83
|
+
method=$(printf '%s' "$rubric" | jq -r '.aggregation_method // "weighted_mean"')
|
|
84
|
+
threshold=$(printf '%s' "$rubric" | jq -r '.score_threshold // 0.75')
|
|
85
|
+
dissent_threshold=$(tribunal_config_get '.tribunal.session.dissent_threshold')
|
|
86
|
+
[[ -z "$dissent_threshold" ]] && dissent_threshold="0.25"
|
|
87
|
+
|
|
88
|
+
aggregated=$(tribunal_aggregate "$method" "$verdicts" "$rubric")
|
|
89
|
+
dissent=$(tribunal_disagreement "$verdicts")
|
|
90
|
+
```
|
|
91
|
+
Build and emit `tribunal.consensus.reached`. If `dissent > dissent_threshold`, emit `tribunal.dissent.recorded` (set `resolution` to `null` for now — the Meta-Judge may set it on the next step via `override_recommendation`).
|
|
92
|
+
|
|
93
|
+
6. **Meta-Judge.** Emit `tribunal.meta.start`. Spawn `tribunal-meta-judge` with the verdicts and the Actor output. Parse its JSON into `meta`; augment with `task_id`, `iteration_id`, `meta_model_id`. Emit `tribunal.meta.complete`. Persist.
|
|
94
|
+
|
|
95
|
+
7. **Gate.**
|
|
96
|
+
```bash
|
|
97
|
+
policy=$(printf '%s' "$rubric" | jq -r '.gate_policy // "majority"')
|
|
98
|
+
gate=$(tribunal_gate_decide "$policy" "$verdicts" "$aggregated" "$threshold" "$meta" "$dissent" "$dissent_threshold")
|
|
99
|
+
```
|
|
100
|
+
If `gate.passed == true`, emit `tribunal.gate.passed` with `final_score: aggregated` and break the loop with outcome `accepted`. Otherwise emit `tribunal.gate.blocked` with the `reason`, `will_retry: (iteration_number + 1 < max_iterations)`, and `retry_iteration_number` if retrying. Persist `gate.json` either way.
|
|
101
|
+
|
|
102
|
+
If blocking and retrying, build the retry digest (lowest-scoring criteria + meta override + dissent summary) and feed it into the next iteration's Actor prompt.
|
|
103
|
+
|
|
104
|
+
## Termination
|
|
105
|
+
|
|
106
|
+
When the loop exits:
|
|
107
|
+
|
|
108
|
+
- `accepted` — gate passed.
|
|
109
|
+
- `exhausted_iterations` — loop ran `max_iterations` without acceptance.
|
|
110
|
+
- `aborted` — orchestrator caught an unrecoverable error (rubric validation failed, Actor subagent crashed twice, etc.). Set this explicitly when you catch errors; do not silently swallow.
|
|
111
|
+
|
|
112
|
+
Emit `tribunal.session.complete` with `outcome`, `final_score`, `iterations_used`, `total_duration_ms`. Skip `total_cost_usd` in v0.1 — the runtime does not surface subagent costs to the orchestrator yet.
|
|
113
|
+
|
|
114
|
+
## Summary to the user
|
|
115
|
+
|
|
116
|
+
After emitting `session.complete`, render a compact markdown summary to the user:
|
|
117
|
+
|
|
118
|
+
- Verdict (✓ accepted / ✗ rejected / ⏱ exhausted / ⚠ aborted) with final score.
|
|
119
|
+
- Per-iteration table: iteration | per-judge scores | dissent | gate result.
|
|
120
|
+
- Meta-Judge bias notes if any.
|
|
121
|
+
- Path to the persisted artifacts (`$ONLOOKER_DIR/tribunal/<key>/<task_id>/`; `ONLOOKER_DIR` defaults to `~/.onlooker`).
|
|
122
|
+
|
|
123
|
+
Keep the summary terse. The artifacts on disk are the long form.
|
|
124
|
+
|
|
125
|
+
## Error handling
|
|
126
|
+
|
|
127
|
+
- If a judge subagent fails to return parseable JSON, treat that judge as `score: 0, passed: false, confidence: 0` and surface the parse error in `feedback_summary`. Do not abort the iteration — let the gate decide.
|
|
128
|
+
- If the Meta-Judge fails, default to `verdict_quality: "questionable", bias_detected: false` so the gate falls back to score-based logic.
|
|
129
|
+
- If event emission fails (schema validation), keep going and write a warning to stderr. The persisted artifacts on disk are still trustworthy.
|
|
@@ -10,18 +10,56 @@
|
|
|
10
10
|
"extra-files": [
|
|
11
11
|
{
|
|
12
12
|
"type": "json",
|
|
13
|
-
"path": "
|
|
13
|
+
"path": ".claude-plugin/plugin.json",
|
|
14
|
+
"jsonpath": "$.version"
|
|
15
|
+
}
|
|
16
|
+
]
|
|
17
|
+
},
|
|
18
|
+
"plugins/archivist": {
|
|
19
|
+
"changelog-path": "CHANGELOG.md",
|
|
20
|
+
"release-type": "simple",
|
|
21
|
+
"bump-minor-pre-major": true,
|
|
22
|
+
"bump-patch-for-minor-pre-major": false,
|
|
23
|
+
"component": "archivist",
|
|
24
|
+
"draft": false,
|
|
25
|
+
"prerelease": false,
|
|
26
|
+
"extra-files": [
|
|
27
|
+
{
|
|
28
|
+
"type": "json",
|
|
29
|
+
"path": ".claude-plugin/plugin.json",
|
|
14
30
|
"jsonpath": "$.version"
|
|
15
|
-
}
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
},
|
|
34
|
+
"plugins/tribunal": {
|
|
35
|
+
"changelog-path": "CHANGELOG.md",
|
|
36
|
+
"release-type": "simple",
|
|
37
|
+
"bump-minor-pre-major": true,
|
|
38
|
+
"bump-patch-for-minor-pre-major": false,
|
|
39
|
+
"component": "tribunal",
|
|
40
|
+
"draft": false,
|
|
41
|
+
"prerelease": false,
|
|
42
|
+
"extra-files": [
|
|
16
43
|
{
|
|
17
44
|
"type": "json",
|
|
18
45
|
"path": ".claude-plugin/plugin.json",
|
|
19
46
|
"jsonpath": "$.version"
|
|
20
|
-
}
|
|
47
|
+
}
|
|
48
|
+
]
|
|
49
|
+
},
|
|
50
|
+
"plugins/echo": {
|
|
51
|
+
"changelog-path": "CHANGELOG.md",
|
|
52
|
+
"release-type": "simple",
|
|
53
|
+
"bump-minor-pre-major": true,
|
|
54
|
+
"bump-patch-for-minor-pre-major": false,
|
|
55
|
+
"component": "echo",
|
|
56
|
+
"draft": false,
|
|
57
|
+
"prerelease": false,
|
|
58
|
+
"extra-files": [
|
|
21
59
|
{
|
|
22
60
|
"type": "json",
|
|
23
|
-
"path": ".claude-plugin/
|
|
24
|
-
"jsonpath": "$.
|
|
61
|
+
"path": ".claude-plugin/plugin.json",
|
|
62
|
+
"jsonpath": "$.version"
|
|
25
63
|
}
|
|
26
64
|
]
|
|
27
65
|
}
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Bash "tested-functions ratio" heuristic.
|
|
3
|
+
//
|
|
4
|
+
// True bash line coverage (bashcov / kcov) is heavy and flaky in CI, so
|
|
5
|
+
// instead we ask the cheaper question: "for every public function defined
|
|
6
|
+
// in scripts/lib/, does at least one bats test reference it by name?". The
|
|
7
|
+
// result is a per-file ratio plus a flat list of untested public functions.
|
|
8
|
+
//
|
|
9
|
+
// What counts as a "public" function:
|
|
10
|
+
// * defined with either `name() { ... }` or `function name { ... }`
|
|
11
|
+
// * name does NOT start with an underscore (those are private helpers and
|
|
12
|
+
// should be tested indirectly through their callers).
|
|
13
|
+
//
|
|
14
|
+
// What counts as a "reference" in tests:
|
|
15
|
+
// * the function name appears as a standalone word in any *.bats file
|
|
16
|
+
// (typical patterns: `run my_func ...`, `my_func "$arg"`, or sourced
|
|
17
|
+
// and called directly). False positives are possible — that's the cost
|
|
18
|
+
// of a heuristic — but the score is still useful as a regression gate
|
|
19
|
+
// and is calibrated against the noise floor.
|
|
20
|
+
//
|
|
21
|
+
// Flags:
|
|
22
|
+
// --json emit structured JSON on stdout (default: human-readable)
|
|
23
|
+
// --root <p> override repo root
|
|
24
|
+
//
|
|
25
|
+
// Exit codes: always 0; this is an informational tool. Use --json to feed
|
|
26
|
+
// into format-comment.mjs.
|
|
27
|
+
|
|
28
|
+
import { readdirSync, readFileSync, statSync } from 'node:fs';
|
|
29
|
+
import { dirname, join, relative, resolve } from 'node:path';
|
|
30
|
+
import { fileURLToPath } from 'node:url';
|
|
31
|
+
|
|
32
|
+
/**
 * Walk upward from `start` until a directory containing
 * `.claude-plugin/marketplace.json` is found.
 *
 * @param {string} start - Directory to begin the search from (resolved to an
 *   absolute path first).
 * @returns {string} Absolute path of the first ancestor (or `start` itself)
 *   that holds the marker file.
 * @throws {Error} When no ancestor up to and including the filesystem root
 *   contains the marker.
 */
function findRepoRoot(start) {
  let cur = resolve(start);
  for (;;) {
    try {
      statSync(join(cur, '.claude-plugin', 'marketplace.json'));
      return cur;
    } catch {
      // Marker missing (or unreadable) at this level; keep climbing.
    }
    const parent = dirname(cur);
    // dirname() is a fixed point at the filesystem root on every platform
    // ('/' on POSIX, 'C:\\' on Windows). Comparing against a literal '/'
    // would never terminate on Windows and would skip checking the root.
    if (parent === cur) break;
    cur = parent;
  }
  throw new Error(`no repo root above ${start}`);
}
|
|
43
|
+
|
|
44
|
+
/**
 * Parse the command-line flags understood by this script.
 *
 * @param {string[]} argv - Full `process.argv`-style array; the first two
 *   entries (runtime and script path) are skipped.
 * @returns {{ json: boolean, root: (string|null|undefined) }} `json` is true
 *   when `--json` is present; `root` is the value following `--root`
 *   (`undefined` if `--root` is the last argument, `null` if never given).
 *   Unknown arguments are ignored.
 */
function parseArgs(argv) {
  const parsed = { json: false, root: null };
  let idx = 2;
  while (idx < argv.length) {
    const flag = argv[idx];
    if (flag === '--json') {
      parsed.json = true;
    } else if (flag === '--root') {
      // Consume the next argument as the flag's value.
      idx += 1;
      parsed.root = argv[idx];
    }
    idx += 1;
  }
  return parsed;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Collect files beneath `dir` (recursively) whose full path satisfies
 * `predicate`.
 *
 * Directories named `node_modules` or `.git` are not descended into, and
 * directories that cannot be read are silently skipped.
 *
 * @param {string} dir - Directory to start from.
 * @param {(path: string) => boolean} predicate - Called with each regular
 *   file's joined path.
 * @returns {string[]} Matching paths in default (lexicographic) sort order.
 */
function walk(dir, predicate) {
  const found = [];
  const pending = [dir];
  while (pending.length > 0) {
    const current = pending.pop();
    let entries;
    try {
      entries = readdirSync(current, { withFileTypes: true });
    } catch {
      continue;
    }
    for (const entry of entries) {
      const full = join(current, entry.name);
      if (entry.isDirectory()) {
        const skipped = entry.name === 'node_modules' || entry.name === '.git';
        if (!skipped) pending.push(full);
        continue;
      }
      if (entry.isFile() && predicate(full)) {
        found.push(full);
      }
    }
  }
  found.sort();
  return found;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Extract the names of top-level public bash functions from a script.
 *
 * Recognised definition forms (must start at column 0 — indented lines are
 * treated as nested functions and ignored):
 *   - `name() {` / `name () {` / `function name() {`
 *   - `function name {` / `function name` (body on the next line)
 *
 * Names starting with `_` (private helpers) and a handful of bash keywords
 * are skipped. The result is de-duplicated, preserving first-seen order.
 *
 * @param {string} content - Full text of a shell script.
 * @returns {string[]} Unique public function names.
 */
function extractFunctions(content) {
  const out = [];
  const lines = content.split(/\r?\n/);
  // `name()` with an optional leading `function` keyword.
  const parenForm = /^(?:function\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\(\s*\)/;
  // `function name` without parentheses: requiring parens here would miss
  // the keyword-only form, which is equally valid bash.
  const keywordForm = /^function\s+([A-Za-z_][A-Za-z0-9_]*)\s*(?:\{|$)/;
  const reserved = ['if', 'while', 'for', 'case', 'then', 'do', 'else', 'fi', 'done'];
  for (const line of lines) {
    // Strict: must start at column 0 (no leading whitespace).
    if (line.length === 0 || line[0] === ' ' || line[0] === '\t') continue;
    const m = line.match(parenForm) ?? line.match(keywordForm);
    if (!m) continue;
    const name = m[1];
    // Skip private helpers and bash keywords that look like fn names.
    if (name.startsWith('_')) continue;
    if (reserved.includes(name)) continue;
    out.push(name);
  }
  return [...new Set(out)];
}
|
|
97
|
+
|
|
98
|
+
/**
 * Report whether `name` occurs in `testsContent` as a standalone word, i.e.
 * not immediately preceded or followed by `[A-Za-z0-9_]`. This catches
 * `run name`, `name "$x"`, `$( name )`, etc.
 *
 * @param {string} name - Function name to look for; matched literally.
 * @param {string} testsContent - Concatenated text of the test files.
 * @returns {boolean} True when at least one standalone occurrence exists.
 */
function isReferenced(name, testsContent) {
  // Escape regex metacharacters so the name is matched literally rather than
  // being interpreted as a pattern.
  const literal = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const rx = new RegExp(`(^|[^A-Za-z0-9_])${literal}([^A-Za-z0-9_]|$)`);
  return rx.test(testsContent);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Entry point: scan the bash library directories for public functions, check
 * each against the concatenated bats test sources, and print either a JSON
 * report (`--json`) or a human-readable summary on stdout.
 *
 * Report shape: `{ overall: { total, tested, ratio }, files: [...], untested: [...] }`.
 * A file (or the whole run) with zero public functions has ratio 1.
 */
function main() {
  const args = parseArgs(process.argv);
  const here = dirname(fileURLToPath(import.meta.url));
  const root = args.root ? resolve(args.root) : findRepoRoot(here);

  // NOTE(review): only these two lib directories are scanned; confirm whether
  // other plugins' scripts/lib directories should be included as well.
  const libDirs = [join(root, 'scripts', 'lib'), join(root, 'plugins', 'archivist', 'scripts', 'lib')];
  // walk() already skips unreadable/missing directories, so no extra guard.
  const libFiles = libDirs.flatMap((d) => walk(d, (p) => p.endsWith('.sh')));

  const testsDir = join(root, 'test', 'bats');
  const testFiles = walk(testsDir, (p) => p.endsWith('.bats'));
  const testsContent = testFiles.map((f) => readFileSync(f, 'utf8')).join('\n');

  const perFile = [];
  let totalFns = 0;
  let totalTested = 0;
  const untested = [];

  for (const file of libFiles) {
    const fns = extractFunctions(readFileSync(file, 'utf8'));
    // Partition once; a Set keeps the membership test O(1).
    const testedNames = new Set(fns.filter((name) => isReferenced(name, testsContent)));
    const fileUntested = fns.filter((name) => !testedNames.has(name));
    const fileTotal = fns.length;
    const fileTested = testedNames.size;
    totalFns += fileTotal;
    totalTested += fileTested;
    const relpath = relative(root, file);
    perFile.push({
      file: relpath,
      total: fileTotal,
      tested: fileTested,
      ratio: fileTotal === 0 ? 1 : fileTested / fileTotal,
      untested: fileUntested,
    });
    for (const name of fileUntested) {
      untested.push({ file: relpath, name });
    }
  }

  const overallRatio = totalFns === 0 ? 1 : totalTested / totalFns;
  const report = {
    overall: { total: totalFns, tested: totalTested, ratio: overallRatio },
    files: perFile,
    untested,
  };

  if (args.json) {
    process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
    return;
  }

  process.stdout.write(`bash function coverage: ${totalTested}/${totalFns} (${(overallRatio * 100).toFixed(1)}%)\n\n`);
  for (const f of perFile) {
    const pct = (f.ratio * 100).toFixed(0).padStart(3);
    process.stdout.write(` ${pct}% ${f.tested}/${f.total} ${f.file}\n`);
    for (const u of f.untested) {
      process.stdout.write(` - ${u}\n`);
    }
  }
}
|
|
168
|
+
|
|
169
|
+
main();
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Combine node coverage + bash function coverage into a single markdown
|
|
3
|
+
// comment suitable for posting on a pull request via `gh pr comment`.
|
|
4
|
+
//
|
|
5
|
+
// Reads each report from a file path (so the caller can capture stdout
|
|
6
|
+
// once and pass the file through). Emits markdown on stdout.
|
|
7
|
+
//
|
|
8
|
+
// Usage:
|
|
9
|
+
// format-comment.mjs --node coverage-node.json --bash coverage-bash.json
|
|
10
|
+
//
|
|
11
|
+
// Each file should be JSON produced by the matching script's --json mode.
|
|
12
|
+
|
|
13
|
+
import { readFileSync } from 'node:fs';
|
|
14
|
+
|
|
15
|
+
/**
 * Parse the command-line flags understood by this script.
 *
 * @param {string[]} argv - Full `process.argv`-style array; the first two
 *   entries (runtime and script path) are skipped.
 * @returns {{ node: (string|null|undefined), bash: (string|null|undefined), sha: (string|null|undefined) }}
 *   Values following `--node`, `--bash` and `--sha`. `sha` falls back to the
 *   `GITHUB_SHA` environment variable, then `null`. Unknown arguments are
 *   ignored.
 */
function parseArgs(argv) {
  const parsed = { node: null, bash: null, sha: process.env.GITHUB_SHA ?? null };
  for (let idx = 2; idx < argv.length; idx += 1) {
    switch (argv[idx]) {
      case '--node':
        idx += 1;
        parsed.node = argv[idx];
        break;
      case '--bash':
        idx += 1;
        parsed.bash = argv[idx];
        break;
      case '--sha':
        idx += 1;
        parsed.sha = argv[idx];
        break;
      default:
        break;
    }
  }
  return parsed;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Format a percentage value with one decimal place.
 *
 * @param {*} n - Value already expressed in percent (e.g. `87.5`).
 * @returns {string} `"87.5%"`-style text, or an em dash when `n` is not a
 *   number or is NaN.
 */
function pct(n) {
  const usable = typeof n === 'number' && !Number.isNaN(n);
  return usable ? `${n.toFixed(1)}%` : '—';
}
|
|
29
|
+
|
|
30
|
+
/**
 * Pick a traffic-light emoji for a coverage percentage.
 *
 * Thresholds: for `kind === 'bash'`, green at >= 70 and yellow at >= 50; for
 * any other kind, green at >= 80 and yellow at >= 60. Below yellow is red.
 *
 * @param {*} value - Coverage in percent.
 * @param {string} kind - `'bash'` selects the bash thresholds.
 * @returns {string} '🟢', '🟡' or '🔴'; '⚪' when `value` is not a usable
 *   number (non-number or NaN), matching how `pct` renders such values.
 */
function badge(value, kind) {
  // NaN is typeof 'number' and would otherwise fall through every `>=`
  // comparison to a misleading red badge next to a "—" percentage.
  if (typeof value !== 'number' || Number.isNaN(value)) return '⚪';
  if (kind === 'bash') {
    if (value >= 70) return '🟢';
    if (value >= 50) return '🟡';
    return '🔴';
  }
  if (value >= 80) return '🟢';
  if (value >= 60) return '🟡';
  return '🔴';
}
|
|
41
|
+
|
|
42
|
+
/**
 * Render the node coverage report as a markdown fragment: an overall summary
 * line followed by a per-file table.
 *
 * @param {?{overall: {line: number, branch: number, funcs: number}, files?: Array<{file: string, line: number, branch: number, funcs: number}>}} report
 *   Parsed JSON report; `null`/`undefined` or a report without `overall`
 *   yields a placeholder line.
 * @returns {string} Markdown text (no trailing newline).
 */
function nodeSection(report) {
  if (!report?.overall) {
    return '_No node coverage report._';
  }
  const o = report.overall;
  const lines = [
    `**Overall:** ${badge(o.line, 'node')} ${pct(o.line)} lines · ${pct(o.branch)} branches · ${pct(o.funcs)} functions`,
    '',
    '| file | line | branch | funcs |',
    '| --- | ---: | ---: | ---: |',
  ];
  // A report carrying only `overall` still renders (with an empty table)
  // instead of throwing on a missing `files` array.
  for (const f of report.files ?? []) {
    lines.push(`| \`${f.file}\` | ${pct(f.line)} | ${pct(f.branch)} | ${pct(f.funcs)} |`);
  }
  return lines.join('\n');
}
|
|
58
|
+
|
|
59
|
+
/**
 * Render the bash function-coverage report as a markdown fragment: an overall
 * summary line, a per-file table (files with zero public functions are
 * omitted), and a collapsible list of untested functions when there are any.
 *
 * @param {?{overall: {total: number, tested: number, ratio: number}, files?: Array<{file: string, total: number, tested: number, ratio: number}>, untested?: Array<{file: string, name: string}>}} report
 *   Parsed JSON report; `null`/`undefined` or a report without `overall`
 *   yields a placeholder line. `ratio` values are fractions in [0, 1].
 * @returns {string} Markdown text (no trailing newline).
 */
function bashSection(report) {
  if (!report?.overall) {
    return '_No bash function coverage report._';
  }
  const o = report.overall;
  const overallPct = o.ratio * 100;
  // Tolerate reports that omit the list fields rather than throwing.
  const files = report.files ?? [];
  const untested = report.untested ?? [];
  const lines = [
    `**Overall:** ${badge(overallPct, 'bash')} ${o.tested}/${o.total} public functions exercised by bats (${pct(overallPct)})`,
    '',
    '| file | tested / total | ratio |',
    '| --- | ---: | ---: |',
  ];
  for (const f of files) {
    if (f.total === 0) continue;
    lines.push(`| \`${f.file}\` | ${f.tested} / ${f.total} | ${pct(f.ratio * 100)} |`);
  }
  if (untested.length > 0) {
    lines.push('');
    lines.push('<details><summary>Untested public functions</summary>');
    lines.push('');
    for (const u of untested) {
      lines.push(`- \`${u.file}\` — \`${u.name}\``);
    }
    lines.push('');
    lines.push('</details>');
  }
  return lines.join('\n');
}
|
|
87
|
+
|
|
88
|
+
/**
 * Entry point: load the optional node and bash JSON reports named on the
 * command line and print the combined markdown comment on stdout.
 *
 * The first output line is an HTML marker comment; a "Commit:" line (first 12
 * characters of the SHA) is included only when a SHA is available.
 */
function main() {
  const { node, bash, sha } = parseArgs(process.argv);
  const load = (path) => (path ? JSON.parse(readFileSync(path, 'utf8')) : null);
  const nodeReport = load(node);
  const bashReport = load(bash);

  const heading = ['<!-- onlooker-coverage-comment -->', '## Coverage', ''];
  const commit = sha ? [`Commit: \`${sha.slice(0, 12)}\``, ''] : [];
  const sections = [
    '### Node (.mjs)',
    '',
    nodeSection(nodeReport),
    '',
    '### Bash (function-reference heuristic)',
    '',
    bashSection(bashReport),
    '',
    '---',
    '',
    'Bash numbers are a heuristic — they count public functions referenced by bats tests, not real line coverage. A red score points to public helpers nobody directly exercises.',
  ];

  const markdown = [...heading, ...commit, ...sections].join('\n');
  process.stdout.write(`${markdown}\n`);
}
|
|
119
|
+
|
|
120
|
+
main();
|