@onlooker-community/ecosystem 0.10.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +39 -1
- package/.claude-plugin/plugin.json +2 -2
- package/.github/copilot-instructions.md +46 -0
- package/.github/workflows/coverage.yml +78 -0
- package/.github/workflows/release.yml +24 -8
- package/.github/workflows/test.yml +3 -0
- package/.markdownlintignore +3 -0
- package/.release-please-manifest.json +5 -1
- package/CHANGELOG.md +44 -0
- package/README.md +58 -13
- package/config.json +6 -1
- package/docs/adr/001-claude-code-hooks-as-integration-surface.md +43 -0
- package/docs/adr/002-centralized-jsonl-event-log.md +39 -0
- package/docs/adr/003-ulid-over-uuid.md +40 -0
- package/docs/adr/004-plugin-config-with-settings-overlay.md +34 -0
- package/docs/architecture.md +123 -0
- package/hooks/hooks.json +4 -0
- package/package.json +13 -7
- package/plugins/archivist/.claude-plugin/plugin.json +14 -0
- package/plugins/archivist/CHANGELOG.md +8 -0
- package/plugins/archivist/README.md +105 -0
- package/plugins/archivist/config.json +18 -0
- package/plugins/archivist/hooks/hooks.json +35 -0
- package/plugins/archivist/scripts/hooks/archivist-extract.sh +238 -0
- package/plugins/archivist/scripts/hooks/archivist-inject.sh +159 -0
- package/plugins/archivist/scripts/lib/archivist-config.sh +66 -0
- package/plugins/archivist/scripts/lib/archivist-project-key.sh +91 -0
- package/plugins/archivist/scripts/lib/archivist-storage.sh +215 -0
- package/plugins/archivist/scripts/lib/archivist-ulid.sh +52 -0
- package/plugins/cartographer/.claude-plugin/plugin.json +14 -0
- package/plugins/cartographer/CHANGELOG.md +27 -0
- package/plugins/cartographer/README.md +113 -0
- package/plugins/cartographer/config.json +21 -0
- package/plugins/cartographer/docs/adr/001-background-audit-launch.md +28 -0
- package/plugins/cartographer/docs/adr/002-flock-pid-file-fallback.md +30 -0
- package/plugins/cartographer/docs/adr/003-at-least-once-event-delivery.md +32 -0
- package/plugins/cartographer/docs/adr/004-exclude-paths-replace-semantics.md +27 -0
- package/plugins/cartographer/hooks/hooks.json +44 -0
- package/plugins/cartographer/scripts/hooks/cartographer-post-write.sh +87 -0
- package/plugins/cartographer/scripts/hooks/cartographer-session-start.sh +89 -0
- package/plugins/cartographer/scripts/lib/cartographer-analyze.sh +286 -0
- package/plugins/cartographer/scripts/lib/cartographer-collect.sh +59 -0
- package/plugins/cartographer/scripts/lib/cartographer-config.sh +105 -0
- package/plugins/cartographer/scripts/lib/cartographer-events.sh +82 -0
- package/plugins/cartographer/scripts/lib/cartographer-lock.sh +38 -0
- package/plugins/cartographer/scripts/lib/cartographer-project-key.sh +55 -0
- package/plugins/cartographer/scripts/lib/cartographer-ulid.sh +47 -0
- package/plugins/cartographer/scripts/run-audit.sh +309 -0
- package/plugins/cartographer/skills/cartographer/SKILL.md +154 -0
- package/plugins/echo/.claude-plugin/plugin.json +14 -0
- package/plugins/echo/CHANGELOG.md +24 -0
- package/plugins/echo/README.md +110 -0
- package/plugins/echo/config.json +15 -0
- package/plugins/echo/docs/adr/001-echo-as-separate-plugin.md +33 -0
- package/plugins/echo/docs/adr/002-direct-evaluation-vs-tribunal-pipeline.md +35 -0
- package/plugins/echo/docs/adr/003-stop-hook-trigger.md +40 -0
- package/plugins/echo/hooks/hooks.json +15 -0
- package/plugins/echo/scripts/hooks/echo-stop-gate.sh +366 -0
- package/plugins/echo/scripts/lib/echo-config.sh +108 -0
- package/plugins/echo/scripts/lib/echo-events.sh +74 -0
- package/plugins/echo/scripts/lib/echo-project-key.sh +81 -0
- package/plugins/echo/scripts/lib/echo-ulid.sh +46 -0
- package/plugins/tribunal/.claude-plugin/plugin.json +20 -0
- package/plugins/tribunal/CHANGELOG.md +10 -0
- package/plugins/tribunal/README.md +134 -0
- package/plugins/tribunal/agents/tribunal-actor.md +35 -0
- package/plugins/tribunal/agents/tribunal-judge-adversarial.md +51 -0
- package/plugins/tribunal/agents/tribunal-judge-security.md +47 -0
- package/plugins/tribunal/agents/tribunal-judge-standard.md +47 -0
- package/plugins/tribunal/agents/tribunal-meta-judge.md +61 -0
- package/plugins/tribunal/config.json +50 -0
- package/plugins/tribunal/docs/adr/001-actor-jury-meta-gate-loop.md +40 -0
- package/plugins/tribunal/docs/adr/002-majority-gate-policy.md +48 -0
- package/plugins/tribunal/hooks/hooks.json +15 -0
- package/plugins/tribunal/scripts/hooks/tribunal-stop-gate.sh +267 -0
- package/plugins/tribunal/scripts/lib/tribunal-aggregate.sh +65 -0
- package/plugins/tribunal/scripts/lib/tribunal-config.sh +101 -0
- package/plugins/tribunal/scripts/lib/tribunal-events.sh +97 -0
- package/plugins/tribunal/scripts/lib/tribunal-gate.sh +111 -0
- package/plugins/tribunal/scripts/lib/tribunal-jury.sh +102 -0
- package/plugins/tribunal/scripts/lib/tribunal-project-key.sh +84 -0
- package/plugins/tribunal/scripts/lib/tribunal-rubric.sh +153 -0
- package/plugins/tribunal/scripts/lib/tribunal-ulid.sh +50 -0
- package/plugins/tribunal/scripts/lib/tribunal-verdict.sh +127 -0
- package/plugins/tribunal/skills/tribunal/SKILL.md +129 -0
- package/release-please-config.json +59 -5
- package/scripts/coverage/bash-coverage.mjs +169 -0
- package/scripts/coverage/format-comment.mjs +120 -0
- package/scripts/coverage/run-coverage.mjs +151 -0
- package/scripts/hooks/agent-spawn-tracker.sh +4 -4
- package/scripts/hooks/prompt-rule-injector.sh +122 -0
- package/scripts/lib/portable-lock.sh +48 -0
- package/scripts/lib/prompt-rules.sh +207 -0
- package/scripts/lib/tool-history.sh +7 -8
- package/scripts/lib/validate-path.sh +4 -0
- package/scripts/lint/check-manifests.mjs +314 -0
- package/scripts/lint/check-references.mjs +311 -0
- package/skills/list-prompt-rules/SKILL.md +15 -0
- package/test/bats/archivist-config-files.bats +60 -0
- package/test/bats/archivist-config.bats +54 -0
- package/test/bats/archivist-inject.bats +73 -0
- package/test/bats/archivist-project-key.bats +75 -0
- package/test/bats/archivist-storage.bats +119 -0
- package/test/bats/archivist-ulid.bats +36 -0
- package/test/bats/cartographer-config.bats +107 -0
- package/test/bats/cartographer-lock.bats +77 -0
- package/test/bats/cartographer-ulid.bats +56 -0
- package/test/bats/config.bats +10 -10
- package/test/bats/echo-config.bats +90 -0
- package/test/bats/echo-events.bats +121 -0
- package/test/bats/echo-project-key.bats +115 -0
- package/test/bats/echo-stop-hook.bats +101 -0
- package/test/bats/echo-ulid.bats +38 -0
- package/test/bats/portable-lock.bats +62 -0
- package/test/bats/prompt-rules.bats +269 -0
- package/test/bats/tribunal-aggregate.bats +77 -0
- package/test/bats/tribunal-config.bats +86 -0
- package/test/bats/tribunal-events.bats +209 -0
- package/test/bats/tribunal-gate.bats +95 -0
- package/test/bats/tribunal-jury.bats +80 -0
- package/test/bats/tribunal-rubric.bats +119 -0
- package/test/bats/tribunal-stop-hook.bats +73 -0
- package/test/bats/tribunal-verdict.bats +71 -0
- package/test/fixtures/hook-inputs/user-prompt-submit-rule-match.json +8 -0
- package/test/fixtures/hook-inputs/user-prompt-submit-rule-nomatch.json +8 -0
- package/test/helpers/setup.bash +9 -0
- package/test/node/check-manifests.test.mjs +173 -0
- package/test/node/check-references.test.mjs +279 -0
- package/test/node/coverage.test.mjs +143 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: tribunal
|
|
3
|
+
description: Run a task under multi-agent quality gates. Spawns the tribunal-actor subagent, a jury of typed Judges, and a Meta-Judge; aggregates verdicts under a configurable gate policy; retries the Actor with critique on rejection until acceptance or max_iterations. Use when the user explicitly wraps a task with /tribunal, or wants stronger correctness/safety review than a single pass. Emits the full tribunal.* canonical event stream.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Tribunal: Multi-Agent Execution with Quality Gates
|
|
7
|
+
|
|
8
|
+
You are orchestrating a **Tribunal** evaluation loop. A user task gets wrapped in: **Actor → Jury → Meta-Judge → Gate**, retrying the Actor with feedback until the gate passes or `max_iterations` is reached.
|
|
9
|
+
|
|
10
|
+
## Setup
|
|
11
|
+
|
|
12
|
+
Before the loop, source the plugin's bash helpers and load config. Run this once at the start:
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
set -uo pipefail
|
|
16
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-config.sh"
|
|
17
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-rubric.sh"
|
|
18
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-jury.sh"
|
|
19
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-aggregate.sh"
|
|
20
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-gate.sh"
|
|
21
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-events.sh"
|
|
22
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-verdict.sh"
|
|
23
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-project-key.sh"
|
|
24
|
+
source "$CLAUDE_PLUGIN_ROOT/scripts/lib/tribunal-ulid.sh"
|
|
25
|
+
|
|
26
|
+
tribunal_config_load "$(pwd)"
|
|
27
|
+
tribunal_rubric_load "$(pwd)"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Parse the task description from the user's prompt arguments. If the user passed `--rubric=<id>`, use that; otherwise use `tribunal_rubric_default_id`.
|
|
31
|
+
|
|
32
|
+
Resolve the active rubric with `tribunal_rubric_get "$rubric_id"`. Validate it with `tribunal_rubric_validate "$rubric"`. If validation fails, abort with `tribunal.session.complete` outcome `aborted` and tell the user why.
|
|
33
|
+
|
|
34
|
+
## Per-task initialization
|
|
35
|
+
|
|
36
|
+
Generate identifiers and persist task-level state:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
task_id=$(tribunal_ulid)
|
|
40
|
+
project_key=$(tribunal_project_key "$(pwd)")
|
|
41
|
+
remote=$(tribunal_project_remote_url "$(pwd)")
|
|
42
|
+
repo_root=$(tribunal_project_repo_root "$(pwd)")
|
|
43
|
+
tribunal_write_project_manifest "$project_key" "$remote" "$repo_root"
|
|
44
|
+
tribunal_write_task_manifest "$project_key" "$task_id" "$task_summary" "$rubric_id" "$rubric"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Emit `tribunal.session.start` with the resolved config (`judge_types`, `gate_policy`, `score_threshold`, `max_iterations`, model IDs).
|
|
48
|
+
|
|
49
|
+
## The loop
|
|
50
|
+
|
|
51
|
+
For `iteration_number` from `0` while `iteration_number < max_iterations`:
|
|
52
|
+
|
|
53
|
+
1. **Iteration start.** Generate `iteration_id=$(tribunal_ulid)`. `trigger` is `"initial"` for n=0, `"gate_blocked"` for retries. Emit `tribunal.iteration.start`.
|
|
54
|
+
|
|
55
|
+
2. **Actor.** Emit `tribunal.actor.start`. Use the Task tool to spawn `tribunal-actor` with:
|
|
56
|
+
- The task description.
|
|
57
|
+
- The rubric criteria (just `name` + `weight` + `min_pass`).
|
|
58
|
+
- On retries: a digest of the prior iteration's consensus, dissent (if any), and Meta-Judge override.
|
|
59
|
+
|
|
60
|
+
Capture the Actor's final output. Persist it: `tribunal_write_actor_output "$project_key" "$task_id" "$iteration_id" "$actor_output"`. Emit `tribunal.actor.complete` with `success: true` and the inferred `artifact_kind` (`file` / `patch` / `message` / `command`).
|
|
61
|
+
|
|
62
|
+
3. **Empanel the jury.** Resolve the panel from configured types:
|
|
63
|
+
```bash
|
|
64
|
+
types=$(tribunal_config_get_json '.tribunal.session.judge_types')
|
|
65
|
+
# Rubric may override:
|
|
66
|
+
rubric_types=$(printf '%s' "$rubric" | jq -c '.judge_types // empty')
|
|
67
|
+
[[ -n "$rubric_types" && "$rubric_types" != "null" ]] && types="$rubric_types"
|
|
68
|
+
jury=$(tribunal_jury_empanel "$types")
|
|
69
|
+
```
|
|
70
|
+
Persist the jury (`tribunal_write_iteration_artifact ... jury ...`) and emit `tribunal.jury.empaneled` with the schema-shaped `judges[]` (`tribunal_jury_to_schema_judges "$jury"`).
|
|
71
|
+
|
|
72
|
+
4. **Run each Judge.** For each entry in the jury panel:
|
|
73
|
+
- Emit `tribunal.judge.start` with `judge_id`, `judge_type`, `judge_model_id`.
|
|
74
|
+
- Spawn the judge subagent (`.subagent` field) with the Actor output + rubric.
|
|
75
|
+
- Parse the JSON object the judge returns. Augment it with `task_id`, `iteration_id`, `judge_id`, `judge_model_id` from the panel entry, and `judge_type` from the panel entry (canonical, overriding what the agent self-reported).
|
|
76
|
+
- Emit `tribunal.verdict` with that payload.
|
|
77
|
+
- Persist with `tribunal_write_judge_verdict`.
|
|
78
|
+
|
|
79
|
+
Collect the verdicts into a JSON array `verdicts`.
|
|
80
|
+
|
|
81
|
+
5. **Aggregate + dissent.**
|
|
82
|
+
```bash
|
|
83
|
+
method=$(printf '%s' "$rubric" | jq -r '.aggregation_method // "weighted_mean"')
|
|
84
|
+
threshold=$(printf '%s' "$rubric" | jq -r '.score_threshold // 0.75')
|
|
85
|
+
dissent_threshold=$(tribunal_config_get '.tribunal.session.dissent_threshold')
|
|
86
|
+
[[ -z "$dissent_threshold" ]] && dissent_threshold="0.25"
|
|
87
|
+
|
|
88
|
+
aggregated=$(tribunal_aggregate "$method" "$verdicts" "$rubric")
|
|
89
|
+
dissent=$(tribunal_disagreement "$verdicts")
|
|
90
|
+
```
|
|
91
|
+
Build and emit `tribunal.consensus.reached`. If `dissent > dissent_threshold`, emit `tribunal.dissent.recorded` (set `resolution` to `null` for now — the Meta-Judge may set it on the next step via `override_recommendation`).
|
|
92
|
+
|
|
93
|
+
6. **Meta-Judge.** Emit `tribunal.meta.start`. Spawn `tribunal-meta-judge` with the verdicts and the Actor output. Parse its JSON into `meta`; augment it with `task_id`, `iteration_id`, `meta_model_id`. Emit `tribunal.meta.complete`. Persist.
|
|
94
|
+
|
|
95
|
+
7. **Gate.**
|
|
96
|
+
```bash
|
|
97
|
+
policy=$(printf '%s' "$rubric" | jq -r '.gate_policy // "majority"')
|
|
98
|
+
gate=$(tribunal_gate_decide "$policy" "$verdicts" "$aggregated" "$threshold" "$meta" "$dissent" "$dissent_threshold")
|
|
99
|
+
```
|
|
100
|
+
If `gate.passed == true`, emit `tribunal.gate.passed` with `final_score: aggregated` and break the loop with outcome `accepted`. Otherwise emit `tribunal.gate.blocked` with the `reason`, `will_retry: (iteration_number + 1 < max_iterations)`, and `retry_iteration_number` if retrying. Persist `gate.json` either way.
|
|
101
|
+
|
|
102
|
+
If blocking and retrying, build the retry digest (lowest-scoring criteria + meta override + dissent summary) and feed it into the next iteration's Actor prompt.
|
|
103
|
+
|
|
104
|
+
## Termination
|
|
105
|
+
|
|
106
|
+
When the loop exits:
|
|
107
|
+
|
|
108
|
+
- `accepted` — gate passed.
|
|
109
|
+
- `exhausted_iterations` — loop ran `max_iterations` without acceptance.
|
|
110
|
+
- `aborted` — orchestrator caught an unrecoverable error (rubric validation failed, Actor subagent crashed twice, etc.). Set this explicitly when you catch errors; do not silently swallow.
|
|
111
|
+
|
|
112
|
+
Emit `tribunal.session.complete` with `outcome`, `final_score`, `iterations_used`, `total_duration_ms`. Skip `total_cost_usd` in v0.1 — the runtime does not surface subagent costs to the orchestrator yet.
|
|
113
|
+
|
|
114
|
+
## Summary to the user
|
|
115
|
+
|
|
116
|
+
After emitting `session.complete`, render a compact markdown summary to the user:
|
|
117
|
+
|
|
118
|
+
- Verdict (✓ accepted / ✗ rejected / ⏱ exhausted / ⚠ aborted) with final score.
|
|
119
|
+
- Per-iteration table: iteration | per-judge scores | dissent | gate result.
|
|
120
|
+
- Meta-Judge bias notes if any.
|
|
121
|
+
- Path to the persisted artifacts (`~/.onlooker/tribunal/<key>/<task_id>/`).
|
|
122
|
+
|
|
123
|
+
Keep the summary terse. The artifacts on disk are the long form.
|
|
124
|
+
|
|
125
|
+
## Error handling
|
|
126
|
+
|
|
127
|
+
- If a judge subagent fails to return parseable JSON, treat that judge as `score: 0, passed: false, confidence: 0` and surface the parse error in `feedback_summary`. Do not abort the iteration — let the gate decide.
|
|
128
|
+
- If the Meta-Judge fails, default to `verdict_quality: "questionable", bias_detected: false` so the gate falls back to score-based logic.
|
|
129
|
+
- If event emission fails (schema validation), keep going and write a warning to stderr. The persisted artifacts on disk are still trustworthy.
|
|
@@ -10,18 +10,72 @@
|
|
|
10
10
|
"extra-files": [
|
|
11
11
|
{
|
|
12
12
|
"type": "json",
|
|
13
|
-
"path": "
|
|
13
|
+
"path": ".claude-plugin/plugin.json",
|
|
14
|
+
"jsonpath": "$.version"
|
|
15
|
+
}
|
|
16
|
+
]
|
|
17
|
+
},
|
|
18
|
+
"plugins/archivist": {
|
|
19
|
+
"changelog-path": "CHANGELOG.md",
|
|
20
|
+
"release-type": "simple",
|
|
21
|
+
"bump-minor-pre-major": true,
|
|
22
|
+
"bump-patch-for-minor-pre-major": false,
|
|
23
|
+
"component": "archivist",
|
|
24
|
+
"draft": false,
|
|
25
|
+
"prerelease": false,
|
|
26
|
+
"extra-files": [
|
|
27
|
+
{
|
|
28
|
+
"type": "json",
|
|
29
|
+
"path": ".claude-plugin/plugin.json",
|
|
30
|
+
"jsonpath": "$.version"
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
},
|
|
34
|
+
"plugins/tribunal": {
|
|
35
|
+
"changelog-path": "CHANGELOG.md",
|
|
36
|
+
"release-type": "simple",
|
|
37
|
+
"bump-minor-pre-major": true,
|
|
38
|
+
"bump-patch-for-minor-pre-major": false,
|
|
39
|
+
"component": "tribunal",
|
|
40
|
+
"draft": false,
|
|
41
|
+
"prerelease": false,
|
|
42
|
+
"extra-files": [
|
|
43
|
+
{
|
|
44
|
+
"type": "json",
|
|
45
|
+
"path": ".claude-plugin/plugin.json",
|
|
14
46
|
"jsonpath": "$.version"
|
|
15
|
-
}
|
|
47
|
+
}
|
|
48
|
+
]
|
|
49
|
+
},
|
|
50
|
+
"plugins/echo": {
|
|
51
|
+
"changelog-path": "CHANGELOG.md",
|
|
52
|
+
"release-type": "simple",
|
|
53
|
+
"bump-minor-pre-major": true,
|
|
54
|
+
"bump-patch-for-minor-pre-major": false,
|
|
55
|
+
"component": "echo",
|
|
56
|
+
"draft": false,
|
|
57
|
+
"prerelease": false,
|
|
58
|
+
"extra-files": [
|
|
16
59
|
{
|
|
17
60
|
"type": "json",
|
|
18
61
|
"path": ".claude-plugin/plugin.json",
|
|
19
62
|
"jsonpath": "$.version"
|
|
20
|
-
}
|
|
63
|
+
}
|
|
64
|
+
]
|
|
65
|
+
},
|
|
66
|
+
"plugins/cartographer": {
|
|
67
|
+
"changelog-path": "CHANGELOG.md",
|
|
68
|
+
"release-type": "simple",
|
|
69
|
+
"bump-minor-pre-major": true,
|
|
70
|
+
"bump-patch-for-minor-pre-major": false,
|
|
71
|
+
"component": "cartographer",
|
|
72
|
+
"draft": false,
|
|
73
|
+
"prerelease": false,
|
|
74
|
+
"extra-files": [
|
|
21
75
|
{
|
|
22
76
|
"type": "json",
|
|
23
|
-
"path": ".claude-plugin/
|
|
24
|
-
"jsonpath": "$.
|
|
77
|
+
"path": ".claude-plugin/plugin.json",
|
|
78
|
+
"jsonpath": "$.version"
|
|
25
79
|
}
|
|
26
80
|
]
|
|
27
81
|
}
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Bash "tested-functions ratio" heuristic.
|
|
3
|
+
//
|
|
4
|
+
// True bash line coverage (bashcov / kcov) is heavy and flaky in CI, so
|
|
5
|
+
// instead we ask the cheaper question: "for every public function defined
|
|
6
|
+
// in scripts/lib/ (or plugins/archivist/scripts/lib/), does at least one bats test reference it by name?". The
|
|
7
|
+
// result is a per-file ratio plus a flat list of untested public functions.
|
|
8
|
+
//
|
|
9
|
+
// What counts as a "public" function:
|
|
10
|
+
// * defined with either `name() { ... }` or `function name { ... }`
|
|
11
|
+
// * name does NOT start with an underscore (those are private helpers and
|
|
12
|
+
// should be tested indirectly through their callers).
|
|
13
|
+
//
|
|
14
|
+
// What counts as a "reference" in tests:
|
|
15
|
+
// * the function name appears as a standalone word in any *.bats file
|
|
16
|
+
// (typical patterns: `run my_func ...`, `my_func "$arg"`, or sourced
|
|
17
|
+
// and called directly). False positives are possible — that's the cost
|
|
18
|
+
// of a heuristic — but the score is still useful as a regression gate
|
|
19
|
+
// and is calibrated against the noise floor.
|
|
20
|
+
//
|
|
21
|
+
// Flags:
|
|
22
|
+
// --json emit structured JSON on stdout (default: human-readable)
|
|
23
|
+
// --root <p> override repo root
|
|
24
|
+
//
|
|
25
|
+
// Exit codes: always 0; this is an informational tool. Use --json to feed
|
|
26
|
+
// into format-comment.mjs.
|
|
27
|
+
|
|
28
|
+
import { readdirSync, readFileSync, statSync } from 'node:fs';
|
|
29
|
+
import { dirname, join, relative, resolve } from 'node:path';
|
|
30
|
+
import { fileURLToPath } from 'node:url';
|
|
31
|
+
|
|
32
|
+
/**
 * Locate the repository root by walking up from `start` until a directory
 * containing `.claude-plugin/marketplace.json` is found.
 *
 * The walk stops when `dirname()` no longer makes progress, i.e. at the
 * filesystem root. Comparing against a literal '/' would never terminate on
 * Windows, where the root looks like `C:\`, and would skip probing the root
 * directory itself.
 *
 * @param {string} start - Directory to begin searching from.
 * @returns {string} Absolute path of the nearest ancestor holding the marker.
 * @throws {Error} When no ancestor (filesystem root included) has the marker.
 */
function findRepoRoot(start) {
  let cur = resolve(start);
  for (;;) {
    try {
      statSync(join(cur, '.claude-plugin', 'marketplace.json'));
      return cur;
    } catch {
      // Marker is absent (or unreadable) at this level; keep climbing.
    }
    const parent = dirname(cur);
    // dirname() of a filesystem root returns the root itself.
    if (parent === cur) break;
    cur = parent;
  }
  throw new Error(`no repo root above ${start}`);
}
|
|
43
|
+
|
|
44
|
+
/**
 * Parse the command-line flags understood by this script.
 *
 * @param {string[]} argv - Full argv array; the first two entries are skipped.
 * @returns {{json: boolean, root: (string|null|undefined)}} `json` is true
 *   when `--json` was given; `root` is whatever token followed `--root`
 *   (null when the flag is absent).
 */
function parseArgs(argv) {
  const parsed = { json: false, root: null };
  let idx = 2;
  while (idx < argv.length) {
    switch (argv[idx]) {
      case '--json':
        parsed.json = true;
        break;
      case '--root':
        // The value is the next token; consume it so it is not re-read as a flag.
        idx += 1;
        parsed.root = argv[idx];
        break;
      default:
        break;
    }
    idx += 1;
  }
  return parsed;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Collect every regular file under `dir` whose path satisfies `predicate`.
 *
 * Directories named `node_modules` or `.git` are not descended into, and a
 * directory that cannot be listed is skipped rather than raising.
 *
 * @param {string} dir - Directory to start from.
 * @param {(path: string) => boolean} predicate - Filter applied to each file path.
 * @returns {string[]} Matching paths, sorted with the default string ordering.
 */
function walk(dir, predicate) {
  const skippedDirs = new Set(['node_modules', '.git']);
  const matches = [];
  const pending = [dir];
  while (pending.length > 0) {
    const folder = pending.pop();
    let entries;
    try {
      entries = readdirSync(folder, { withFileTypes: true });
    } catch {
      // Unlistable directory: treat as empty and move on.
      continue;
    }
    entries.forEach((entry) => {
      const fullPath = join(folder, entry.name);
      if (entry.isDirectory()) {
        if (!skippedDirs.has(entry.name)) pending.push(fullPath);
        return;
      }
      if (entry.isFile() && predicate(fullPath)) matches.push(fullPath);
    });
  }
  return matches.sort();
}
|
|
76
|
+
|
|
77
|
+
/**
 * Collect the public, top-level function names defined in a bash script.
 *
 * Recognised definition forms (the line must start at column 0; indented
 * lines are nested fns / non-top-level callbacks we don't want to attribute
 * to the file's public surface):
 *   - `name() {` and `name () {`
 *   - `function name() {`
 *   - `function name {`, and a bare `function name` whose `{` is on the next
 *     line (bash allows omitting the parentheses after the `function` keyword)
 *
 * Names starting with `_` (private helpers) and bash keywords that can look
 * like function names are skipped.
 *
 * @param {string} content - Full text of a shell script.
 * @returns {string[]} Unique function names in first-seen order.
 */
function extractFunctions(content) {
  // `name()` with an optional leading `function` keyword.
  const withParens = /^(?:function\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\(\s*\)/;
  // `function name` without parentheses; must be followed by `{`, a comment,
  // or end of line so that arbitrary commands are not misread as definitions.
  const keywordOnly = /^function\s+([A-Za-z_][A-Za-z0-9_]*)\s*(?:\{|#|$)/;
  const reserved = new Set(['if', 'while', 'for', 'case', 'then', 'do', 'else', 'fi', 'done']);

  const names = new Set();
  for (const line of content.split(/\r?\n/)) {
    // Strict: must start at column 0 (no leading whitespace).
    if (line.length === 0 || line[0] === ' ' || line[0] === '\t') continue;
    const m = line.match(withParens) || line.match(keywordOnly);
    if (!m) continue;
    const name = m[1];
    if (name.startsWith('_') || reserved.has(name)) continue;
    names.add(name);
  }
  return [...names];
}
|
|
97
|
+
|
|
98
|
+
/**
 * Report whether `name` occurs in `testsContent` as a standalone word.
 *
 * "Standalone" means the characters on either side (if any) are not ASCII
 * letters, digits or underscores, so `run name`, `name "$x"` and `$( name )`
 * all count while `name2` or `my_name` do not.
 *
 * @param {string} name - Identifier to look for (interpolated into a regex as-is).
 * @param {string} testsContent - Concatenated text to search.
 * @returns {boolean} True when at least one standalone occurrence exists.
 */
function isReferenced(name, testsContent) {
  const leftEdge = '(^|[^A-Za-z0-9_])';
  const rightEdge = '([^A-Za-z0-9_]|$)';
  const standalone = new RegExp(leftEdge + name + rightEdge);
  return standalone.exec(testsContent) !== null;
}
|
|
104
|
+
|
|
105
|
+
/**
 * Entry point: compute the "tested-functions ratio" report and print it.
 *
 * Scans the shell libraries under `<root>/scripts/lib` and
 * `<root>/plugins/archivist/scripts/lib`, checks each extracted function name
 * against the concatenated `*.bats` files under `<root>/test/bats`, and
 * writes either pretty-printed JSON (`--json`) or a human-readable summary
 * to stdout.
 */
function main() {
  const args = parseArgs(process.argv);
  const scriptDir = dirname(fileURLToPath(import.meta.url));
  const root = args.root ? resolve(args.root) : findRepoRoot(scriptDir);

  const isShellFile = (p) => p.endsWith('.sh');
  const libFiles = [];
  const libDirs = [join(root, 'scripts', 'lib'), join(root, 'plugins', 'archivist', 'scripts', 'lib')];
  for (const libDir of libDirs) {
    try {
      libFiles.push(...walk(libDir, isShellFile));
    } catch {}
  }

  // All bats sources are searched as one blob; which test file references a
  // function does not matter for the ratio.
  const testsContent = walk(join(root, 'test', 'bats'), (p) => p.endsWith('.bats'))
    .map((testFile) => readFileSync(testFile, 'utf8'))
    .join('\n');

  const perFile = [];
  const untested = [];
  let totalFns = 0;
  let totalTested = 0;

  for (const libFile of libFiles) {
    const fns = extractFunctions(readFileSync(libFile, 'utf8'));
    const tested = fns.filter((name) => isReferenced(name, testsContent));
    const missing = fns.filter((name) => !tested.includes(name));
    const relpath = relative(root, libFile);

    totalFns += fns.length;
    totalTested += tested.length;

    perFile.push({
      file: relpath,
      total: fns.length,
      tested: tested.length,
      // A file with no public functions counts as fully covered.
      ratio: fns.length === 0 ? 1 : tested.length / fns.length,
      untested: missing,
    });
    untested.push(...missing.map((name) => ({ file: relpath, name })));
  }

  const overallRatio = totalFns === 0 ? 1 : totalTested / totalFns;

  if (args.json) {
    const report = {
      overall: { total: totalFns, tested: totalTested, ratio: overallRatio },
      files: perFile,
      untested,
    };
    process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
    return;
  }

  const chunks = [`bash function coverage: ${totalTested}/${totalFns} (${(overallRatio * 100).toFixed(1)}%)\n\n`];
  for (const entry of perFile) {
    const pct = (entry.ratio * 100).toFixed(0).padStart(3);
    chunks.push(` ${pct}% ${entry.tested}/${entry.total} ${entry.file}\n`);
    for (const u of entry.untested) {
      chunks.push(` - ${u}\n`);
    }
  }
  // Emit in the same order and granularity: one write per line.
  for (const chunk of chunks) {
    process.stdout.write(chunk);
  }
}
|
|
168
|
+
|
|
169
|
+
main();
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Combine node coverage + bash function coverage into a single markdown
|
|
3
|
+
// comment suitable for posting on a pull request via `gh pr comment`.
|
|
4
|
+
//
|
|
5
|
+
// Reads each report from a file path (so the caller can capture stdout
|
|
6
|
+
// once and pass the file through). Emits markdown on stdout.
|
|
7
|
+
//
|
|
8
|
+
// Usage:
|
|
9
|
+
// format-comment.mjs --node coverage-node.json --bash coverage-bash.json
|
|
10
|
+
//
|
|
11
|
+
// Each file should be JSON produced by the matching script's --json mode.
|
|
12
|
+
|
|
13
|
+
import { readFileSync } from 'node:fs';
|
|
14
|
+
|
|
15
|
+
/**
 * Parse the flags accepted by the comment formatter.
 *
 * @param {string[]} argv - Full argv array; the first two entries are skipped.
 * @returns {{node: (string|null|undefined), bash: (string|null|undefined), sha: (string|null|undefined)}}
 *   Each field holds the token following `--node`, `--bash` or `--sha`.
 *   `node` and `bash` default to null; `sha` defaults to the `GITHUB_SHA`
 *   environment variable, or null when that is unset.
 */
function parseArgs(argv) {
  const parsed = { node: null, bash: null, sha: process.env.GITHUB_SHA ?? null };
  // Flag -> destination field. Every flag here takes exactly one value.
  const destinations = new Map([
    ['--node', 'node'],
    ['--bash', 'bash'],
    ['--sha', 'sha'],
  ]);
  let idx = 2;
  while (idx < argv.length) {
    const field = destinations.get(argv[idx]);
    if (field !== undefined) {
      idx += 1;
      parsed[field] = argv[idx];
    }
    idx += 1;
  }
  return parsed;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Format a percentage value for display.
 *
 * @param {*} n - Value already expressed in percent (e.g. 74.2).
 * @returns {string} `n` with one decimal place and a `%` suffix, or an em
 *   dash when `n` is not a number or is NaN.
 */
function pct(n) {
  const isUsable = typeof n === 'number' && !Number.isNaN(n);
  return isUsable ? `${n.toFixed(1)}%` : '—';
}
|
|
29
|
+
|
|
30
|
+
/**
 * Pick a traffic-light emoji for a coverage percentage.
 *
 * Thresholds (green / yellow): 70 / 50 for `kind === 'bash'`, 80 / 60 for
 * every other kind. Anything below the yellow threshold is red.
 *
 * A missing measurement (a non-number, or NaN) yields the neutral white
 * badge. NaN is checked explicitly because `typeof NaN === 'number'`; without
 * the check an absent value would be shown as a red failure next to the "—"
 * that `pct` renders for the same input.
 *
 * @param {*} value - Coverage in percent.
 * @param {string} kind - Report kind; `'bash'` selects the lower thresholds.
 * @returns {string} One of '🟢', '🟡', '🔴' or '⚪'.
 */
function badge(value, kind) {
  if (typeof value !== 'number' || Number.isNaN(value)) return '⚪';
  const [green, yellow] = kind === 'bash' ? [70, 50] : [80, 60];
  if (value >= green) return '🟢';
  if (value >= yellow) return '🟡';
  return '🔴';
}
|
|
41
|
+
|
|
42
|
+
/**
 * Render the node coverage report as markdown.
 *
 * @param {?{overall: object, files: Iterable<object>}} report - Parsed JSON
 *   report; `overall` and each file entry carry `line`, `branch` and `funcs`
 *   percentages, and file entries also carry `file`.
 * @returns {string} A summary line followed by a per-file table, or a
 *   placeholder sentence when there is no report / no `overall` section.
 */
function nodeSection(report) {
  const overall = report?.overall;
  if (!overall) return '_No node coverage report._';

  const summary = `**Overall:** ${badge(overall.line, 'node')} ${pct(overall.line)} lines · ${pct(overall.branch)} branches · ${pct(overall.funcs)} functions`;
  const tableHead = ['| file | line | branch | funcs |', '| --- | ---: | ---: | ---: |'];
  const tableRows = [...report.files].map(
    (entry) => `| \`${entry.file}\` | ${pct(entry.line)} | ${pct(entry.branch)} | ${pct(entry.funcs)} |`,
  );

  return [summary, '', ...tableHead, ...tableRows].join('\n');
}
|
|
58
|
+
|
|
59
|
+
/**
 * Render the bash function-coverage report as markdown.
 *
 * @param {?{overall: {total: number, tested: number, ratio: number},
 *           files: Iterable<object>, untested: Array<{file: string, name: string}>}} report
 *   Parsed JSON report. `ratio` values are fractions and are multiplied by
 *   100 for display.
 * @returns {string} A summary line, a per-file table (files with zero
 *   functions are omitted) and, when any exist, a collapsible list of
 *   untested functions. A placeholder sentence is returned when there is no
 *   report / no `overall` section.
 */
function bashSection(report) {
  const overall = report?.overall;
  if (!overall) return '_No bash function coverage report._';

  const overallPct = overall.ratio * 100;
  const summary = `**Overall:** ${badge(overallPct, 'bash')} ${overall.tested}/${overall.total} public functions exercised by bats (${pct(overallPct)})`;

  const tableRows = [...report.files]
    .filter((entry) => entry.total !== 0)
    .map((entry) => `| \`${entry.file}\` | ${entry.tested} / ${entry.total} | ${pct(entry.ratio * 100)} |`);

  const details =
    report.untested.length > 0
      ? [
          '',
          '<details><summary>Untested public functions</summary>',
          '',
          ...[...report.untested].map((miss) => `- \`${miss.file}\` — \`${miss.name}\``),
          '',
          '</details>',
        ]
      : [];

  return [
    summary,
    '',
    '| file | tested / total | ratio |',
    '| --- | ---: | ---: |',
    ...tableRows,
    ...details,
  ].join('\n');
}
|
|
87
|
+
|
|
88
|
+
/**
 * Entry point: read the optional node / bash JSON reports named on the
 * command line and write the combined markdown comment to stdout.
 *
 * The first output line is a hidden HTML marker comment; a `Commit:` line
 * (first 12 characters of the SHA) is included only when a SHA is available.
 */
function main() {
  const args = parseArgs(process.argv);
  // A report that was not requested stays null; the section renderers are
  // still called with it.
  const loadReport = (path) => (path ? JSON.parse(readFileSync(path, 'utf8')) : null);
  const nodeReport = loadReport(args.node);
  const bashReport = loadReport(args.bash);

  const commitLines = args.sha ? [`Commit: \`${args.sha.slice(0, 12)}\``, ''] : [];

  const doc = [
    '<!-- onlooker-coverage-comment -->',
    '## Coverage',
    '',
    ...commitLines,
    '### Node (.mjs)',
    '',
    nodeSection(nodeReport),
    '',
    '### Bash (function-reference heuristic)',
    '',
    bashSection(bashReport),
    '',
    '---',
    '',
    'Bash numbers are a heuristic — they count public functions referenced by bats tests, not real line coverage. A red score points to public helpers nobody directly exercises.',
  ];

  process.stdout.write(`${doc.join('\n')}\n`);
}
|
|
119
|
+
|
|
120
|
+
main();
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Run the .mjs test suite with node's built-in --experimental-test-coverage,
|
|
3
|
+
// parse the emitted table into structured JSON, and either pretty-print it
|
|
4
|
+
// or hand it off as JSON for downstream tools (format-comment.mjs).
|
|
5
|
+
//
|
|
6
|
+
// The output table is fixed-format text; we parse it line-by-line rather
|
|
7
|
+
// than depending on V8 coverage dumps so we don't need to handle binary
|
|
8
|
+
// formats across node versions.
|
|
9
|
+
//
|
|
10
|
+
// Flags:
|
|
11
|
+
// --json emit structured JSON
|
|
12
|
+
// --root override repo root
|
|
13
|
+
|
|
14
|
+
import { spawnSync } from 'node:child_process';
|
|
15
|
+
import { statSync } from 'node:fs';
|
|
16
|
+
import { dirname, join, resolve } from 'node:path';
|
|
17
|
+
import { fileURLToPath } from 'node:url';
|
|
18
|
+
|
|
19
|
+
/**
 * Locate the repository root by walking up from `start` until a directory
 * containing `.claude-plugin/marketplace.json` is found.
 *
 * The walk stops when `dirname()` no longer makes progress, i.e. at the
 * filesystem root. Comparing against a literal '/' would never terminate on
 * Windows, where the root looks like `C:\`, and would skip probing the root
 * directory itself.
 *
 * @param {string} start - Directory to begin searching from.
 * @returns {string} Absolute path of the nearest ancestor holding the marker.
 * @throws {Error} When no ancestor (filesystem root included) has the marker.
 */
function findRepoRoot(start) {
  let cur = resolve(start);
  for (;;) {
    try {
      statSync(join(cur, '.claude-plugin', 'marketplace.json'));
      return cur;
    } catch {
      // Marker is absent (or unreadable) at this level; keep climbing.
    }
    const parent = dirname(cur);
    // dirname() of a filesystem root returns the root itself.
    if (parent === cur) break;
    cur = parent;
  }
  throw new Error(`no repo root above ${start}`);
}
|
|
30
|
+
|
|
31
|
+
/**
 * Parse the command-line flags understood by this script.
 *
 * Recognized flags are `--json` (boolean) and `--root <path>`; anything else
 * is ignored.
 *
 * @param {string[]} argv - Argument vector in `process.argv` shape; the first
 *   two entries (runtime and script path) are skipped.
 * @returns {{ json: boolean, root: string | null }} Parsed options. `root`
 *   stays `null` when `--root` is absent or has no value.
 */
function parseArgs(argv) {
  const out = { json: false, root: null };
  for (let i = 2; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === '--json') {
      out.json = true;
    } else if (arg === '--root') {
      const value = argv[i + 1];
      // Only consume the next token when it is an actual value. A trailing
      // `--root` must not turn `root` into `undefined`, and `--root --json`
      // must not swallow the following flag as if it were a path.
      if (value !== undefined && !value.startsWith('--')) {
        out.root = value;
        i++;
      }
    }
  }
  return out;
}
|
|
39
|
+
|
|
40
|
+
// Parse the human-readable coverage report node prints after a --test run.
|
|
41
|
+
// Layout:
|
|
42
|
+
// ℹ start of coverage report
|
|
43
|
+
// ℹ ---------- (separator)
|
|
44
|
+
// ℹ file | line % | branch % | funcs % | uncovered lines
|
|
45
|
+
// ℹ ---------- (separator)
|
|
46
|
+
// ℹ <directory>
|
|
47
|
+
// ℹ <subdir>
|
|
48
|
+
// ℹ <file.mjs> | 74.20 | 58.27 | 85.00 | 130-131 170 ...
|
|
49
|
+
// ℹ ---------- (separator)
|
|
50
|
+
// ℹ all files | 78.55 | 57.38 | 87.18 |
|
|
51
|
+
// ℹ ---------- (separator)
|
|
52
|
+
// ℹ end of coverage report
|
|
53
|
+
/**
 * Extract per-file and overall coverage numbers from the text table that
 * node prints between "start of coverage report" and "end of coverage report".
 *
 * Both the `ℹ`-prefixed and the `#`-prefixed (TAP diagnostic) renderings of
 * the table are accepted. Leading indentation is discarded, so `file` is the
 * bare name shown in the row, not a full path.
 *
 * @param {string} text - Captured stdout of the test run.
 * @returns {{
 *   files: Array<{ file: string, line: number, branch: number, funcs: number, uncoveredLines: string }>,
 *   overall: { file: string, line: number, branch: number, funcs: number, uncoveredLines: string } | null
 * }} Per-file rows in report order, plus the "all files" row (`null` when the
 *   report was not found).
 */
function parseCoverageOutput(text) {
  const toNumber = (s) => Number.parseFloat(s);
  const files = [];
  let overall = null;
  let inReport = false;

  for (const rawLine of text.split(/\r?\n/)) {
    // Strip the reporter's diagnostic marker ('ℹ' or '#') together with any
    // indentation used to nest directories and files.
    const line = rawLine.replace(/^[\sℹ#]+/, '');
    if (line.startsWith('start of coverage report')) {
      inReport = true;
      continue;
    }
    if (line.startsWith('end of coverage report')) {
      inReport = false;
      continue;
    }
    if (!inReport) continue;

    // Skip separators and the header row.
    if (line.startsWith('-')) continue;
    if (line.includes('line %') && line.includes('branch %')) continue;

    const cells = line.split('|').map((c) => c.trim());
    // A real data row has 5 columns: file, line%, branch%, funcs%, uncovered.
    if (cells.length < 5) continue;

    const [file, linePct, branchPct, funcsPct, uncovered] = cells;
    if (!file) continue;
    // Directory rows have all-blank metric columns — skip them so we only
    // surface per-file numbers + the all-files total.
    if (!linePct || !branchPct || !funcsPct) continue;

    const entry = {
      file,
      line: toNumber(linePct),
      branch: toNumber(branchPct),
      funcs: toNumber(funcsPct),
      uncoveredLines: uncovered || '',
    };
    // Rows whose metric cells are not numeric would otherwise surface as NaN
    // and render as "NaN%" downstream; drop them instead.
    const metrics = [entry.line, entry.branch, entry.funcs];
    if (!metrics.every((v) => Number.isFinite(v))) continue;

    if (file === 'all files') {
      overall = entry;
    } else {
      files.push(entry);
    }
  }

  return { files, overall };
}
|
|
102
|
+
|
|
103
|
+
/**
 * Script entry point: run the `.mjs` test suite under node's built-in
 * coverage, parse the emitted table, and print the result.
 *
 * With `--json` the parsed report is written to stdout as JSON. Otherwise a
 * one-line summary plus one line per file is written. The process exits
 * non-zero when the test run cannot be started, when the tests fail (the
 * child's output is forwarded to stderr), or when no "all files" row could be
 * parsed in the non-JSON mode.
 */
function main() {
  const args = parseArgs(process.argv);
  const here = dirname(fileURLToPath(import.meta.url));
  const root = args.root ? resolve(args.root) : findRepoRoot(here);

  const testGlob = ['test/node'].map((d) => join(root, d, '*.test.mjs'));
  const r = spawnSync(
    // Re-use the interpreter running this script instead of whatever `node`
    // happens to be first on PATH, so the coverage flags and table format
    // match the runtime the caller chose.
    process.execPath,
    [
      '--experimental-test-coverage',
      '--test-coverage-include=scripts/**/*.mjs',
      '--test-coverage-exclude=test/**',
      '--test',
      ...testGlob,
    ],
    { encoding: 'utf8', cwd: root },
  );

  // spawnSync reports launch/buffer failures through `error`; in that case
  // stdout/stderr may be null, so they must not be written unguarded.
  if (r.error) {
    process.stderr.write(`failed to run tests: ${r.error.message}\n`);
    process.stderr.write(r.stdout ?? '');
    process.stderr.write(r.stderr ?? '');
    process.exit(1);
  }

  if (r.status !== 0) {
    process.stderr.write(`tests failed (exit ${r.status})\n`);
    process.stderr.write(r.stdout ?? '');
    process.stderr.write(r.stderr ?? '');
    // status is null when the child was terminated by a signal.
    process.exit(r.status ?? 1);
  }

  const report = parseCoverageOutput(r.stdout);

  if (args.json) {
    process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
    return;
  }

  if (!report.overall) {
    process.stderr.write('could not parse coverage output\n');
    process.stderr.write(r.stdout);
    process.exit(1);
  }

  process.stdout.write(
    `node coverage: line ${report.overall.line.toFixed(1)}% branch ${report.overall.branch.toFixed(1)}% funcs ${report.overall.funcs.toFixed(1)}%\n\n`,
  );
  for (const f of report.files) {
    process.stdout.write(
      ` line ${f.line.toFixed(0).padStart(3)}% branch ${f.branch.toFixed(0).padStart(3)}% ${f.file}\n`,
    );
  }
}
|
|
150
|
+
|
|
151
|
+
// Run the coverage pipeline as soon as the script is executed.
main();
|
|
@@ -64,9 +64,9 @@ BACKGROUND=$(jq -r '.tool_input.background // false' <<<"$INPUT")
|
|
|
64
64
|
STATE_FILE="$ONLOOKER_DIR/agent-spawn-trackers.json"
|
|
65
65
|
LOCKFILE="$STATE_FILE.lock"
|
|
66
66
|
|
|
67
|
-
#
|
|
68
|
-
|
|
69
|
-
|
|
67
|
+
# Acquire exclusive access via the portable lock helper (mkdir-based mutex,
|
|
68
|
+
# works on macOS without util-linux).
|
|
69
|
+
lock_acquire "$LOCKFILE" 5 || {
|
|
70
70
|
json_response "deny" "Failed to acquire lock"
|
|
71
71
|
hook_failure
|
|
72
72
|
exit 0
|
|
@@ -103,7 +103,7 @@ STATE=$(jq --arg sid "$SESSION_ID" '
|
|
|
103
103
|
echo "$STATE" > "$STATE_FILE" 2>/dev/null || true
|
|
104
104
|
|
|
105
105
|
# Release lock
|
|
106
|
-
|
|
106
|
+
lock_release "$LOCKFILE"
|
|
107
107
|
|
|
108
108
|
# Get current session stats
|
|
109
109
|
SPAWN_COUNT=$(jq -r --arg sid "$SESSION_ID" '.sessions[$sid].spawns' <<<"$STATE")
|